Example #1
def append_filled_h5_col(dats, colname, data_len):
    """
    For ``colname`` that has newly appeared in the CXC content file due to a TDB
    change, append enough empty, bad-quality rows to pad the column out to the start
    of the first ``dats`` table that has ``colname``.

    ``data_len`` represents the data length to the END of the current ``dats``
    for this content type.
    """
    # Drop all dats until the first one that has the new colname, then include
    # all after that.
    new_dats = list(
        itertools.dropwhile(lambda x: colname not in x.dtype.names, dats))
    stacked_data = np.hstack([x[colname] for x in new_dats])
    fill_len = data_len - len(stacked_data)
    if fill_len < 0:
        raise ValueError(
            'impossible negative length error {}'.format(fill_len))

    zeros = np.zeros(fill_len, dtype=stacked_data.dtype)
    quals = np.ones(fill_len, dtype=bool)

    # Append zeros (for the data type) and quality=True (bad)
    h5 = tables.open_file(msid_files['msid'].abs, mode='a')
    logger.verbose('Appending %d zeros to %s' %
                   (len(zeros), msid_files['msid'].abs))
    if not opt.dry_run:
        h5.root.data.append(zeros)
        h5.root.quality.append(quals)
    h5.close()

    # Now actually append the new data
    append_h5_col(new_dats, colname, [])
Example #2
def make_h5_col_file(dats, colname):
    """Make a new h5 table to hold column from ``dat``."""
    filename = msid_files['msid'].abs
    filedir = os.path.dirname(filename)
    if not os.path.exists(filedir):
        os.makedirs(filedir)

    # Estimate the number of rows for 20 years based on available data
    times = np.hstack([x['TIME'] for x in dats])
    dt = np.median(times[1:] - times[:-1])
    n_rows = int(86400 * 365 * 20 / dt)

    filters = tables.Filters(complevel=5, complib='zlib')
    h5 = tables.open_file(filename, mode='w', filters=filters)

    col = dats[-1][colname]
    h5shape = (0, ) + col.shape[1:]
    h5type = tables.Atom.from_dtype(col.dtype)
    h5.create_earray(h5.root,
                     'data',
                     h5type,
                     h5shape,
                     title=colname,
                     expectedrows=n_rows)
    h5.create_earray(h5.root,
                     'quality',
                     tables.BoolAtom(), (0, ),
                     title='Quality',
                     expectedrows=n_rows)
    logger.verbose(
        'WARNING: made new file {} for column {!r} shape={} with n_rows(1e6)={}'
        .format(filename, colname, h5shape, n_rows / 1.0e6))
    h5.close()
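The functions in Examples #1 and #2 rely on PyTables extendable arrays. A minimal standalone sketch of that pattern, assuming a hypothetical file path and dtype (not part of the original code):

import numpy as np
import tables

filename = '/tmp/example_msid.h5'   # hypothetical path
filters = tables.Filters(complevel=5, complib='zlib')
with tables.open_file(filename, mode='w', filters=filters) as h5:
    atom = tables.Atom.from_dtype(np.dtype('float32'))
    # Zero-length first dimension means "extendable"; expectedrows only tunes chunking
    h5.create_earray(h5.root, 'data', atom, (0,), title='EXAMPLE', expectedrows=1000)
    h5.create_earray(h5.root, 'quality', tables.BoolAtom(), (0,), title='Quality',
                     expectedrows=1000)

with tables.open_file(filename, mode='a') as h5:
    h5.root.data.append(np.arange(5, dtype='float32'))
    h5.root.quality.append(np.zeros(5, dtype=bool))
    print(len(h5.root.data))  # -> 5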
Example #3
def make_dir(dir_):
    """Make a directory if it doesn't exist."""
    if not os.path.isdir(dir_):
        os.makedirs(dir_)
        if not os.path.isdir(dir_):
            raise pyyaks.task.TaskFailure('Failed to make directory %s' % dir_)
        logger.verbose('Made directory ' + dir_)
Example #4
def move_archive_files(filetype, archfiles):
    ft['content'] = filetype.content.lower()

    stagedir = arch_files['stagedir'].abs
    if not os.path.exists(stagedir):
        os.makedirs(stagedir)

    for f in archfiles:
        if not os.path.exists(f):
            continue
        ft['basename'] = os.path.basename(f)
        tstart = re.search(r'(\d+)', str(ft['basename'])).group(1)
        datestart = DateTime(tstart).date
        ft['year'], ft['doy'] = re.search(r'(\d\d\d\d):(\d\d\d)',
                                          datestart).groups()

        archdir = arch_files['archdir'].abs
        archfile = arch_files['archfile'].abs

        if not os.path.exists(archdir):
            os.makedirs(archdir)

        if not os.path.exists(archfile):
            logger.info('mv %s %s' % (os.path.abspath(f), archfile))
            if not opt.dry_run:
                if not opt.occ:
                    shutil.copy2(f, stagedir)
                shutil.move(f, archfile)

        if os.path.exists(f):
            logger.verbose('Unlinking %s' % os.path.abspath(f))
            os.unlink(f)
Example #6
def get_id():
    """
    Get the YYYYDOY identifier for the dark cal.
    """
    DARK_CAL['id'] = get_dark_cal_id(DARK_CAL['start'].val)
    logger.verbose('Dark cal starting at {} has id={}'
                   .format(DARK_CAL['start'], DARK_CAL['id']))
Example #7
def calc_ra_dec():
    """Calculate decimal RA and Dec from sexigesimal input in source data."""
    
    pos_str = source['ra_hms'].val + " " + source['dec_dms'].val
    pos_str = re.sub(r'[,:dhms]', ' ', pos_str)
    args = pos_str.split()

    if len(args) != 6:
        raise ValueError("Input source position '%s' needs 6 values" % pos_str)

    rah = int(args[0])
    ram = int(args[1])
    ras = float(args[2])
    decsign = '-' if args[3].startswith('-') else '+'
    decd = abs(int(args[3]))
    decm = int(args[4])
    decs = float(args[5])

    ra = 15.0 * (rah + ram/60. + ras/3600.)
    dec = abs(decd) + decm/60. + decs/3600.
    if decsign == '-':
        dec = -dec

    source['ra'] = ra
    source['dec'] = dec
    logger.verbose(pyyaks.context.render('RA={{source.ra}} Dec={{source.dec}}'))
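A worked example of the conversion above, using a hypothetical position (not from the source data):

rah, ram, ras = 10, 45, 3.6              # hypothetical '10:45:03.6'
decd, decm, decs = 59, 41, 4.3           # hypothetical '-59:41:04.3' (magnitude only)
ra = 15.0 * (rah + ram / 60. + ras / 3600.)    # 161.265 deg
dec = -(decd + decm / 60. + decs / 3600.)      # -59.6845 deg (sign applied separately)
print(round(ra, 4), round(dec, 4))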
Example #8
def read_derived(i, filename, filetype, row, colnames, archfiles, db):
    """Read derived data using eng_archive and derived computation classes.
    ``filename`` has format <content>_<index0>_<index1> where <content>
    is the content type (e.g. "dp_thermal128"), <index0> is the start index for
    the new data and index1 is the end index (using Python slicing convention
    index0:index1).  Args ``i``, ``filetype``, and ``row`` are as in
    read_archive().  ``row`` must equal <index0>.  ``colnames`` is the list of
    column names for the content type.
    """
    # Check if filename is already in archfiles.  If so then abort further processing.

    if db.fetchall('SELECT filename FROM archfiles WHERE filename=?', (filename,)):
        logger.verbose('File %s already in archfiles - skipping' % filename)
        return None, None

    # f has format <content>_<index0>_<index1>
    # <content> has format dp_<content><mnf_step> e.g. dp_thermal128
    content, index0, index1 = filename.split(':')
    index0 = int(index0)
    index1 = int(index1)
    mnf_step = int(re.search(r'(\d+)$', content).group(1))
    time_step = mnf_step * derived.MNF_TIME
    times = time_step * np.arange(index0, index1)

    logger.info('Reading (%d / %d) %s' % (i, len(archfiles), filename))
    vals = {}
    bads = np.zeros((len(times), len(colnames)), dtype=bool)
    for i, colname in enumerate(colnames):
        if colname == 'TIME':
            vals[colname] = times
            bads[:, i] = False
        else:
            dp_class = getattr(Ska.engarchive.derived, colname.upper())
            dp = dp_class()
            dataset = dp.fetch(times[0] - 1000, times[-1] + 1000)
            ok = (index0 <= dataset.indexes) & (dataset.indexes < index1)
            vals[colname] = dp.calc(dataset)[ok]
            bads[:, i] = dataset.bads[ok]

    vals['QUALITY'] = bads
    dat = Ska.Numpy.structured_array(vals, list(colnames) + ['QUALITY'])

    # Accumulate relevant info about archfile that will be ingested into
    # MSID h5 files.  Commit info before h5 ingest so if there is a failure
    # the needed info will be available to do the repair.
    date = DateTime(times[0]).date
    year, doy = date[0:4], date[5:8]
    archfiles_row = dict(filename=filename,
                         filetime=int(index0 * time_step),
                         year=year,
                         doy=doy,
                         tstart=times[0],
                         tstop=times[-1],
                         rowstart=row,
                         rowstop=row + len(dat),
                         startmjf=index0,
                         stopmjf=index1,
                         date=date)

    return dat, archfiles_row
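A short sketch of how the time vector above is built from the minor-frame step encoded in the content name, assuming a hypothetical filename and that MNF_TIME is the 0.25625 s minor-frame period:

import re
import numpy as np

MNF_TIME = 0.25625                         # assumed minor-frame period (sec)
filename = 'dp_thermal128:1000:1200'       # hypothetical <content>:<index0>:<index1>
content, index0, index1 = filename.split(':')
mnf_step = int(re.search(r'(\d+)$', content).group(1))    # -> 128
time_step = mnf_step * MNF_TIME                           # -> 32.8 s per sample
times = time_step * np.arange(int(index0), int(index1))   # 200 sample times
print(time_step, times[0], times[-1])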
Example #9
def setup(self):
    self.skip = False
    depends_ok, msg = check_depend(self.depends, self.targets)
    if depends_ok and self.targets:
        self.skip = True
        logger.verbose('Skipping because dependencies met')
        raise TaskSkip
Example #10
def get_stat_data_sets(ft, index_tbl, last_date_id, logger, opt):
    # Iterate over sync files that contain new data
    dats = []
    for date_id, filetime0, filetime1, row0, row1 in index_tbl:
        # Limit processed archfiles by date
        if filetime0 > DateTime(opt.date_stop).secs:
            logger.verbose(f'Index {date_id} filetime0 > date_stop, breaking')
            break

        # Compare date_id of this row to last one that was processed.  These
        # are lexically ordered
        if date_id <= last_date_id:
            logger.verbose(f'Index {date_id} already processed, skipping')
            continue

        # File names like sync/acis4eng/2019-07-08T1150z/5min.npz
        last_date_id = ft['date_id'] = date_id

        # Read the file with all the MSID data as a hash with keys {msid}.data
        # {msid}.row0, {msid}.row1
        with get_readable(opt.sync_root, opt.is_url, sync_files['data']) as (data_input, uri):
            with timing_logger(logger, f'Reading update date file {uri}'):
                with gzip.open(data_input, 'rb') as fh:
                    dat = pickle.load(fh)
                    if dat:
                        # Stat pickle dict can be empty, e.g. in the case of a daily file
                        # with no update.
                        dats.append(dat)

    return dats, last_date_id
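A self-contained sketch of the gzipped-pickle read in the loop above, with a hypothetical local file and payload (the real code goes through get_readable for URL or file access):

import gzip
import pickle

dat = {'tephin': {'data': [1, 2, 3], 'row0': 0, 'row1': 3}}   # hypothetical payload
with gzip.open('/tmp/5min.npz', 'wb') as fh:                  # hypothetical path
    pickle.dump(dat, fh)
with gzip.open('/tmp/5min.npz', 'rb') as fh:
    print(pickle.load(fh) == dat)   # -> True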
Example #11
def make_h5_col_file(dats, colname):
    """Make a new h5 table to hold column from ``dat``."""
    filename = msid_files['msid'].abs
    filedir = os.path.dirname(filename)
    if not os.path.exists(filedir):
        os.makedirs(filedir)

    # Estimate the number of rows for 20 years based on available data
    times = np.hstack([x['TIME'] for x in dats])
    dt = np.median(times[1:] - times[:-1])
    n_rows = int(86400 * 365 * 20 / dt)

    filters = tables.Filters(complevel=5, complib='zlib')
    h5 = tables.open_file(filename, mode='w', filters=filters)

    col = dats[-1][colname]
    h5shape = (0,) + col.shape[1:]
    h5type = tables.Atom.from_dtype(col.dtype)
    h5.create_earray(h5.root, 'data', h5type, h5shape, title=colname,
                     expectedrows=n_rows)
    h5.create_earray(h5.root, 'quality', tables.BoolAtom(), (0,), title='Quality',
                     expectedrows=n_rows)
    logger.verbose('WARNING: made new file {} for column {!r} shape={} with n_rows(1e6)={}'
                   .format(filename, colname, h5shape, n_rows / 1.0e6))
    h5.close()
Example #12
def append_filled_h5_col(dats, colname, data_len):
    """
    For ``colname`` that has newly appeared in the CXC content file due to a TDB
    change, append enough empty, bad-quality rows to pad the column out to the start
    of the first ``dats`` table that has ``colname``.

    ``data_len`` represents the data length to the END of the current ``dats``
    for this content type.
    """
    # Drop all dats until the first one that has the new colname, then include
    # all after that.
    new_dats = list(itertools.dropwhile(lambda x: colname not in x.dtype.names, dats))
    stacked_data = np.hstack([x[colname] for x in new_dats])
    fill_len = data_len - len(stacked_data)
    if fill_len < 0:
        raise ValueError('impossible negative length error {}'.format(fill_len))

    zeros = np.zeros(fill_len, dtype=stacked_data.dtype)
    quals = np.ones(fill_len, dtype=bool)

    # Append zeros (for the data type) and quality=True (bad)
    h5 = tables.open_file(msid_files['msid'].abs, mode='a')
    logger.verbose('Appending %d zeros to %s' % (len(zeros), msid_files['msid'].abs))
    if not opt.dry_run:
        h5.root.data.append(zeros)
        h5.root.quality.append(quals)
    h5.close()

    # Now actually append the new data
    append_h5_col(new_dats, colname, [])
Example #13
def _sync_stat_archive(opt, msid_files, logger, content, stat, index_tbl):
    """
    Actual worker for syncing the stat archive for ``content``.
    """
    # Get the last row of data from the length of the TIME.col (or archfiles?)
    ft = fetch.ft
    ft['content'] = content
    ft['interval'] = stat

    stats_dir = Path(msid_files['statsdir'].abs)
    if not stats_dir.exists():
        logger.debug(f'Skipping {stat} data for {content}: no directory')
        return

    logger.info('')
    logger.info(f'Processing {stat} data for {content}')

    # Get the MSIDs that are in client archive
    msids = [str(fn.name)[:-3] for fn in stats_dir.glob('*.h5')]
    if not msids:
        logger.debug(f'Skipping {stat} data for {content}: no stats h5 files')
        return
    else:
        logger.debug(f'Stat msids are {msids}')

    last_date_id, last_date_id_file = get_last_date_id(
        msid_files, msids, stat, logger)
    logger.verbose(f'Got {last_date_id} as last date_id that was applied to archive')

    # Get list of applicable dat objects (new data, before opt.date_stop).  Also
    # return ``date_id`` which is the date_id of the final data set in the list.
    # This will be written as the new ``last_date_id``.
    try:
        dats, date_id = get_stat_data_sets(ft, index_tbl, last_date_id, logger, opt)
    except urllib.error.URLError as err:
        if 'timed out' in str(err):
            msg = f'  ERROR: timed out getting {stat} data for {content}'
            logger.error(msg)
            process_errors.append(msg)
            return
        else:
            raise

    if not dats:
        return

    dat, msids = concat_data_sets(dats, ['data'])
    with DelayedKeyboardInterrupt(logger):
        with timing_logger(logger, f'Applying updates to {len(msids)} h5 files'):
            for msid in msids:
                fetch.ft['msid'] = msid
                stat_file = msid_files['stats'].abs
                if os.path.exists(stat_file):
                    append_stat_col(dat, stat_file, msid, date_id, opt, logger)

            logger.debug(f'Updating {last_date_id_file} with {date_id}')
            if not opt.dry_run:
                with open(last_date_id_file, 'w') as fh:
                    fh.write(f'{date_id}')
Example #14
def check_filetype(filetype):
    ft['content'] = filetype.content.lower()

    if not os.path.exists(msid_files['archfiles'].abs):
        logger.info('No archfiles.db3 for %s - skipping' % ft['content'])
        return

    logger.info('Checking {} content type, archfiles {}'.format(
        ft['content'], msid_files['archfiles'].abs))

    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs)
    archfiles = db.fetchall('select * from archfiles')
    db.conn.close()

    if opt.check_order:
        for archfile0, archfile1 in zip(archfiles[:-1], archfiles[1:]):
            exception = (archfile0['startmjf'] == 77826
                         and archfile0['year'] == 2004
                         and archfile0['doy'] == 309)
            if archfile1['tstart'] < archfile0['tstart'] and not exception:
                logger.info(
                    'ERROR: archfile order inconsistency\n {}\n{}'.format(
                        archfile0, archfile1))

    if not opt.check_lengths:
        colnames = ['TIME']
    else:
        colnames = [
            x for x in pickle.load(open(msid_files['colnames'].abs, 'rb'))
            if x not in fetch.IGNORE_COLNAMES
        ]

    lengths = set()
    for colname in colnames:
        ft['msid'] = colname

        h5 = tables.open_file(msid_files['msid'].abs, mode='r')
        length = len(h5.root.data)
        h5.root.data[length - 1]
        h5.close()

        logger.verbose('MSID {} has length {}'.format(colname, length))
        lengths.add(length)
        if len(lengths) != 1:
            logger.info('ERROR: inconsistent MSID length {} {} {}'.format(
                ft['content'], colname, lengths))
            return  # Other checks don't make sense now

    length = lengths.pop()

    archfile = archfiles[-1]
    if archfile['rowstop'] != length:
        logger.info('ERROR: inconsistent archfile {}: '
                    'last rowstop={} MSID length={}'.format(
                        ft['content'], archfile['rowstop'], length))
        if opt.find_glitch:
            find_glitch()
Example #15
def read_archfile(i, f, filetype, row, colnames, archfiles, db):
    """Read filename ``f`` with index ``i`` (position within list of filenames).  The
    file has type ``filetype`` and will be added to MSID file at row index ``row``.
    ``colnames`` is the list of column names for the content type (not used here).
    """
    # Check if filename is already in archfiles.  If so then abort further processing.
    filename = os.path.basename(f)
    if db.fetchall('SELECT filename FROM archfiles WHERE filename=?',
                   (filename, )):
        logger.verbose(
            'File %s already in archfiles - unlinking and skipping' % f)
        os.unlink(f)
        return None, None

    # Read FITS archive file and accumulate data into dats list and header into headers dict
    logger.info('Reading (%d / %d) %s' % (i, len(archfiles), filename))
    hdus = pyfits.open(f)
    hdu = hdus[1]

    try:
        dat = converters.convert(hdu.data, filetype['content'])

    except converters.NoValidDataError:
        # When creating files allow NoValidDataError
        hdus.close()
        logger.warning(
            'WARNING: no valid data in data file {}'.format(filename))
        return None, None

    except converters.DataShapeError as err:
        hdus.close()
        logger.warning(
            'WARNING: skipping file {} with bad data shape: ASCDSVER={} {}'.
            format(filename, hdu.header['ASCDSVER'], err))
        return None, None

    # Accumulate relevant info about archfile that will be ingested into
    # MSID h5 files.  Commit info before h5 ingest so if there is a failure
    # the needed info will be available to do the repair.
    archfiles_row = dict(
        (x, hdu.header.get(x.upper())) for x in archfiles_hdr_cols)
    archfiles_row['checksum'] = hdu.header.get('checksum') or hdu._checksum
    archfiles_row['rowstart'] = row
    archfiles_row['rowstop'] = row + len(dat)
    archfiles_row['filename'] = filename
    archfiles_row['filetime'] = int(
        re.search(r'(\d+)', archfiles_row['filename']).group(1))
    filedate = DateTime(archfiles_row['filetime']).date
    year, doy = (int(x)
                 for x in re.search(r'(\d\d\d\d):(\d\d\d)', filedate).groups())
    archfiles_row['year'] = year
    archfiles_row['doy'] = doy
    hdus.close()

    return dat, archfiles_row
Example #16
File: shell.py Project: sot/pyyaks
def run_tool(cmd=None, punlearn=False, split_char='\n'):
    spawn = Spawn(stdout=None)
    cmds = [pyyaks.context.render(x).strip() for x in cmd.split(split_char)]
    if punlearn:
        spawn.run(['punlearn', cmds[0]])
    logger.verbose('Running ' + ' '.join(cmds))
    exitstatus = spawn.run(cmds)
    logger.verbose(''.join(spawn.outlines))
    if exitstatus:
        raise ValueError('Command failed with exit status = {0}'.format(exitstatus))
    return spawn.outlines
Example #17
def check_filetype(filetype):
    ft['content'] = filetype.content.lower()

    if not os.path.exists(msid_files['archfiles'].abs):
        logger.info('No archfiles.db3 for %s - skipping' % ft['content'])
        return

    logger.info('Checking {} content type, archfiles {}'.format(
        ft['content'], msid_files['archfiles'].abs))

    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs)
    archfiles = db.fetchall('select * from archfiles')
    db.conn.close()

    if opt.check_order:
        for archfile0, archfile1 in zip(archfiles[:-1], archfiles[1:]):
            exception = (archfile0['startmjf'] == 77826 and
                         archfile0['year'] == 2004 and archfile0['doy'] == 309)
            if archfile1['tstart'] < archfile0['tstart'] and not exception:
                logger.info('ERROR: archfile order inconsistency\n {}\n{}'
                            .format(archfile0, archfile1))

    if not opt.check_lengths:
        colnames = ['TIME']
    else:
        colnames = [x for x in pickle.load(open(msid_files['colnames'].abs, 'rb'))
                    if x not in fetch.IGNORE_COLNAMES]

    lengths = set()
    for colname in colnames:
        ft['msid'] = colname

        h5 = tables.open_file(msid_files['msid'].abs, mode='r')
        length = len(h5.root.data)
        h5.root.data[length - 1]
        h5.close()

        logger.verbose('MSID {} has length {}'.format(colname, length))
        lengths.add(length)
        if len(lengths) != 1:
            logger.info('ERROR: inconsistent MSID length {} {} {}'.format(
                ft['content'], colname, lengths))
            return  # Other checks don't make sense now

    length = lengths.pop()

    archfile = archfiles[-1]
    if archfile['rowstop'] != length:
        logger.info('ERROR: inconsistent archfile {}: '
                    'last rowstop={} MSID length={}'.format(
            ft['content'], archfile['rowstop'], length))
        if opt.find_glitch:
            find_glitch()
Example #18
def read_archfile(i, f, filetype, row, colnames, archfiles, db):
    """Read filename ``f`` with index ``i`` (position within list of filenames).  The
    file has type ``filetype`` and will be added to MSID file at row index ``row``.
    ``colnames`` is the list of column names for the content type (not used here).
    """
    # Check if filename is already in archfiles.  If so then abort further processing.
    filename = os.path.basename(f)
    if db.fetchall('SELECT filename FROM archfiles WHERE filename=?', (filename,)):
        logger.verbose('File %s already in archfiles - unlinking and skipping' % f)
        os.unlink(f)
        return None, None

    # Read FITS archive file and accumulate data into dats list and header into headers dict
    logger.info('Reading (%d / %d) %s' % (i, len(archfiles), filename))
    hdus = pyfits.open(f, character_as_bytes=True)
    hdu = hdus[1]

    try:
        dat = converters.convert(hdu.data, filetype['content'])

    except converters.NoValidDataError:
        # When creating files allow NoValidDataError
        hdus.close()
        logger.warning('WARNING: no valid data in data file {}'.format(filename))
        return None, None

    except converters.DataShapeError as err:
        hdus.close()
        logger.warning('WARNING: skipping file {} with bad data shape: ASCDSVER={} {}'
                       .format(filename, hdu.header['ASCDSVER'], err))
        return None, None

    # Accumulate relevant info about archfile that will be ingested into
    # MSID h5 files.  Commit info before h5 ingest so if there is a failure
    # the needed info will be available to do the repair.
    archfiles_row = dict((x, hdu.header.get(x.upper())) for x in archfiles_hdr_cols)
    archfiles_row['checksum'] = hdu.header.get('checksum') or hdu._checksum
    archfiles_row['rowstart'] = row
    archfiles_row['rowstop'] = row + len(dat)
    archfiles_row['filename'] = filename
    archfiles_row['filetime'] = int(re.search(r'(\d+)', archfiles_row['filename']).group(1))
    filedate = DateTime(archfiles_row['filetime']).date
    year, doy = (int(x) for x in re.search(r'(\d\d\d\d):(\d\d\d)', filedate).groups())
    archfiles_row['year'] = year
    archfiles_row['doy'] = doy
    hdus.close()

    return dat, archfiles_row
Example #19
def get_events_and_event_models(EventModel, cls_name, events_in_dates):
    # Determine which of the events is not already in the database and
    # put them in a list for saving.
    events = []
    event_models = []
    for event in events_in_dates:
        event_model = EventModel.from_dict(event, logger)
        try:
            EventModel.objects.get(pk=event_model.pk)
        except EventModel.DoesNotExist:
            events.append(event)
            event_models.append(event_model)
        else:
            logger.verbose('Skipping {} at {}: already in database'.format(
                cls_name, event['start']))
    return event_models, events
Example #20
def get_q_atts_transforms(telems, slot, dt):
    """
    Get quaternions and associated transforms, matched to the times of yag/zag data
    in slot.  Apply a time offset ``dt`` to account for latencies in telemetry
    and ACA image readout.
    """
    logger.verbose('Interpolating quaternions for slot {}'.format(slot))
    yz_times = telems['aoacyan{}'.format(slot)].times
    q_times = telems['aoattqt1'].times
    qs = np.empty((len(yz_times), 4), dtype=np.float64)

    for ii in range(4):
        q_vals = telems['aoattqt{}'.format(ii + 1)].vals
        qs[:, ii] = Ska.Numpy.interpolate(q_vals, q_times + dt, yz_times, sorted=True)
    q_atts = quaternion.Quat(quaternion.normalize(qs))
    transforms = q_atts.transform  # N x 3 x 3
    return q_atts, transforms
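A sketch of the time-shifted interpolation above, substituting np.interp for Ska.Numpy.interpolate and using hypothetical times, values, and latency dt:

import numpy as np

q_times = np.arange(0.0, 10.0, 1.025)    # quaternion sample times (hypothetical)
q_vals = np.sin(q_times / 10.0)          # one quaternion component (hypothetical)
yz_times = np.arange(0.5, 9.5, 2.05)     # yag/zag sample times (hypothetical)
dt = 0.1                                 # assumed telemetry/readout latency
q_on_yz = np.interp(yz_times, q_times + dt, q_vals)
print(q_on_yz.shape)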
Example #21
def truncate_archive(filetype, date):
    """Truncate msid and statfiles for every archive file after date (to nearest
    year:doy)
    """
    colnames = pickle.load(open(msid_files['colnames'].abs, 'rb'))

    date = DateTime(date).date
    year, doy = date[0:4], date[5:8]

    # Setup db handle with autocommit=False so that error along the way aborts insert transactions
    db = Ska.DBI.DBI(dbi='sqlite',
                     server=msid_files['archfiles'].abs,
                     autocommit=False)

    # Get the earliest row number from the archfiles table where year>=year and doy>=doy
    out = db.fetchall('SELECT rowstart FROM archfiles '
                      'WHERE year>={0} AND doy>={1}'.format(year, doy))
    if len(out) == 0:
        return
    rowstart = out['rowstart'].min()
    time0 = DateTime("{0}:{1}:00:00:00".format(year, doy)).secs

    for colname in colnames:
        ft['msid'] = colname
        filename = msid_files['msid'].abs
        if not os.path.exists(filename):
            raise IOError('MSID file {} not found'.format(filename))
        if not opt.dry_run:
            h5 = tables.open_file(filename, mode='a')
            h5.root.data.truncate(rowstart)
            h5.root.quality.truncate(rowstart)
            h5.close()
        logger.verbose('Removed rows from {0} for filetype {1}:{2}'.format(
            rowstart, filetype['content'], colname))

        # Delete the 5min and daily stats, with a little extra margin
        if colname not in fetch.IGNORE_COLNAMES:
            del_stats(colname, time0, '5min')
            del_stats(colname, time0, 'daily')

    cmd = 'DELETE FROM archfiles WHERE (year>={0} AND doy>={1}) OR year>{0}'.format(
        year, doy, year)
    if not opt.dry_run:
        db.execute(cmd)
        db.commit()
    logger.verbose(cmd)
Example #22
def move_archive_files(filetype, archfiles):
    ft['content'] = filetype.content.lower()

    stagedir = arch_files['stagedir'].abs
    if not os.path.exists(stagedir):
        os.makedirs(stagedir)

    for f in archfiles:
        if not os.path.exists(f):
            continue

        if not opt.dry_run and not opt.occ:
            logger.info('mv %s %s' % (os.path.abspath(f), stagedir))
            shutil.move(f, stagedir)

        if os.path.exists(f):
            logger.verbose('Unlinking %s' % os.path.abspath(f))
            os.unlink(f)
Example #23
def save_event_to_database(cls_name, event, event_model, models):
    try4times(event_model.save)
    logger.info('Added {} {}'.format(cls_name, event_model))
    if 'dur' in event and event['dur'] < 0:
        logger.info('WARNING: negative event duration for {} {}'.format(
            cls_name, event_model))
    # Add any foreign rows (many to one)
    for foreign_cls_name, rows in event.get('foreign', {}).items():
        ForeignModel = getattr(models, foreign_cls_name)
        if isinstance(rows, np.ndarray):
            # Convert structured array rows to plain dicts
            rows = [{key: row[key].tolist()
                     for key in row.dtype.names} for row in rows]
        for row in rows:
            foreign_model = ForeignModel.from_dict(row, logger)
            setattr(foreign_model, event_model.model_name, event_model)
            logger.verbose('Adding {}'.format(foreign_model))
            try4times(foreign_model.save)
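A quick sketch of the structured-array-to-dict conversion used above, with a hypothetical dtype and rows:

import numpy as np

rows = np.array([(1, 2.5), (2, 3.5)], dtype=[('slot', 'i4'), ('mag', 'f8')])
plain = [{key: row[key].tolist() for key in row.dtype.names} for row in rows]
print(plain)   # [{'slot': 1, 'mag': 2.5}, {'slot': 2, 'mag': 3.5}]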
Example #24
def find_glitch():
    ft['msid'] = 'TIME'
    h5 = tables.open_file(msid_files['msid'].abs, mode='r')
    times = h5.root.data

    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs)
    archfiles = db.fetchall('select * from archfiles')
    db.conn.close()

    for archfile in archfiles:
        logger.verbose('archfile {} {} {}'.format(
                archfile['filename'], archfile['year'], archfile['doy']))
        tstart = archfile['tstart']
        rowstart = archfile['rowstart']
        if abs(tstart - times[rowstart]) > opt.max_tstart_mismatch:
            logger.info('ERROR: inconsistency\n {}'.format(archfile))
            break

    h5.close()
Example #25
def truncate_archive(filetype, date):
    """Truncate msid and statfiles for every archive file after date (to nearest
    year:doy)
    """
    colnames = pickle.load(open(msid_files['colnames'].abs, 'rb'))

    date = DateTime(date).date
    year, doy = date[0:4], date[5:8]

    # Setup db handle with autocommit=False so that error along the way aborts insert transactions
    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs, autocommit=False)

    # Get the earliest row number from the archfiles table where year>=year and doy>=doy
    out = db.fetchall('SELECT rowstart FROM archfiles '
                      'WHERE year>={0} AND doy>={1}'.format(year, doy))
    if len(out) == 0:
        return
    rowstart = out['rowstart'].min()
    time0 = DateTime("{0}:{1}:00:00:00".format(year, doy)).secs

    for colname in colnames:
        ft['msid'] = colname
        filename = msid_files['msid'].abs
        if not os.path.exists(filename):
            raise IOError('MSID file {} not found'.format(filename))
        if not opt.dry_run:
            h5 = tables.open_file(filename, mode='a')
            h5.root.data.truncate(rowstart)
            h5.root.quality.truncate(rowstart)
            h5.close()
        logger.verbose('Removed rows from {0} for filetype {1}:{2}'.format(
            rowstart, filetype['content'], colname))

        # Delete the 5min and daily stats, with a little extra margin
        if colname not in fetch.IGNORE_COLNAMES:
            del_stats(colname, time0, '5min')
            del_stats(colname, time0, 'daily')

    cmd = 'DELETE FROM archfiles WHERE (year>={0} AND doy>={1}) OR year>{0}'.format(year, doy, year)
    if not opt.dry_run:
        db.execute(cmd)
        db.commit()
    logger.verbose(cmd)
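A minimal sketch of the EArray truncation used above, with a hypothetical temp file rather than the archive layout:

import numpy as np
import tables

with tables.open_file('/tmp/example_truncate.h5', mode='w') as h5:   # hypothetical path
    h5.create_earray(h5.root, 'data', tables.Float64Atom(), (0,))
    h5.root.data.append(np.arange(10.0))
    h5.root.data.truncate(3)      # keep rows [0:3), drop the rest
    print(len(h5.root.data))      # -> 3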
Example #26
def find_glitch():
    ft['msid'] = 'TIME'
    h5 = tables.open_file(msid_files['msid'].abs, mode='r')
    times = h5.root.data

    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs)
    archfiles = db.fetchall('select * from archfiles')
    db.conn.close()

    for archfile in archfiles:
        logger.verbose('archfile {} {} {}'.format(archfile['filename'],
                                                  archfile['year'],
                                                  archfile['doy']))
        tstart = archfile['tstart']
        rowstart = archfile['rowstart']
        if abs(tstart - times[rowstart]) > opt.max_tstart_mismatch:
            logger.info('ERROR: inconsistency\n {}'.format(archfile))
            break

    h5.close()
Example #27
        def new_func(*args, **kwargs):
            runval = run(func.__name__) if callable(run) else run
            if runval is False:
                return
            elif runval is True:
                pass
            elif runval is None:
                if status['fail']:
                    return
            else:
                raise ValueError('run value = %s but must be True, False, or None' % runval)

            logger.verbose('')
            logger.verbose('-' * 60)
            logger.info(' Running task: %s at %s' % (func.__name__, time.ctime()))
            logger.verbose('-' * 60)

            try:
                func(*args, **kwargs)
                pyyaks.context.store_context(status.get('context_file'))
            except KeyboardInterrupt:
                raise
            except TaskSkip:
                pass
            except:
                if status['fail'] is False:
                    logger.error('%s: %s\n\n' % (func.__name__, traceback.format_exc()))
                    status['fail'] = True
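A small sketch of the ``run`` semantics in the decorator above: ``run`` may be True, False, None, or a callable taking the task name (hypothetical helper, not part of pyyaks):

def decide(run, fail_status, name='my_task'):
    runval = run(name) if callable(run) else run
    if runval is False:
        return 'skip'
    elif runval is True:
        pass
    elif runval is None:
        if fail_status:
            return 'skip'
    else:
        raise ValueError('run value = %s but must be True, False, or None' % runval)
    return 'run'

print(decide(True, False), decide(None, True), decide(lambda name: False, False))
# -> run skip skip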
Example #28
def get_last_date_id(msid_files, msids, stat, logger):
    """
    Get the last date_id used for syncing the client archive.  First try the
    last_date_id file.  If this does not exist then infer a reasonable value
    by looking at stat data for ``msids``

    :param msid_files:
    :param msids:
    :param stat:
    :param logger:
    :return:
    """
    last_date_id_file = msid_files['last_date_id'].abs

    if Path(last_date_id_file).exists():
        logger.verbose(f'Reading {last_date_id_file} to get last update time')
        with open(last_date_id_file, 'r') as fh:
            last_date_id = fh.read()
    else:
        logger.verbose(f'Reading stat h5 files to get last update time')
        times = []
        for msid in msids:
            fetch.ft['msid'] = msid
            filename = msid_files['stats'].abs
            logger.debug(f'Reading {filename} to check stat times')
            with tables.open_file(filename, 'r') as h5:
                index = h5.root.data.cols.index[-1]
                times.append((index + 0.5) * STATS_DT[stat])

        # Get the least recent stats data available and then go back 5 days to be
        # sure nothing gets missed.  Except for ephemeris files that are weird:
        # when they appear in the archive they include weeks of data in the past
        # and possibly future data.
        last_time = min(times)
        lookback = 30 if re.search(r'ephem[01]$', fetch.ft['content'].val) else 5
        last_date_id = get_date_id(DateTime(last_time - lookback * 86400).fits)

    return last_date_id, last_date_id_file
Example #29
def append_h5_col(dats, colname, files_overlaps):
    """Append new values to an HDF5 MSID data table.

    :param dats: List of pyfits HDU data objects
    :param colname: column name
    """
    def i_colname(dat):
        """Return the index for `colname` in `dat`"""
        return list(dat.dtype.names).index(colname)

    h5 = tables.open_file(msid_files['msid'].abs, mode='a')
    stacked_data = np.hstack([x[colname] for x in dats])
    stacked_quality = np.hstack([x['QUALITY'][:, i_colname(x)] for x in dats])
    logger.verbose('Appending %d items to %s' %
                   (len(stacked_data), msid_files['msid'].abs))

    if not opt.dry_run:
        h5.root.data.append(stacked_data)
        h5.root.quality.append(stacked_quality)

    # Remove overlaps in the archive files where file0['tstop'] > file1['tstart'].
    # Do this by setting the TIME column quality flag for the overlapping rows
    # in file0.  files_overlaps is a list of 2-tuples with consecutive files that
    # overlap.
    if colname == 'TIME':
        for file0, file1 in files_overlaps:
            times = h5.root.data[file0['rowstart']:file0['rowstop']]
            bad_rowstart = np.searchsorted(times,
                                           file1['tstart']) + file0['rowstart']
            bad_rowstop = file0['rowstop']
            if not opt.dry_run:
                logger.verbose(
                    'Removing overlapping data in rows {0}:{1}'.format(
                        bad_rowstart, bad_rowstop))
                if bad_rowstop > bad_rowstart:
                    h5.root.quality[bad_rowstart:bad_rowstop] = True
                else:
                    logger.verbose(
                        'WARNING: Unexpected null file overlap file0=%s file1=%s'
                        % (file0, file1))

    data_len = len(h5.root.data)
    h5.close()

    return data_len
Example #30
def append_h5_col(dats, colname, files_overlaps):
    """Append new values to an HDF5 MSID data table.

    :param dats: List of pyfits HDU data objects
    :param colname: column name
    """
    def i_colname(dat):
        """Return the index for `colname` in `dat`"""
        return list(dat.dtype.names).index(colname)

    h5 = tables.open_file(msid_files['msid'].abs, mode='a')
    stacked_data = np.hstack([x[colname] for x in dats])
    stacked_quality = np.hstack([x['QUALITY'][:, i_colname(x)] for x in dats])
    logger.verbose('Appending %d items to %s' % (len(stacked_data), msid_files['msid'].abs))

    if not opt.dry_run:
        h5.root.data.append(stacked_data)
        h5.root.quality.append(stacked_quality)

    # Remove overlaps in the archive files where file0['tstop'] > file1['tstart'].
    # Do this by setting the TIME column quality flag for the overlapping rows
    # in file0.  files_overlaps is a list of 2-tuples with consecutive files that
    # overlap.
    if colname == 'TIME':
        for file0, file1 in files_overlaps:
            times = h5.root.data[file0['rowstart']:file0['rowstop']]
            bad_rowstart = np.searchsorted(times, file1['tstart']) + file0['rowstart']
            bad_rowstop = file0['rowstop']
            if not opt.dry_run:
                logger.verbose('Removing overlapping data in rows {0}:{1}'.format(
                    bad_rowstart, bad_rowstop))
                if bad_rowstop > bad_rowstart:
                    h5.root.quality[bad_rowstart:bad_rowstop] = True
                else:
                    logger.verbose('WARNING: Unexpected null file overlap file0=%s file1=%s'
                                   % (file0, file1))

    data_len = len(h5.root.data)
    h5.close()

    return data_len
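A sketch of the overlap search above: np.searchsorted finds the first row of file0 whose TIME is at or after file1['tstart'] (hypothetical values):

import numpy as np

times = np.array([100., 101., 102., 103., 104.])   # TIME values in file0's row range
file0 = {'rowstart': 50, 'rowstop': 55}
file1 = {'tstart': 102.5}
bad_rowstart = np.searchsorted(times, file1['tstart']) + file0['rowstart']
bad_rowstop = file0['rowstop']
print(bad_rowstart, bad_rowstop)   # 53 55 -> rows 53:55 get quality=True (bad)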
Example #31
File: runasp.py Project: sot/runasp
def link_files(dir, indir, outdir, istart, istop, obiroot, skip_slot=None):
    """
    Creates symbolic links from the specified indir to the available telemetry.
    Fits files are only linked in if their header time keywords are relevant.
    ACA0 image files may be skipped if the slot is in skip_slot list.
    obspars must be for the correct obi.
    """
    dirmap = dict(infiles=indir, outfiles=outdir)
    for filetype in ['infiles', 'outfiles']:
        ldir = dirmap[filetype]
        for fileglob in pipe_config[filetype]:
            match = glob(os.path.join(opt.dir, fileglob))
            for mfile in match:
                fitsmatch = re.match('.*fits', mfile)
                if fitsmatch:
                    header = fits.getheader(mfile)
                    if ((istart >= header['tstop'])
                            or (istop <= header['tstart'])):
                        logger.verbose(
                            "skipping file out of timerange {}".format(mfile))
                        continue
                    aca0 = re.search(r'aca.*_(\d)_img0', mfile)
                    if skip_slot and aca0:
                        aca_file_slot = int(aca0.group(1))
                        if aca_file_slot in skip_slot:
                            logger.verbose(
                                "skipping slot file on {}".format(mfile))
                            continue
                obsparmatch = re.match(r'.*obs0a\.par(\.gz)?', mfile)
                if obsparmatch:
                    obimatch = re.match(r'.*axaf%s_obs0a\.par(\.gz)?' % obiroot, mfile)
                    if not obimatch:
                        logger.verbose("skipping obspar for different obi")
                        continue
                if not os.path.exists(
                        os.path.join(ldir, os.path.basename(mfile))):
                    logger.info("ln -s {} {}".format(
                        os.path.relpath(mfile, ldir), ldir))
                    bash("ln -s %s %s" % (os.path.relpath(mfile, ldir), ldir))
Example #32
def link_files(dir, indir, outdir, istart, istop, obiroot, skip_slot=None):
    """
    Creates symbolic links from the specified indir to the available telemetry.
    Fits files are only linked in if their header time keywords are relevant.
    ACA0 image files may be skipped if the slot is in skip_slot list.
    obspars must be for the correct obi.
    """
    dirmap = dict(infiles=indir,
                  outfiles=outdir)
    for filetype in ['infiles', 'outfiles']:
        ldir = dirmap[filetype]
        for fileglob in pipe_config[filetype]:
            match = glob(os.path.join(opt.dir, fileglob))
            for mfile in match:
                fitsmatch = re.match('.*fits', mfile)
                if fitsmatch:
                    header = fits.getheader(mfile)
                    if ((istart >= header['tstop'])
                            or (istop <= header['tstart'])):
                        logger.verbose("skipping file out of timerange {}".format(mfile))
                        continue
                    aca0 = re.search(r'aca.*_(\d)_img0', mfile)
                    if skip_slot and aca0:
                        aca_file_slot = int(aca0.group(1))
                        if aca_file_slot in skip_slot:
                            logger.verbose("skipping slot file on {}".format(mfile))
                            continue
                obsparmatch = re.match(r'.*obs0a\.par(\.gz)?', mfile)
                if obsparmatch:
                    obimatch = re.match(r'.*axaf%s_obs0a\.par(\.gz)?' % obiroot, mfile)
                    if not obimatch:
                        logger.verbose("skipping obspar for different obi")
                        continue
                if not os.path.exists(os.path.join(ldir, os.path.basename(mfile))):
                    logger.info("ln -s {} {}".format(os.path.relpath(mfile, ldir), ldir))
                    bash("ln -s %s %s" % (os.path.relpath(mfile, ldir), ldir))
Example #33
def update_msid_files(filetype, archfiles):
    colnames = pickle.load(open(msid_files['colnames'].abs, 'rb'))
    colnames_all = pickle.load(open(msid_files['colnames_all'].abs, 'rb'))
    old_colnames = colnames.copy()
    old_colnames_all = colnames_all.copy()

    # Setup db handle with autocommit=False so that error along the way aborts insert transactions
    db = Ska.DBI.DBI(dbi='sqlite',
                     server=msid_files['archfiles'].abs,
                     autocommit=False)

    # Get the last row number from the archfiles table
    out = db.fetchone('SELECT max(rowstop) FROM archfiles')
    row = out['max(rowstop)'] or 0
    last_archfile = db.fetchone('SELECT * FROM archfiles where rowstop=?',
                                (row, ))

    archfiles_overlaps = []
    dats = []
    archfiles_processed = []

    content_is_derived = (filetype['instrum'] == 'DERIVED')

    for i, f in enumerate(archfiles):
        get_data = (read_derived if content_is_derived else read_archfile)
        dat, archfiles_row = get_data(i, f, filetype, row, colnames, archfiles,
                                      db)
        if dat is None:
            continue

        # If creating new content type and there are no existing colnames, then
        # define the column names now.  Filter out any multidimensional
        # columns, including (typically) QUALITY.
        if opt.create and not colnames:
            colnames = set(dat.dtype.names)
            for colname in dat.dtype.names:
                if len(dat[colname].shape) > 1:
                    logger.info(
                        'Removing column {} from colnames because shape = {}'.
                        format(colname, dat[colname].shape))
                    colnames.remove(colname)

        # Ensure that the time gap between the end of the last ingested archive
        # file and the start of this one is less than opt.max_gap (or
        # filetype-based defaults).  If this fails then break out of the
        # archfiles processing but continue on to ingest any previously
        # successful archfiles
        if last_archfile is None:
            time_gap = 0
        else:
            time_gap = archfiles_row['tstart'] - last_archfile['tstop']
        max_gap = opt.max_gap
        if max_gap is None:
            if filetype['instrum'] in ['EPHEM', 'DERIVED']:
                max_gap = 601
            elif filetype['content'] == 'ACISDEAHK':
                max_gap = 10000
                # From P.Plucinsky 2011-09-23
                # If ACIS is executing an Event Histogram run while in FMT1,
                # the telemetry stream will saturate.  The amount of time for
                # an opening in the telemetry to appear such that DEA HKP
                # packets can get out is a bit indeterminate.  The histograms
                # integrate for 5400s and then they are telemetered.  I would
                # suggest 6000s, but perhaps you would want to double that to
                # 12000s.
            elif filetype['content'] in ['CPE1ENG', 'CCDM15ENG']:
                # 100 years => no max gap for safe mode telemetry or dwell mode telemetry
                max_gap = 100 * 3.1e7
            else:
                max_gap = 32.9
        if time_gap > max_gap:
            logger.warning(
                'WARNING: found gap of %.2f secs between archfiles %s and %s',
                time_gap, last_archfile['filename'], archfiles_row['filename'])
            if opt.create:
                logger.warning(
                    '       Allowing gap because of opt.create=True')
            elif DateTime() - DateTime(
                    archfiles_row['tstart']) > opt.allow_gap_after_days:
                # After 4 days (by default) just let it go through because this is
                # likely a real gap and will not be fixed by subsequent processing.
                # This can happen after normal sun mode to SIM products.
                logger.warning('       Allowing gap because arch file '
                               'start is more than {} days old'.format(
                                   opt.allow_gap_after_days))
            else:
                break
        elif time_gap < 0:
            # Overlapping archfiles - deal with this in append_h5_col
            archfiles_overlaps.append((last_archfile, archfiles_row))

        # Update the last_archfile values.
        last_archfile = archfiles_row

        # A very small number of archive files (a few) have a problem where the
        # quality column tform is specified as 3B instead of 17X (for example).
        # This breaks things, so in this case just skip the file.  However
        # since last_archfile is set above the gap check considers this file to
        # have been ingested.
        if not content_is_derived and dat['QUALITY'].shape[1] != len(
                dat.dtype.names):
            logger.warning(
                'WARNING: skipping because of quality size mismatch: %d %d' %
                (dat['QUALITY'].shape[1], len(dat.dtype.names)))
            continue

        # Mark the archfile as ingested in the database and add to list for
        # subsequent relocation into arch_files archive.  In the case of a gap
        # where ingest is stopped before all archfiles are processed, this will
        # leave files either in a tmp dir (HEAD) or in the stage dir (OCC).
        # In the latter case this allows for successful processing later when the
        # gap gets filled.
        archfiles_processed.append(f)
        if not opt.dry_run:
            db.insert(archfiles_row, 'archfiles')

        # Capture the data for subsequent storage in the hdf5 files
        dats.append(dat)

        # Update the running list of column names.  Colnames_all is the maximal (union)
        # set giving all column names seen in any file for this content type.  Colnames
        # was historically the minimal (intersection) set giving the list of column names
        # seen in every file, but as of 0.39 it is allowed to grow as well to accommodate
        # adding MSIDs in the TDB.  Include only 1-d columns, not things like AEPERR
        # in PCAD8ENG which is a 40-element binary vector.
        colnames_all.update(dat.dtype.names)
        colnames.update(name for name in dat.dtype.names
                        if dat[name].ndim == 1)

        row += len(dat)

    if dats:
        logger.verbose('Writing accumulated column data to h5 file at ' +
                       time.ctime())
        data_lens = set()
        processed_cols = set()
        for colname in colnames:
            ft['msid'] = colname
            if not os.path.exists(msid_files['msid'].abs):
                make_h5_col_file(dats, colname)
                if not opt.create:
                    # New MSID was found for this content type.  This must be associated with
                    # an update to the TDB.  Skip for the moment to ensure that other MSIDs
                    # are fully processed.
                    continue
            data_len = append_h5_col(dats, colname, archfiles_overlaps)
            data_lens.add(data_len)
            processed_cols.add(colname)

        if len(data_lens) != 1:
            raise ValueError(
                'h5 data length inconsistency {}, investigate NOW!'.format(
                    data_lens))

        # Process any new MSIDs (this is extremely rare)
        data_len = data_lens.pop()
        for colname in colnames - processed_cols:
            ft['msid'] = colname
            append_filled_h5_col(dats, colname, data_len)

    # Assuming everything worked now commit the db inserts that signify the
    # new archive files have been processed
    if not opt.dry_run:
        db.commit()

    # If colnames or colnames_all changed then give warning and update files.
    if colnames != old_colnames:
        logger.warning('WARNING: updating %s because colnames changed: %s' %
                       (msid_files['colnames'].abs, old_colnames ^ colnames))
        if not opt.dry_run:
            pickle.dump(colnames, open(msid_files['colnames'].abs, 'wb'))
    if colnames_all != old_colnames_all:
        logger.warning(
            'WARNING: updating %s because colnames_all changed: %s' %
            (msid_files['colnames_all'].abs, colnames_all ^ old_colnames_all))
        if not opt.dry_run:
            pickle.dump(colnames_all, open(msid_files['colnames_all'].abs,
                                           'wb'))

    return archfiles_processed
Example #34
def copy_resources():
    time.sleep(1)
    make_dir(FILE['resources_dir'].abs)
    for name in ('pyaxx.css', 'index_template.html'):
        logger.verbose('Copying %s to resources' % name)
        shutil.copy(os.path.join(prog_dir, name), FILE[name].abs)
Example #35
def setup(self):
    self.origdir = os.getcwd()
    newdir = pyyaks.context.render(self.newdir)
    os.chdir(newdir)
    logger.verbose('Changed to directory "%s"' % newdir)
Example #36
def restore_src(filename):
    if os.path.exists(filename):
        logger.verbose('Restoring from %s' % filename)
        time.sleep(3)
        with open(filename, 'rb') as fh:
            SRC.update(pickle.load(fh))
Example #37
def store_src(filename):
    logger.verbose('Storing to %s' % filename)
    time.sleep(3)
    with open(filename, 'wb') as fh:
        pickle.dump(SRC, fh)
Example #38
def append_h5_col(opt, msid, vals, logger, msid_files):
    """Append new values to an HDF5 MSID data table.

    :param opt:
    :param msid:
    :param vals: dict with `data`, `quality`, `row0` and `row1` keys
    :param logger:
    :param msid_files:
    """
    fetch.ft['msid'] = msid

    msid_file = Path(msid_files['msid'].abs)
    if not msid_file.exists():
        logger.debug(f'Skipping MSID update no {msid_file}')
        return

    mode = 'r' if opt.dry_run else 'a'
    with tables.open_file(str(msid_file), mode=mode) as h5:
        # If the vals[] data begins before the end of current data then chop the
        # beginning of data for this row.
        last_row_idx = len(h5.root.data) - 1
        if vals['row0'] <= last_row_idx:
            idx0 = last_row_idx + 1 - vals['row0']
            logger.debug(f'Chopping {idx0 + 1} rows from data')
            for key in ('data', 'quality'):
                vals[key] = vals[key][idx0:]
            vals['row0'] += idx0

        n_vals = len(vals['data'])
        logger.verbose(f'Appending {n_vals} rows to {msid_file}')

        # Normally at this point there is always data to append since we got here
        # by virtue of the TIME.h5 file being incomplete relative to available sync
        # data.  However, user might have manually rsynced a file as part of adding
        # a new MSID, in which case it might be up to date and there is no req'd action.
        if n_vals == 0:
            return

        if vals['row0'] != len(h5.root.data):
            raise RowMismatchError(
                f'ERROR: unexpected discontinuity for full msid={msid} '
                f'content={fetch.ft["content"]}\n'
                f'Looks like your archive is in a bad state, CONTACT '
                f'your local Ska expert with this info:\n'
                f'  First row0 in new data {vals["row0"]} != '
                f'length of existing data {len(h5.root.data)}')

        # For the TIME column include special processing to effectively remove
        # existing rows that are superseded by new rows in time.  This is done by
        # marking the TIME value as bad quality.  This process happens regularly
        # for ephemeris content, which gets updated once weekly and has substantial
        # overlaps in the archive data.  Here we only worry about the beginning of
        # new data because anything in the middle will have already been marked
        # bad by update_archive.py.
        if msid == 'TIME':
            time0 = vals['data'][0]
            idx1 = len(h5.root.data) - 1
            ii = 0
            while h5.root.data[idx1 - ii] - time0 > -0.0001:
                h5.root.quality[idx1 - ii] = True
                ii += 1
            if ii > 0:
                logger.verbose(f'Excluded {ii} rows due to overlap')

        if not opt.dry_run:
            h5.root.data.append(vals['data'])
            h5.root.quality.append(vals['quality'])
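A sketch of the trailing-overlap exclusion for TIME above: walk back from the end of the existing data, marking rows bad until they precede the first new time (hypothetical arrays):

import numpy as np

data = np.array([10., 20., 30., 40.])      # existing TIME values (hypothetical)
quality = np.zeros(len(data), dtype=bool)
time0 = 25.0                               # first TIME value in the new data
idx1 = len(data) - 1
ii = 0
while data[idx1 - ii] - time0 > -0.0001:
    quality[idx1 - ii] = True
    ii += 1
print(ii, quality)   # -> 2 [False False  True  True]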
Example #39
def update_msid_files(filetype, archfiles):
    colnames = pickle.load(open(msid_files['colnames'].abs, 'rb'))
    colnames_all = pickle.load(open(msid_files['colnames_all'].abs, 'rb'))
    old_colnames = colnames.copy()
    old_colnames_all = colnames_all.copy()

    # Setup db handle with autocommit=False so that error along the way aborts insert transactions
    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs, autocommit=False)

    # Get the last row number from the archfiles table
    out = db.fetchone('SELECT max(rowstop) FROM archfiles')
    row = out['max(rowstop)'] or 0
    last_archfile = db.fetchone('SELECT * FROM archfiles where rowstop=?', (row,))

    archfiles_overlaps = []
    dats = []
    archfiles_processed = []

    content_is_derived = (filetype['instrum'] == 'DERIVED')

    for i, f in enumerate(archfiles):
        get_data = (read_derived if content_is_derived else read_archfile)
        dat, archfiles_row = get_data(i, f, filetype, row, colnames, archfiles, db)
        if dat is None:
            continue

        # If creating new content type and there are no existing colnames, then
        # define the column names now.  Filter out any multidimensional
        # columns, including (typically) QUALITY.
        if opt.create and not colnames:
            colnames = set(dat.dtype.names)
            for colname in dat.dtype.names:
                if len(dat[colname].shape) > 1:
                    logger.info('Removing column {} from colnames because shape = {}'
                                .format(colname, dat[colname].shape))
                    colnames.remove(colname)

        # Ensure that the time gap between the end of the last ingested archive
        # file and the start of this one is less than opt.max_gap (or
        # filetype-based defaults).  If this fails then break out of the
        # archfiles processing but continue on to ingest any previously
        # successful archfiles
        if last_archfile is None:
            time_gap = 0
        else:
            time_gap = archfiles_row['tstart'] - last_archfile['tstop']
        max_gap = opt.max_gap
        if max_gap is None:
            if filetype['instrum'] in ['EPHEM', 'DERIVED']:
                max_gap = 601
            elif filetype['content'] == 'ACISDEAHK':
                max_gap = 10000
                # From P.Plucinsky 2011-09-23
                # If ACIS is executing an Event Histogram run while in FMT1,
                # the telemetry stream will saturate.  The amount of time for
                # an opening in the telemetry to appear such that DEA HKP
                # packets can get out is a bit indeterminate.  The histograms
                # integrate for 5400s and then they are telemetered.  I would
                # suggest 6000s, but perhaps you would want to double that to
                # 12000s.
            elif filetype['content'] in ['CPE1ENG', 'CCDM15ENG']:
                # 100 years => no max gap for safe mode telemetry or dwell mode telemetry
                max_gap = 100 * 3.1e7
            else:
                max_gap = 32.9
        if time_gap > max_gap:
            logger.warning('WARNING: found gap of %.2f secs between archfiles %s and %s',
                           time_gap, last_archfile['filename'], archfiles_row['filename'])
            if opt.create:
                logger.warning('       Allowing gap because of opt.create=True')
            elif DateTime() - DateTime(archfiles_row['tstart']) > opt.allow_gap_after_days:
                # After 4 days (by default) just let it go through because this is
                # likely a real gap and will not be fixed by subsequent processing.
                # This can happen, for example, with SIM products after normal sun mode.
                logger.warning('       Allowing gap because arch file '
                               'start is more than {} days old'
                               .format(opt.allow_gap_after_days))
            else:
                break
        elif time_gap < 0:
            # Overlapping archfiles - deal with this in append_h5_col
            archfiles_overlaps.append((last_archfile, archfiles_row))

        # Update the last_archfile values.
        last_archfile = archfiles_row

        # A very small number of archive files have a problem where the quality
        # column tform is specified as 3B instead of 17X (for example).  This
        # breaks things, so in this case just skip the file.  However, since
        # last_archfile is set above, the gap check considers this file to have
        # been ingested.
        if not content_is_derived and dat['QUALITY'].shape[1] != len(dat.dtype.names):
            logger.warning('WARNING: skipping because of quality size mismatch: %d %d' %
                           (dat['QUALITY'].shape[1], len(dat.dtype.names)))
            continue

        # Mark the archfile as ingested in the database and add to list for
        # subsequent relocation into arch_files archive.  In the case of a gap
        # where ingest is stopped before all archfiles are processed, this will
        # leave files either in a tmp dir (HEAD) or in the stage dir (OCC).
        # In the latter case this allows for successful processing later when the
        # gap gets filled.
        archfiles_processed.append(f)
        if not opt.dry_run:
            db.insert(archfiles_row, 'archfiles')

        # Capture the data for subsequent storage in the hdf5 files
        dats.append(dat)

        # Update the running list of column names.  Colnames_all is the maximal (union)
        # set giving all column names seen in any file for this content type.  Colnames
        # was historically the minimal (intersection) set giving the list of column names
        # seen in every file, but as of 0.39 it is allowed to grow as well to accommodate
        # adding MSIDs in the TDB.  Include only 1-d columns, not things like AEPERR
        # in PCAD8ENG which is a 40-element binary vector.
        colnames_all.update(dat.dtype.names)
        colnames.update(name for name in dat.dtype.names if dat[name].ndim == 1)

        row += len(dat)

    if dats:
        logger.verbose('Writing accumulated column data to h5 file at ' + time.ctime())
        data_lens = set()
        processed_cols = set()
        for colname in colnames:
            ft['msid'] = colname
            if not os.path.exists(msid_files['msid'].abs):
                make_h5_col_file(dats, colname)
                if not opt.create:
                    # New MSID was found for this content type.  This must be associated with
                    # an update to the TDB.  Skip for the moment to ensure that other MSIDs
                    # are fully processed.
                    continue
            data_len = append_h5_col(dats, colname, archfiles_overlaps)
            data_lens.add(data_len)
            processed_cols.add(colname)

        if len(data_lens) != 1:
            raise ValueError('h5 data length inconsistency {}, investigate NOW!'
                             .format(data_lens))

        # Process any new MSIDs (this is extremely rare)
        data_len = data_lens.pop()
        for colname in colnames - processed_cols:
            ft['msid'] = colname
            append_filled_h5_col(dats, colname, data_len)

    # Assuming everything worked, now commit the db inserts that signify that
    # the new archive files have been processed
    if not opt.dry_run:
        db.commit()

    # If colnames or colnames_all changed then give warning and update files.
    if colnames != old_colnames:
        logger.warning('WARNING: updating %s because colnames changed: %s'
                       % (msid_files['colnames'].abs, old_colnames ^ colnames))
        if not opt.dry_run:
            pickle.dump(colnames, open(msid_files['colnames'].abs, 'wb'), protocol=0)
    if colnames_all != old_colnames_all:
        logger.warning('WARNING: updating %s because colnames_all changed: %s'
                       % (msid_files['colnames_all'].abs, colnames_all ^ old_colnames_all))
        if not opt.dry_run:
            pickle.dump(colnames_all, open(msid_files['colnames_all'].abs, 'wb'), protocol=0)

    return archfiles_processed
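# Illustrative sketch (not from the original module): the per-filetype max_gap
# defaults used in the gap check above, pulled out as a standalone helper.  The
# toy ``filetype`` dict below only mirrors the keys used in this module.
def default_max_gap(filetype):
    """Return the default allowed gap (secs) between archfiles for ``filetype``."""
    if filetype['instrum'] in ['EPHEM', 'DERIVED']:
        return 601
    if filetype['content'] == 'ACISDEAHK':
        return 10000
    if filetype['content'] in ['CPE1ENG', 'CCDM15ENG']:
        return 100 * 3.1e7  # effectively no limit for safe/dwell mode telemetry
    return 32.9

# e.g. default_max_gap({'instrum': 'TELEM', 'content': 'ACISDEAHK'}) == 10000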
Example #40
0
def _get_stat_data_from_archive(filename, stat, tstart, tstop, last_row1,
                                logger):
    """
    Return stat table rows in the range tstart <= time < tstop.

    Also returns the corresponding table row indexes.

    :param filename: HDF5 file to read
    :param stat: stat (5min or daily)
    :param tstart: min time
    :param tstop: max time
    :param last_row1: row1 for previous index table entry
    :param logger: logger
    :return:
    """
    dt = STATS_DT[stat]

    logger.verbose(
        f'_get_stat_data({filename}, {stat}, {DateTime(tstart).fits}, '
        f'{DateTime(tstop).fits}, {last_row1})')

    with tables.open_file(filename, 'r') as h5:
        # Check if tstart is beyond the end of the table.  If so, return an empty table
        table = h5.root.data
        last_index = table[-1]['index']
        last_time = (last_index + 0.5) * dt
        if tstart > last_time:
            logger.verbose(
                f'No available stats data {DateTime(tstart).fits} > '
                f'{DateTime(last_time).fits} (returning empty table)')
            row0 = row1 = len(table)
            table_rows = table[row0:row1]
        else:
            # Compute approx number of rows from the end for tstart.  Normally the index value
            # goes in lock step with row, but it can happen that an index is missed because of
            # missing data.  But if we back up by delta_rows, we are guaranteed to get to at
            # least the row corresponding to tstart.
            delta_rows = int((last_time - tstart) / dt) + 10

            # For rarely sampled data like CPE1ENG, delta_rows can end up being larger than the
            # table due to the gaps.  Therefore clip to the length of the table.
            if delta_rows > len(table):
                delta_rows = len(table)

            times = (table[-delta_rows:]['index'] + 0.5) * dt

            # In the worst case of starting to sync a client archive for a rarely-sampled
            # content like cpe1eng or pcad7eng (AOSPASA2CV), we need to include an extra ``dt``
            # on both ends to ensure that the first / last rows are caught. If the last
            # full-res sample is either before or after the stat mid-point timestamp then
            # the stat sample may get dropped. This happened in real life for AOSPASA2CV.
            # Having extra rows on front is OK because they just get clipped, and an extra
            # row on back is OK because of clipping on the next update (and in normal
            # processing we always want the sync archive to have all recent data).
            sub_row0, sub_row1 = np.searchsorted(times,
                                                 [tstart - dt, tstop + dt])
            sub_row_offset = len(table) - delta_rows

            row0 = sub_row0 + sub_row_offset
            row1 = sub_row1 + sub_row_offset

            # If we have the last value of row1 (from previous sync entry) then use
            # that instead of computed value for row0.
            if last_row1 is not None:
                row0 = last_row1

            # Returns np.ndarray (structured array)
            table_rows = table[row0:row1]

    return table_rows, row0, row1
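# Illustrative sketch (not from the original module): the padded-searchsorted
# row selection above, applied to a toy in-memory stats table (a structured
# array with an 'index' column) instead of an open HDF5 file.  The dt value is
# an assumption matching the 5min stat interval.
import numpy as np

dt = 328.0  # assumed 5min stat time step, as in STATS_DT['5min']
table = np.zeros(10, dtype=[('index', 'i8'), ('mean', 'f8')])
table['index'] = np.arange(1000, 1010)

tstart = 1002.3 * dt
tstop = 1007.9 * dt
times = (table['index'] + 0.5) * dt
# Pad by one dt on each side so edge samples are not dropped
row0, row1 = np.searchsorted(times, [tstart - dt, tstop + dt])
stat_rows = table[row0:row1]  # rows with indexes 1001 through 1008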
Example #41
0
def update(EventModel, date_stop):
    import django.db
    from django.core.exceptions import ObjectDoesNotExist
    from .events import models

    date_stop = DateTime(date_stop)
    cls_name = EventModel.__name__

    try:
        update = models.Update.objects.get(name=cls_name)
    except ObjectDoesNotExist:
        logger.info("No previous update for {} found".format(cls_name))
        duration = EventModel.lookback
        update = models.Update(name=cls_name, date=date_stop.date)
        date_start = date_stop - EventModel.lookback
    else:
        duration = date_stop - DateTime(update.date)
        date_start = DateTime(update.date) - EventModel.lookback
        update.date = date_stop.date

        # Some events like LoadSegment or DsnComm might change in the database after
        # having been ingested.  Use lookback_delete (less than lookback) to
        # always remove events in that range and re-ingest.
        if duration >= 0.5 and hasattr(EventModel, "lookback_delete"):
            delete_date = DateTime(update.date) - EventModel.lookback_delete
            delete_from_date(EventModel, delete_date, set_update_date=False)

    if duration < 0.5:
        logger.info("Skipping {} events because update duration={:.1f} is < 0.5 day".format(cls_name, duration))
        return

    # Some events like LoadSegment, DsnComm are defined into the future, so
    # modify date_stop accordingly.  Note that update.date is set to the
    # nominal date_stop (typically NOW), and this refers more to the last date
    # of processing rather than the actual last date in the archive.
    if hasattr(EventModel, "lookforward"):
        date_stop = date_stop + EventModel.lookforward

    logger.info("Updating {} events from {} to {}".format(cls_name, date_start.date[:-4], date_stop.date[:-4]))

    # Get events for this model from telemetry.  This is returned as a list
    # of dicts with key/val pairs corresponding to model fields
    events = EventModel.get_events(date_start, date_stop)

    with django.db.transaction.commit_on_success():
        for event in events:
            # Try to save event.  Use force_insert=True because otherwise django will
            # update if the event primary key already exists.  In this case we want to
            # force an exception and move on to the next event.
            try:
                event_model = EventModel.from_dict(event, logger)
                event_model.save(force_insert=True)
            except django.db.utils.IntegrityError as err:
                if not re.search("unique", str(err), re.IGNORECASE):
                    raise
                logger.verbose("Skipping {} at {}: already in database ({})".format(cls_name, event["start"], err))
                continue

            logger.info("Added {} {}".format(cls_name, event_model))
            if "dur" in event and event["dur"] < 0:
                logger.info("WARNING: negative event duration for {} {}".format(cls_name, event_model))

            # Add any foreign rows (many to one)
            for foreign_cls_name, rows in event.get("foreign", {}).items():
                ForeignModel = getattr(models, foreign_cls_name)
                if isinstance(rows, np.ndarray):
                    rows = [{key: row[key].tolist() for key in row.dtype.names} for row in rows]
                for row in rows:
                    # Convert to a plain dict if row is structured array
                    foreign_model = ForeignModel.from_dict(row, logger)
                    setattr(foreign_model, event_model.model_name, event_model)
                    logger.verbose("Adding {}".format(foreign_model))
                    foreign_model.save()

    # If processing got here with no exceptions then save the event update
    # information to database
    update.save()
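# Illustrative sketch (not from the original module): the lookback / duration
# bookkeeping above, using plain day numbers instead of DateTime objects and
# database rows.  The function name and all values are toy examples only.
def get_update_range(date_stop, last_update_date, lookback):
    """Return (date_start, duration) in days for an event-model update, or None
    if the update should be skipped because less than half a day has elapsed."""
    if last_update_date is None:
        # No previous update: look back over the full lookback window
        return date_stop - lookback, lookback
    duration = date_stop - last_update_date
    if duration < 0.5:
        return None
    return last_update_date - lookback, duration

# e.g. get_update_range(date_stop=1000.0, last_update_date=999.8, lookback=21) -> None
# and  get_update_range(date_stop=1000.0, last_update_date=998.0, lookback=21) -> (977.0, 2.0)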
Example #42
0
def main(opt):
    # get files
    if opt.obsid:
        logger.info('Opening connection to archive server')
        arc5 = Ska.arc5gl.Arc5gl()
        for (prod, query) in pipe_config['archfiles']:
            proddir = os.path.join(opt.dir, prod)
            if not os.path.exists(proddir):
                logger.info('Creating directory {}'.format(proddir))
                os.makedirs(proddir)
            else:
                logger.info('Skipping directory {}: exists'.format(proddir))
                continue
            obsid = opt.obsid
            if logger.level < 20:
                arc5.echo = True
            arc5.sendline("cd %s" % os.path.abspath(proddir))
            arc5.sendline("obsid=%d" % int(obsid))
            if opt.version is not None:
                arc5.sendline("version=%s" % opt.version)
            logger.info('Sending "get {}"'.format(query))
            arc5.sendline("get %s" % query)
            gotfiles = glob(os.path.join(proddir, "*"))
            if not len(gotfiles):
                os.rmdir(proddir)
        del arc5

    caiprops_files = glob(os.path.join(opt.dir, "asp05",
                                       "pcad*cai0a.par*"))
    if not len(caiprops_files):
        if os.path.exists(os.path.join(opt.dir, "asp05")):
            mock_cai_file(opt)
        else:
            raise ValueError("no pcad*cai0a.par files and no asp05 directory in %s" % opt.dir)

    # check files
    for filetype in ['infiles', 'outfiles']:
        for fileglob in pipe_config[filetype]:
            match = glob(os.path.join(opt.dir, fileglob))
            if not len(match):
                raise ValueError("No files found for glob %s"
                                 % fileglob)
            for mfile in match:
                if re.match(".*\.gz", mfile):
                    logger.verbose('Unzipping {}'.format(mfile))
                    bash("gunzip -f %s" % os.path.abspath(mfile))

    # reset this to get unzipped names
    caiprops_files = glob(os.path.join(opt.dir, "asp05",
                                       "pcad*cai0a.par*"))

    # constant aspect interval files
    obi = {}
    if len(caiprops_files):
        for cai_file in caiprops_files:
            cai = get_par(cai_file, cai_override)
            if cai['obi_num'] not in obi:
                obi[cai['obi_num']] = {}
            interval = 0
            while ("istart_%d" % interval in cai
                   and "istop_%d" % interval in cai):
                obi[cai['obi_num']][interval] = \
                    {'istart': cai['istart_%d' % interval],
                     'istop':  cai['istop_%d' % interval]}
                interval += 1

    ai_cmds = []
    for obi_num in obi:
        # read possible obspars
        obiroot = None
        obspar_files = glob(os.path.join(opt.dir, "obspar/*.par"))
        for ofile in obspar_files:
            obspar = get_obspar(ofile)
            if obspar['obi_num'] == obi_num:
                obsmatch = re.search(r'axaf(.+)_obs0a\.par', ofile)
                obiroot = obsmatch.group(1)
        if not obiroot:
            raise ValueError("no obspar for obi %d" % obi_num)

        for ai_num in obi[obi_num]:
            aspect_interval = obi[obi_num][ai_num]
            istart = aspect_interval['istart']
            istop = aspect_interval['istop']
            root = "f%09d" % istart
            # Work inplace?  Only if told to start at a specific pipe
            inplace = False
            if opt.pipe_start_at:
                inplace = True
            # directory setup
            workdir, indir, outdir = dir_setup(opt.dir,
                                               int(istart),
                                               label=opt.label,
                                               inplace=inplace,
                                               rev=opt.revision)

            # if skipping the slot by chucking the telem
            telem_skip_slot = []
            process_skip_slot = []
            if opt.skip_slot_method == 'telem':
                telem_skip_slot = opt.skip_slot
            else:
                process_skip_slot = opt.skip_slot

            # link relevant files
            link_files(opt.dir, indir, outdir, istart, istop,
                       obiroot, telem_skip_slot)

            # make list files
            make_list_files(opt.dir, indir, outdir, root)

            # spec
            cmd = dict(dir=os.path.abspath(opt.dir),
                       obi=obi_num,
                       indir=indir,
                       outdir=outdir,
                       root=root,
                       pipe_ped="%s.ped" % pipe_config['pipe_ped'],
                       istart=istart,
                       istop=istop,
                       obiroot=obiroot,
                       log="%s_f%09d.log" % (pipe_config['pipe_ped'], istart))
            if len(process_skip_slot):
                cmd['skip_slot'] = process_skip_slot
            if opt.pipe_start_at:
                cmd['pipe_start_at'] = opt.pipe_start_at
            if opt.pipe_stop_before:
                cmd['pipe_stop_before'] = opt.pipe_stop_before

            ai_cmds.append(cmd)

    range_ais = get_range_ai(ai_cmds, opt.range)
    run_ai(range_ais)
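# Illustrative sketch (not from the original script): the constant-aspect-interval
# parsing above as a standalone helper working on a plain dict (toy values below).
def get_intervals(cai):
    """Return {interval_num: {'istart': ..., 'istop': ...}} from a cai par dict."""
    intervals = {}
    interval = 0
    while ("istart_%d" % interval in cai) and ("istop_%d" % interval in cai):
        intervals[interval] = {'istart': cai['istart_%d' % interval],
                               'istop': cai['istop_%d' % interval]}
        interval += 1
    return intervals

# e.g. get_intervals({'istart_0': 100.0, 'istop_0': 200.0,
#                     'istart_1': 250.0, 'istop_1': 300.0})
# -> {0: {'istart': 100.0, 'istop': 200.0}, 1: {'istart': 250.0, 'istop': 300.0}}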
Example #43
0
def read_derived(i, filename, filetype, row, colnames, archfiles, db):
    """Read derived data using eng_archive and derived computation classes.
    ``filename`` has format <content>:<index0>:<index1> where <content>
    is the content type (e.g. "dp_thermal128"), <index0> is the start index for
    the new data and <index1> is the end index (using Python slicing convention
    index0:index1).  Args ``i``, ``filetype``, and ``row`` are as in
    read_archfile().  ``row`` must equal <index0>.  ``colnames`` is the list of
    column names for the content type.
    """
    # Check if filename is already in archfiles.  If so then abort further processing.

    if db.fetchall('SELECT filename FROM archfiles WHERE filename=?',
                   (filename, )):
        logger.verbose('File %s already in archfiles - skipping' % filename)
        return None, None

    # f has format <content>:<index0>:<index1>
    # <content> has format dp_<content><mnf_step> e.g. dp_thermal128
    content, index0, index1 = filename.split(':')
    index0 = int(index0)
    index1 = int(index1)
    mnf_step = int(re.search(r'(\d+)$', content).group(1))
    time_step = mnf_step * derived.MNF_TIME
    times = time_step * np.arange(index0, index1)

    logger.info('Reading (%d / %d) %s' % (i, len(archfiles), filename))
    vals = {}
    bads = np.zeros((len(times), len(colnames)), dtype=bool)
    for i, colname in enumerate(colnames):
        if colname == 'TIME':
            vals[colname] = times
            bads[:, i] = False
        else:
            dp_class = getattr(Ska.engarchive.derived, colname.upper())
            dp = dp_class()
            dataset = dp.fetch(times[0] - 1000, times[-1] + 1000)
            ok = (index0 <= dataset.indexes) & (dataset.indexes < index1)
            vals[colname] = dp.calc(dataset)[ok]
            bads[:, i] = dataset.bads[ok]

    vals['QUALITY'] = bads
    dat = Ska.Numpy.structured_array(vals, list(colnames) + ['QUALITY'])

    # Accumulate relevant info about the archfile that will be ingested into
    # MSID h5 files.  Commit this info before the h5 ingest so that if there is
    # a failure the needed info will be available to do the repair.
    date = DateTime(times[0]).date
    year, doy = date[0:4], date[5:8]
    archfiles_row = dict(filename=filename,
                         filetime=int(index0 * time_step),
                         year=year,
                         doy=doy,
                         tstart=times[0],
                         tstop=times[-1],
                         rowstart=row,
                         rowstop=row + len(dat),
                         startmjf=index0,
                         stopmjf=index1,
                         date=date)

    return dat, archfiles_row
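# Illustrative sketch (not from the original module): parsing a derived-parameter
# pseudo-archfile name and building its time array as above.  MNF_TIME below is
# assumed to be the Chandra minor-frame time (0.25625 s) used by the derived module.
import re
import numpy as np

MNF_TIME = 0.25625  # seconds per minor frame (assumed)

def derived_times(filename):
    """Return (content, times) for a name like 'dp_thermal128:1000:2000'."""
    content, index0, index1 = filename.split(':')
    index0, index1 = int(index0), int(index1)
    mnf_step = int(re.search(r'(\d+)$', content).group(1))
    time_step = mnf_step * MNF_TIME
    return content, time_step * np.arange(index0, index1)

content, times = derived_times('dp_thermal128:1000:2000')
# content == 'dp_thermal128', len(times) == 1000, times[0] == 128 * 0.25625 * 1000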
Example #44
0
def update_derived(filetype):
    """Update full resolution MSID archive files for derived parameters with ``filetype``
    """
    # Get the last H5 table row from archfiles table for this content type
    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs)
    last_row = db.fetchone('SELECT * FROM archfiles ORDER BY filetime DESC')

    # Set the starting index from the last row in archfiles.  This
    # uses Python slicing conventions so that the previous "end"
    # value is exactly the next "start" value, e.g. [index0:index1]
    # For derived parameters we have stopmjf <==> index1
    index0 = last_row['stopmjf']

    # Get the full set of rootparams for all colnames
    colnames = pickle.load(open(msid_files['colnames'].abs, 'rb'))
    colnames = [x for x in colnames if x.startswith('DP_')]
    msids = set()
    for colname in colnames:
        dp_class = getattr(derived, colname)
        dp = dp_class()
        msids = msids.union([x.upper() for x in dp.rootparams])
        time_step = dp.time_step  # will be the same for every DP

    # Find the last time in archive for each of the content types
    # occurring in the list of rootparam MSIDs.
    # fetch.content is a mapping from MSID to content type
    last_times = {}
    ft_content = ft['content'].val
    for msid in msids:
        ft['msid'] = 'TIME'
        content = ft['content'] = fetch.content[msid]
        if content not in last_times:
            h5 = tables.openFile(fetch.msid_files['msid'].abs, mode='r')
            last_times[content] = h5.root.data[-1]
            h5.close()
    last_time = min(last_times.values()) - 1000
    ft['content'] = ft_content

    # Make a list of indexes that will correspond to the index/time ranges
    # for each pseudo-"archfile".  In this context an archfile just specifies
    # the time range covered by an ingest, but is needed by fetch to roughly
    # locate rows in the H5 file for fast queries.  Each archfile is 10000 sec
    # long, and when updating the database no more than 1000000 seconds of
    # telemetry will be read at one time.
    archfile_time_step = 10000.0
    max_archfiles = int(1000000.0 / archfile_time_step)

    # Read data out to either date_now or the last available time in telemetry.
    # opt.date_now could be set in the past for testing.
    index_step = int(round(archfile_time_step / time_step))
    time1 = min(DateTime(opt.date_now).secs, last_time)
    index1 = int(time1 / time_step)
    indexes = np.arange(index0, index1, index_step)

    archfiles = []
    for index0, index1 in zip(indexes[:-1], indexes[1:]):
        archfiles.append('{}:{}:{}'.format(filetype['content'], index0,
                                           index1))
        if len(archfiles) == max_archfiles or index1 == indexes[-1]:
            update_msid_files(filetype, archfiles)
            logger.verbose(
                'update_msid_files(filetype={}, archfiles={})'.format(
                    str(filetype), archfiles))
            archfiles = []
Example #45
0
def update_derived(filetype):
    """Update full resolution MSID archive files for derived parameters with ``filetype``
    """
    # Get the last H5 table row from archfiles table for this content type
    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs)
    last_row = db.fetchone('SELECT * FROM archfiles ORDER BY filetime DESC')

    # Set the starting index from the last row in archfiles.  This
    # uses Python slicing conventions so that the previous "end"
    # value is exactly the next "start" value, e.g. [index0:index1]
    # For derived parameters we have stopmjf <==> index1
    index0 = last_row['stopmjf']

    # Get the full set of rootparams for all colnames
    colnames = pickle.load(open(msid_files['colnames'].abs, 'rb'))
    colnames = [x for x in colnames if x.startswith('DP_')]
    msids = set()
    for colname in colnames:
        dp_class = getattr(derived, colname)
        dp = dp_class()
        msids = msids.union([x.upper() for x in dp.rootparams])
        time_step = dp.time_step  # will be the same for every DP

    # Find the last time in archive for each of the content types
    # occurring in the list of rootparam MSIDs.
    # fetch.content is a mapping from MSID to content type
    last_times = {}
    ft_content = ft['content'].val
    for msid in msids:
        ft['msid'] = 'TIME'
        content = ft['content'] = fetch.content[msid]
        if content not in last_times:
            h5 = tables.open_file(fetch.msid_files['msid'].abs, mode='r')
            last_times[content] = h5.root.data[-1]
            h5.close()
    last_time = min(last_times.values()) - 1000
    ft['content'] = ft_content

    # Make a list of indexes that will correspond to the index/time ranges
    # for each pseudo-"archfile".  In this context an archfile just specifies
    # the time range covered by an ingest, but is needed by fetch to roughly
    # locate rows in the H5 file for fast queries.  Each archfile is 10000 sec
    # long, and when updating the database no more than 1000000 seconds of
    # telemetry will be read at one time.
    archfile_time_step = 10000.0
    max_archfiles = int(1000000.0 / archfile_time_step)

    # Read data out to either date_now or the last available time in telemetry.
    # opt.date_now could be set in the past for testing.
    index_step = int(round(archfile_time_step / time_step))
    time1 = min(DateTime(opt.date_now).secs, last_time)
    index1 = int(time1 / time_step)
    indexes = np.arange(index0, index1, index_step)

    archfiles = []
    for index0, index1 in zip(indexes[:-1], indexes[1:]):
        archfiles.append('{}:{}:{}'.format(filetype['content'], index0, index1))
        if len(archfiles) == max_archfiles or index1 == indexes[-1]:
            update_msid_files(filetype, archfiles)
            logger.verbose('update_msid_files(filetype={}, archfiles={})'
                           .format(str(filetype), archfiles))
            archfiles = []
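# Illustrative sketch (not from the original module): generating the
# pseudo-archfile names and the batches of at most max_archfiles used above,
# with toy index values.  The default of 100 corresponds to 1000000 / 10000.
import numpy as np

def archfile_batches(content, index0, index1, index_step, max_archfiles=100):
    """Yield lists of '<content>:<i0>:<i1>' names, at most ``max_archfiles`` per list."""
    indexes = np.arange(index0, index1, index_step)
    batch = []
    for i0, i1 in zip(indexes[:-1], indexes[1:]):
        batch.append('{}:{}:{}'.format(content, i0, i1))
        if len(batch) == max_archfiles or i1 == indexes[-1]:
            yield batch
            batch = []

# e.g. list(archfile_batches('dp_thermal128', 0, 1000, 100, max_archfiles=3))
# -> [['dp_thermal128:0:100', 'dp_thermal128:100:200', 'dp_thermal128:200:300'],
#     ['dp_thermal128:300:400', 'dp_thermal128:400:500', 'dp_thermal128:500:600'],
#     ['dp_thermal128:600:700', 'dp_thermal128:700:800', 'dp_thermal128:800:900']]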
Example #46
0
def update_sync_data_stat(content, logger, row, stat):
    """
    Update stats (5min, daily) sync data for index table ``row``

    :param content: content name (e.g. acis4eng)
    :param logger: logger
    :param row: one row of the full-res index table
    :param stat: stat interval (5min or daily)
    :return:
    """
    ft = fetch.ft
    ft['interval'] = stat

    outfile = Path(sync_files['data'].abs)
    if outfile.exists():
        logger.verbose(f'Skipping {outfile}, already exists')
        return

    # First get the times corresponding to row0 and row1 in the full resolution archive
    ft['msid'] = 'TIME'
    with tables.open_file(fetch.msid_files['msid'].abs, 'r') as h5:
        table = h5.root.data
        tstart = table[row['row0']]
        # Ensure that table row1 (for tstop) doesn't fall off the edge since the last
        # index file row will have row1 exactly equal to the table length.
        row1 = min(row['row1'], len(table) - 1)
        tstop = table[row1]

    out = {}
    msids = list(fetch.all_colnames[content] - set(fetch.IGNORE_COLNAMES))

    # Get dict of last sync repo row for each MSID.  This is keyed as {msid: last_row1},
    # where row1 is (as always) the slice row1.
    last_rows_filename = sync_files['last_rows'].abs
    if Path(last_rows_filename).exists():
        logger.verbose(f'Reading {last_rows_filename}')
        last_rows = pickle.load(open(last_rows_filename, 'rb'))
    else:
        last_rows = {}

    # Go through each MSID and get the raw HDF5 table data corresponding to the
    # time range tstart:tstop found above.
    n_rows_set = set()
    n_msids = 0
    for msid in msids:
        last_row1 = last_rows.get(msid)
        ft['msid'] = msid
        filename = fetch.msid_files['stats'].abs
        if not Path(filename).exists():
            logger.debug(f'No {stat} stat data for {msid} - skipping')
            continue

        n_msids += 1
        stat_rows, row0, row1 = _get_stat_data_from_archive(
            filename, stat, tstart, tstop, last_row1, logger)
        logger.verbose(f'Got stat rows {row0} {row1} for stat {stat} {msid}')
        n_rows_set.add(row1 - row0)
        if row1 > row0:
            out[f'{msid}.data'] = stat_rows
            out[f'{msid}.row0'] = row0
            out[f'{msid}.row1'] = row1
            last_rows[msid] = row1

    n_rows = n_rows_set.pop() if len(n_rows_set) == 1 else n_rows_set

    outfile.parent.mkdir(exist_ok=True, parents=True)
    # TODO: increase compression to max (gzip?)
    logger.info(
        f'Writing {outfile} with {n_rows} rows of data and {n_msids} msids')
    with gzip.open(outfile, 'wb') as fh:
        pickle.dump(out, fh)

    # Save the row1 value for each MSID to use as row0 for the next update
    logger.verbose(f'Writing {last_rows_filename}')
    with open(last_rows_filename, 'wb') as fh:
        pickle.dump(last_rows, fh)
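# Illustrative sketch (not from the original module): reading back a stat sync
# data file written above and listing its per-MSID row ranges.  The example
# path and the returned values are hypothetical.
import gzip
import pickle

def read_stat_sync_file(path):
    """Return {msid: (row0, row1, n_rows)} for a gzipped stat sync pickle."""
    with gzip.open(path, 'rb') as fh:
        out = pickle.load(fh)
    msids = {key[:-len('.data')] for key in out if key.endswith('.data')}
    return {msid: (out[f'{msid}.row0'], out[f'{msid}.row1'], len(out[f'{msid}.data']))
            for msid in msids}

# e.g. read_stat_sync_file('sync/acis4eng/2020-001T00/5min.pkl.gz')
# might return {'1DEAMZT': (52000, 52100, 100), ...}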
Example #47
0
def update_index_file(index_file, opt, logger):
    """Update the top-level index file of data available in the sync archive

    :param index_file: Path of index ECSV file
    :param opt: options
    :param logger: output logger
    :return: index table (astropy Table)
    """
    if index_file.exists():
        # Start time of last update contained in the sync repo (if it exists), but do not look
        # back more than max_lookback days.  This is relevant for rarely sampled
        # content like cpe1eng.
        filetime0 = (DateTime(opt.date_stop) - opt.max_lookback).secs

        index_tbl = Table.read(index_file)
        if len(index_tbl) == 0:
            # Need to start with a fresh index_tbl since the string column will end up
            # with a length=1 string (date_id) and add_row later will give the wrong result.
            index_tbl = None
        else:
            filetime0 = max(filetime0, index_tbl['filetime1'][-1])
    else:
        # For initial index file creation use the --date-start option
        index_tbl = None
        filetime0 = DateTime(opt.date_start).secs

    max_secs = int(opt.max_days * 86400)
    time_stop = DateTime(opt.date_stop).secs

    # Step through the archfile files entries and collect them into groups of up
    # to --max-days based on file time stamp (which is an integer in CXC secs).
    rows = []
    filename = fetch.msid_files['archfiles'].abs
    logger.verbose(f'Opening archfiles {filename}')
    with DBI(dbi='sqlite', server=filename) as dbi:
        while True:
            filetime1 = min(filetime0 + max_secs, time_stop)
            logger.verbose(
                f'select from archfiles '
                f'filetime > {DateTime(filetime0).fits[:-4]} {filetime0} '
                f'filetime <= {DateTime(filetime1).fits[:-4]} {filetime1} ')
            archfiles = dbi.fetchall(f'select * from archfiles '
                                     f'where filetime > {filetime0} '
                                     f'and filetime <= {filetime1} '
                                     f'order by filetime ')

            # Found new archfiles?  If so get a new index table row for them.
            if len(archfiles) > 0:
                rows.append(get_row_from_archfiles(archfiles))
                filedates = DateTime(archfiles['filetime']).fits
                logger.verbose(f'Got {len(archfiles)} archfiles rows from '
                               f'{filedates[0]} to {filedates[-1]}')

            filetime0 = filetime1

            # Stop if already queried out to the end of desired time range
            if filetime1 >= time_stop:
                break

    if not rows:
        logger.info(f'No updates available for content {fetch.ft["content"]}')
        return index_tbl

    # Create table from scratch or add new rows.  In normal processing there
    # will just be one row per run.
    if index_tbl is None:
        index_tbl = Table(rows)
    else:
        for row in rows:
            index_tbl.add_row(row)

    if not index_file.parent.exists():
        logger.info(f'Making directory {index_file.parent}')
        index_file.parent.mkdir(exist_ok=True, parents=True)

    msg = check_index_tbl_consistency(index_tbl)
    if msg:
        msg += '\n'
        msg += '\n'.join(index_tbl.pformat(max_lines=-1, max_width=-1))
        logger.error(f'Index table inconsistency: {msg}')
        return None

    logger.info(f'Writing {len(rows)} row(s) to index file {index_file}')
    index_tbl.write(index_file, format='ascii.ecsv')

    return index_tbl
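# Illustrative sketch (not from the original module): stepping through archfile
# time stamps in chunks of at most ``max_days``, as in the loop above, but over
# a plain list of filetimes instead of the archfiles sqlite table.
def filetime_chunks(filetimes, filetime0, time_stop, max_days):
    """Yield lists of filetimes with filetime0 < filetime <= filetime0 + max_days * 86400."""
    max_secs = int(max_days * 86400)
    while True:
        filetime1 = min(filetime0 + max_secs, time_stop)
        chunk = [t for t in filetimes if filetime0 < t <= filetime1]
        if chunk:
            yield chunk
        filetime0 = filetime1
        if filetime1 >= time_stop:
            break

# e.g. list(filetime_chunks([10000, 90000, 200000], filetime0=0,
#                           time_stop=250000, max_days=1))
# -> [[10000], [90000], [200000]]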
Example #48
0
File: shell.py Project: sot/pyyaks
def write(self, s):
    logger.verbose(s)
Example #49
0
File: runasp.py Project: sot/runasp
def main(opt):
    # get files
    if opt.obsid:
        logger.info('Opening connection to archive server')
        arc5 = Ska.arc5gl.Arc5gl()
        for (prod, query) in pipe_config['archfiles']:
            proddir = os.path.join(opt.dir, prod)
            if not os.path.exists(proddir):
                logger.info('Creating directory {}'.format(proddir))
                os.makedirs(proddir)
            else:
                logger.info('Skipping directory {}: exists'.format(proddir))
                continue
            obsid = opt.obsid
            if logger.level < 20:
                arc5.echo = True
            arc5.sendline("cd %s" % os.path.abspath(proddir))
            arc5.sendline("obsid=%d" % int(obsid))
            logger.info('Sending "obsid={}"'.format(obsid))
            if opt.version is not None:
                logger.info('Sending "version={}"'.format(opt.version))
                arc5.sendline("version=%s" % opt.version)
            logger.info('Sending "get {}"'.format(query))
            arc5.sendline("get %s" % query)
            gotfiles = glob(os.path.join(proddir, "*"))
            if not len(gotfiles):
                os.rmdir(proddir)
        del arc5

    caiprops_files = glob(os.path.join(opt.dir, "asp05", "pcad*cai0a.par*"))
    if not len(caiprops_files):
        if os.path.exists(os.path.join(opt.dir, "asp05")):
            mock_cai_file(opt)
        else:
            raise ValueError("no pcad*cai0a.par files and no asp05 directory in %s" % opt.dir)

    # check files
    for filetype in ['infiles', 'outfiles']:
        for fileglob in pipe_config[filetype]:
            match = glob(os.path.join(opt.dir, fileglob))
            if not len(match):
                raise ValueError("No files found for glob %s" % fileglob)
            for mfile in match:
                if re.match(".*\.gz", mfile):
                    logger.verbose('Unzipping {}'.format(mfile))
                    bash("gunzip -f %s" % os.path.abspath(mfile))

    # reset this to get unzipped names
    caiprops_files = glob(os.path.join(opt.dir, "asp05", "pcad*cai0a.par*"))

    # constant aspect interval files
    obi = {}
    if len(caiprops_files):
        for cai_file in caiprops_files:
            cai = get_par(cai_file, cai_override)
            if cai['obi_num'] not in obi:
                obi[cai['obi_num']] = {}
            interval = 0
            while ("istart_%d" % interval in cai
                   and "istop_%d" % interval in cai):
                obi[cai['obi_num']][interval] = \
                    {'istart': cai['istart_%d' % interval],
                     'istop':  cai['istop_%d' % interval]}
                interval += 1

    ai_cmds = []
    for obi_num in obi:
        # read possible obspars
        obiroot = None
        obspar_files = glob(os.path.join(opt.dir, "obspar/*.par"))
        for ofile in obspar_files:
            obspar = get_obspar(ofile)
            if obspar['obi_num'] == obi_num:
                obsmatch = re.search(r'axaf(.+)_obs0a\.par', ofile)
                obiroot = obsmatch.group(1)
        if not obiroot:
            raise ValueError("no obspar for obi %d" % obi_num)

        for ai_num in obi[obi_num]:
            aspect_interval = obi[obi_num][ai_num]
            istart = aspect_interval['istart']
            istop = aspect_interval['istop']
            root = "f%09d" % istart
            # Work inplace?  Only if told to start at a specific pipe
            inplace = False
            if opt.pipe_start_at:
                inplace = True
            # directory setup
            workdir, indir, outdir = dir_setup(opt.dir,
                                               int(istart),
                                               label=opt.label,
                                               inplace=inplace,
                                               rev=opt.revision)

            # if skipping the slot by chucking the telem
            telem_skip_slot = []
            process_skip_slot = []
            if opt.skip_slot_method == 'telem':
                telem_skip_slot = opt.skip_slot
            else:
                process_skip_slot = opt.skip_slot

            # link relevant files
            link_files(opt.dir, indir, outdir, istart, istop, obiroot,
                       telem_skip_slot)

            # make list files
            make_list_files(opt.dir, indir, outdir, root)

            # spec
            cmd = dict(dir=os.path.abspath(opt.dir),
                       obi=obi_num,
                       indir=indir,
                       outdir=outdir,
                       root=root,
                       pipe_ped="%s.ped" % pipe_config['pipe_ped'],
                       istart=istart,
                       istop=istop,
                       obiroot=obiroot,
                       log="%s_f%09d.log" % (pipe_config['pipe_ped'], istart))
            if len(process_skip_slot):
                cmd['skip_slot'] = process_skip_slot
            if opt.pipe_start_at:
                cmd['pipe_start_at'] = opt.pipe_start_at
            if opt.pipe_stop_before:
                cmd['pipe_stop_before'] = opt.pipe_stop_before

            ai_cmds.append(cmd)

    range_ais = get_range_ai(ai_cmds, opt.range)
    run_ai(range_ais)
Example #50
0
def update_sync_data_full(content, logger, row):
    """
    Update full-resolution sync data including archfiles for index table ``row``

    This generates a gzipped pickle file with a dict that has sync update values
    for all available MSIDs in this chunk of ``content`` telemetry.  The dict has
    an `archfiles` key (structured ndarray of archfiles rows) and then
    {msid}.quality, {msid}.data, {msid}.row0 and {msid}.row1 keys for each MSID.

    :param content: content type
    :param logger: global logger
    :param row: archfile row
    :return: None
    """
    ft = fetch.ft
    ft['interval'] = 'full'

    outfile = Path(sync_files['data'].abs)
    if outfile.exists():
        logger.verbose(f'Skipping {outfile}, already exists')
        return

    out = {}
    msids = list(fetch.all_colnames[content]) + ['TIME']

    # row['filetime0'] and row['filetime1'] are the *inclusive* filetime stamps
    # for the archfiles to be included in this row.  Rows do not overlap, so
    # the selection below must include equality at both ends.
    with DBI(dbi='sqlite', server=fetch.msid_files['archfiles'].abs) as dbi:
        query = (f'select * from archfiles '
                 f'where filetime >= {row["filetime0"]} '
                 f'and filetime <= {row["filetime1"]} '
                 f'order by filetime ')
        archfiles = dbi.fetchall(query)
        out['archfiles'] = archfiles

    # Row slice indexes into full-resolution MSID h5 files.  All MSIDs share the
    # same row0:row1 range.
    row0 = row['row0']
    row1 = row['row1']

    # Go through each MSID and collect values
    n_msids = 0
    for msid in msids:
        ft['msid'] = msid
        filename = fetch.msid_files['msid'].abs
        if not Path(filename).exists():
            logger.debug(f'No MSID file for {msid} - skipping')
            continue

        n_msids += 1
        with tables.open_file(filename, 'r') as h5:
            out[f'{msid}.quality'] = h5.root.quality[row0:row1]
            out[f'{msid}.data'] = h5.root.data[row0:row1]
            out[f'{msid}.row0'] = row0
            out[f'{msid}.row1'] = row1

    n_rows = row1 - row0
    logger.info(
        f'Writing {outfile} with {n_rows} rows of data and {n_msids} msids')

    outfile.parent.mkdir(exist_ok=True, parents=True)
    # TODO: increase compression to max (gzip?)
    with gzip.open(outfile, 'wb') as fh:
        pickle.dump(out, fh)
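# Illustrative sketch (not from the original module): reading a full-resolution
# sync data file written above and appending one MSID to a client h5 file.  The
# paths are hypothetical placeholders and the overlap / error handling done by
# the real sync code is omitted here.
import gzip
import pickle
import tables

def append_msid_from_sync(sync_path, h5_path, msid):
    """Append one MSID's data/quality from a full-res sync pickle to a client h5 file."""
    with gzip.open(sync_path, 'rb') as fh:
        dat = pickle.load(fh)
    with tables.open_file(h5_path, 'a') as h5:
        # Only append if the sync chunk starts exactly at the end of existing data
        if dat[f'{msid}.row0'] == len(h5.root.data):
            h5.root.data.append(dat[f'{msid}.data'])
            h5.root.quality.append(dat[f'{msid}.quality'])

# e.g. append_msid_from_sync('sync/acis4eng/2020-001T00/full.pkl.gz',
#                            'data/acis4eng/1DEAMZT.h5', '1DEAMZT')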