Example #1
def move_archive_files(filetype, archfiles):
    ft['content'] = filetype.content.lower()

    stagedir = arch_files['stagedir'].abs
    if not os.path.exists(stagedir):
        os.makedirs(stagedir)

    for f in archfiles:
        if not os.path.exists(f):
            continue
        ft['basename'] = os.path.basename(f)
        tstart = re.search(r'(\d+)', str(ft['basename'])).group(1)
        datestart = DateTime(tstart).date
        ft['year'], ft['doy'] = re.search(r'(\d\d\d\d):(\d\d\d)',
                                          datestart).groups()

        archdir = arch_files['archdir'].abs
        archfile = arch_files['archfile'].abs

        if not os.path.exists(archdir):
            os.makedirs(archdir)

        if not os.path.exists(archfile):
            logger.info('mv %s %s' % (os.path.abspath(f), archfile))
            if not opt.dry_run:
                if not opt.occ:
                    shutil.copy2(f, stagedir)
                shutil.move(f, archfile)

        if os.path.exists(f):
            logger.verbose('Unlinking %s' % os.path.abspath(f))
            os.unlink(f)
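
A stand-alone sketch of the file-name and date parsing used above.  The file
name is made up and the DateTime(tstart).date conversion is replaced by a
literal string in the same YYYY:DOY:HH:MM:SS.sss format, so the snippet runs
without the Ska environment.

import re

basename = 'acisf506518304N001_evt2.fits.gz'        # made-up archive file name
tstart = re.search(r'(\d+)', basename).group(1)     # leading digits = CXC seconds
datestart = '2014:021:06:10:39.184'                 # stand-in for DateTime(tstart).date
year, doy = re.search(r'(\d\d\d\d):(\d\d\d)', datestart).groups()
print(tstart, year, doy)                            # 506518304 2014 021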
Example #2
def get_evt_meta(obsid, detector):
    """
    Get event file metadata (FITS keywords) for ``obsid`` and ``detector``.

    Returns a dict of key=value pairs; raises NoObsidError if there is no data in archive.
    """
    logger.info(f'Getting {obsid} {detector} from archive')
    det = 'hrc' if detector.startswith('HRC') else 'acis'
    arc5gl = Ska.arc5gl.Arc5gl()
    arc5gl.sendline('obsid={}'.format(obsid))
    arc5gl.sendline('get {}2'.format(det) + '{evt2}')
    del arc5gl

    files = glob.glob('{}f{}*_evt2.fits.gz'.format(det, obsid))
    if len(files) == 0:
        raise NoObsidError('No event file found for obsid {}'.format(obsid))
    if len(files) > 1:
        raise TooManyFilesError('Wrong number of files {}'.format(files))

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        evt2 = Table.read(files[0], hdu=1)
    os.unlink(files[0])

    evt = {k.lower(): v for k, v in evt2.meta.items()}
    evt['obs_chipx'], evt['obs_chipy'], evt[
        'obs_chip_id'] = dmcoords_chipx_chipy(evt)

    return evt
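
The heart of get_evt_meta is reading the event-table FITS header into a dict of
lower-cased keywords.  A self-contained sketch of that pattern, assuming astropy
is installed; the file name, columns and DETNAM value are made up.

import warnings
from astropy.table import Table

tbl = Table({'time': [1.0, 2.0], 'energy': [520.0, 1840.0]})
tbl.meta['DETNAM'] = 'ACIS-I'            # meta keys are written as FITS header keywords
tbl.write('demo_evt2.fits', overwrite=True)

with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    evt2 = Table.read('demo_evt2.fits', hdu=1)

evt = {k.lower(): v for k, v in evt2.meta.items()}
print(evt['detnam'])                     # ACIS-I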
Example #3
def copy_statfiles_to_test(stat, dt, tstart, tstop):
    ft['interval'] = stat
    colnames = pickle.load(open(msid_files['colnames'].abs))
    for colname in colnames:
        ft['msid'] = colname
        if os.path.exists(test_msid_files['stats'].abs):
            continue
        if os.path.exists(msid_files['stats'].abs):
            logger.info('Copying {0} stats for MSID {1}'.format(stat, colname))
            statdir = os.path.dirname(test_msid_files['stats.tmp'].abs)
            if not os.path.exists(statdir):
                os.makedirs(statdir)
            shutil.copy(msid_files['stats'].abs,
                        test_msid_files['stats.tmp'].abs)
            h5 = tables.openFile(test_msid_files['stats.tmp'].abs, 'a')
            times = (h5.root.data.col('index') + 0.5) * dt
            row0, row1 = np.searchsorted(times, [tstart, tstop])
            #print colname, row0, row1, len(times), DateTime(times[row0]).date, DateTime(times[row1]).date,
            # Remove from row1-1 to end.  The row1-1 is because it is possible
            # to get the daily stat without the rest of the 5min data if
            # tstop is past noon of the day.  This messes up update_archive.
            h5.root.data.removeRows(row1 - 1, h5.root.data.nrows)
            h5.root.data.removeRows(0, row0)
            h5.copyFile(test_msid_files['stats'].abs, overwrite=True)
            newtimes = (h5.root.data.col('index') + 0.5) * dt
            #print len(newtimes), DateTime(newtimes[0]).date, DateTime(newtimes[-1]).date
            h5.close()
            os.unlink(test_msid_files['stats.tmp'].abs)
Example #4
def del_stats(colname, time0, interval):
    """Delete all rows in ``interval`` stats file for column ``colname`` that
    occur after time ``time0`` - ``interval``.  This is used to fix problems
    that result from a file misorder.  Subsequent runs of update_stats will
    refresh the values correctly.
    """
    dt = {'5min': 328, 'daily': 86400}[interval]

    ft['msid'] = colname
    ft['interval'] = interval
    stats_file = msid_files['stats'].abs
    if not os.path.exists(stats_file):
        raise IOError('Stats file {} not found'.format(stats_file))

    logger.info('Fixing stats file %s after time %s', stats_file,
                DateTime(time0).date)

    stats = tables.openFile(stats_file,
                            mode='a',
                            filters=tables.Filters(complevel=5,
                                                   complib='zlib'))
    index0 = time0 // dt - 1
    indexes = stats.root.data.col('index')[:]
    row0 = np.searchsorted(indexes, [index0])[0] - 1
    if opt.dry_run:
        n_del = len(stats.root.data) - row0
    else:
        n_del = stats.root.data.removeRows(row0, len(stats.root.data))
    logger.info('Deleted %d rows from row %s (%s) to end', n_del, row0,
                DateTime(indexes[row0] * dt).date)
    stats.close()
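
A self-contained sketch of the same row-trimming pattern, written with the
current PyTables API (the example above uses the PyTables 2.x camelCase names
openFile/removeRows).  The file name, dt and data are made up.

import numpy as np
import tables

dt = 328.0                                     # 5-minute stat sampling, seconds
data = np.zeros(100, dtype=[('index', 'i8'), ('mean', 'f8')])
data['index'] = np.arange(100)

with tables.open_file('demo_stats.h5', mode='w') as h5:
    h5.create_table(h5.root, 'data', data)

time0 = 10000.0
with tables.open_file('demo_stats.h5', mode='a') as h5:
    index0 = time0 // dt - 1
    indexes = h5.root.data.col('index')[:]
    row0 = np.searchsorted(indexes, [index0])[0] - 1
    n_del = h5.root.data.remove_rows(row0, len(h5.root.data))
    print('removed', n_del, 'rows, kept', len(h5.root.data))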
Example #5
def try4times(func, *arg, **kwarg):
    """
    Work around problems with sqlite3 database getting locked out from writing,
    presumably due to read activity.  Not completely understood.

    This function will try to run func(*arg, **kwarg) a total of 4 times with an
    increasing sequence of wait times between tries.  It catches only a database
    locked error.
    """
    from django.db.utils import OperationalError

    for delay in 0, 5, 10, 60:
        if delay > 0:
            time.sleep(delay)

        try:
            func(*arg, **kwarg)
        except OperationalError as err:
            if 'database is locked' in str(err):
                # Locked DB, issue informational warning
                logger.info(
                    'Warning: locked database, waiting {} seconds'.format(
                        delay))
            else:
                # Something else so just re-raise
                raise
        else:
            # Success, jump out of loop
            break

    else:
        # After 4 tries bail out with an exception
        raise OperationalError('database is locked')
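
The same retry-with-backoff idea can be exercised without Django.  A minimal
stand-alone sketch that swaps in the standard-library sqlite3.OperationalError;
the database file and the write operation are made up.

import sqlite3
import time

def retry_on_locked(func, *arg, **kwarg):
    """Try func up to 4 times, backing off while the database is locked."""
    for delay in 0, 5, 10, 60:
        if delay > 0:
            time.sleep(delay)
        try:
            func(*arg, **kwarg)
        except sqlite3.OperationalError as err:
            if 'database is locked' not in str(err):
                raise                    # some other problem, re-raise
            print('Warning: locked database (waited {} seconds)'.format(delay))
        else:
            break                        # success
    else:
        raise sqlite3.OperationalError('database is locked')

def write_row():
    with sqlite3.connect('demo.db3', timeout=1) as conn:
        conn.execute('CREATE TABLE IF NOT EXISTS vals (x INTEGER)')
        conn.execute('INSERT INTO vals VALUES (1)')

retry_on_locked(write_row)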
Example #6
def read_derived(i, filename, filetype, row, colnames, archfiles, db):
    """Read derived data using eng_archive and derived computation classes.
    ``filename`` has format <content>:<index0>:<index1> where <content>
    is the content type (e.g. "dp_thermal128"), <index0> is the start index for
    the new data and <index1> is the end index (using Python slicing convention
    index0:index1).  Args ``i``, ``filetype``, and ``row`` are as in
    read_archive().  ``row`` must equal <index0>.  ``colnames`` is the list of
    column names for the content type.
    """
    # Check if filename is already in archfiles.  If so then abort further processing.

    if db.fetchall('SELECT filename FROM archfiles WHERE filename=?', (filename,)):
        logger.verbose('File %s already in archfiles - skipping' % filename)
        return None, None

    # filename has format <content>:<index0>:<index1>
    # <content> has format dp_<content><mnf_step> e.g. dp_thermal128
    content, index0, index1 = filename.split(':')
    index0 = int(index0)
    index1 = int(index1)
    mnf_step = int(re.search(r'(\d+)$', content).group(1))
    time_step = mnf_step * derived.MNF_TIME
    times = time_step * np.arange(index0, index1)

    logger.info('Reading (%d / %d) %s' % (i, len(archfiles), filename))
    vals = {}
    bads = np.zeros((len(times), len(colnames)), dtype=np.bool)
    for i, colname in enumerate(colnames):
        if colname == 'TIME':
            vals[colname] = times
            bads[:, i] = False
        else:
            dp_class = getattr(Ska.engarchive.derived, colname.upper())
            dp = dp_class()
            dataset = dp.fetch(times[0] - 1000, times[-1] + 1000)
            ok = (index0 <= dataset.indexes) & (dataset.indexes < index1)
            vals[colname] = dp.calc(dataset)[ok]
            bads[:, i] = dataset.bads[ok]

    vals['QUALITY'] = bads
    dat = Ska.Numpy.structured_array(vals, list(colnames) + ['QUALITY'])

    # Accumulate relevant info about archfile that will be ingested into
    # MSID h5 files.  Commit info before h5 ingest so if there is a failure
    # the needed info will be available to do the repair.
    date = DateTime(times[0]).date
    year, doy = date[0:4], date[5:8]
    archfiles_row = dict(filename=filename,
                         filetime=int(index0 * time_step),
                         year=year,
                         doy=doy,
                         tstart=times[0],
                         tstop=times[-1],
                         rowstart=row,
                         rowstop=row + len(dat),
                         startmjf=index0,
                         stopmjf=index1,
                         date=date)

    return dat, archfiles_row
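
An illustrative, stand-alone sketch of the file-name convention and the
index-to-time mapping above.  MNF_TIME (the telemetry minor-frame period,
assumed here to be 0.25625 s) is hard-coded only for the demonstration.

import re
import numpy as np

MNF_TIME = 0.25625                                  # stand-in for derived.MNF_TIME

filename = 'dp_thermal128:1000:1100'                # <content>:<index0>:<index1>
content, index0, index1 = filename.split(':')
index0, index1 = int(index0), int(index1)

mnf_step = int(re.search(r'(\d+)$', content).group(1))   # 128
time_step = mnf_step * MNF_TIME                          # seconds between samples
times = time_step * np.arange(index0, index1)            # one time per output row
print(mnf_step, time_step, len(times), times[0], times[-1])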
Example #7
def remove_outdated_sync_files(opt, logger, index_tbl, index_file):
    """
    Remove the sync data dirs and index file rows so that no more than
    ``opt.max_sync_dirs`` sync directories are retained.

    :param opt: options
    :param logger: logger
    :param index_tbl: table containing sync repo entries
    :param index_file: index file Path
    """
    # If index table is not too long then no action required.
    if len(index_tbl) <= opt.max_sync_dirs:
        return

    # Index before which rows will be deleted.  Note that index_tbl is guaranteed to be
    # sorted in ascending order by row (and thus by time) because of
    # check_index_tbl_consistency()
    idx0 = len(index_tbl) - opt.max_sync_dirs

    # Iterate over rows to be deleted and delete corresponding file directories.
    for row in index_tbl[:idx0]:
        fetch.ft['date_id'] = row['date_id']
        data_dir = sync_files['data_dir'].abs
        if Path(data_dir).exists():
            logger.info(f'Removing sync directory {data_dir}')
            shutil.rmtree(data_dir)

    index_tbl = index_tbl[idx0:]
    logger.info(f'Writing {len(index_tbl)} row(s) to index file {index_file}')
    index_tbl.write(index_file, format='ascii.ecsv', overwrite=True)
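
A minimal sketch of the keep-only-the-last-N bookkeeping applied to the index
table, using an astropy Table with made-up rows and a print in place of the
directory removal.

from astropy.table import Table

index_tbl = Table({'date_id': ['2021-01-01', '2021-01-08', '2021-01-15', '2021-01-22']})
max_sync_dirs = 2

if len(index_tbl) > max_sync_dirs:
    idx0 = len(index_tbl) - max_sync_dirs
    for row in index_tbl[:idx0]:
        print('would remove sync directory for', row['date_id'])
    index_tbl = index_tbl[idx0:]

print(index_tbl)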
Example #8
def remove_outdated_sync_files(opt, logger, index_tbl):
    """
    Remove the sync data dirs and index file rows which correspond to data
    that is more than opt.max_lookback days older than opt.date_stop (typically
    NOW).

    :param opt: options
    :param logger: logger
    :param index_tbl: table containing sync repo entries
    :return: mask of rows that were removed
    """
    min_time = (DateTime(opt.date_stop) - opt.max_lookback).secs

    # Ephemeris files are time stamped around a month before current date,
    # so leave them around for a couple of months longer.
    if re.search(r'ephem\d$', str(fetch.ft['content'])):
        min_time -= 60 * 86400

    remove_mask = np.zeros(len(index_tbl), dtype=bool)

    # Iterate over all but the last row of the table, removing any
    # directories for updates from before `min_time`.  Leaving the last
    # row gives a direct record of when the last update occurred, but is
    # benign from the perspective of updating the client archive.
    for idx, row in zip(range(len(index_tbl) - 1), index_tbl):
        if row['filetime0'] < min_time:
            fetch.ft['date_id'] = row['date_id']
            remove_mask[idx] = True
            data_dir = sync_files['data_dir'].abs
            if Path(data_dir).exists():
                logger.info(f'Removing sync directory {data_dir}')
                shutil.rmtree(data_dir)

    return remove_mask
Example #9
def copy_statfiles_to_test(stat, dt, tstart, tstop):
    ft['interval'] = stat
    colnames = pickle.load(open(msid_files['colnames'].abs))
    for colname in colnames:
        ft['msid'] = colname
        if os.path.exists(test_msid_files['stats'].abs):
            continue
        if os.path.exists(msid_files['stats'].abs):
            logger.info('Copying {0} stats for MSID {1}'.format(stat, colname))
            statdir = os.path.dirname(test_msid_files['stats.tmp'].abs)
            if not os.path.exists(statdir):
                os.makedirs(statdir)
            shutil.copy(msid_files['stats'].abs, test_msid_files['stats.tmp'].abs)
            h5 = tables.openFile(test_msid_files['stats.tmp'].abs, 'a')
            times = (h5.root.data.col('index') + 0.5) * dt
            row0, row1 = np.searchsorted(times, [tstart, tstop])
            #print colname, row0, row1, len(times), DateTime(times[row0]).date, DateTime(times[row1]).date,
            # Remove from row1-1 to end.  The row1-1 is because it is possible
            # to get the daily stat without the rest of the 5min data if
            # tstop is past noon of the day.  This messes up update_archive.
            h5.root.data.removeRows(row1 - 1, h5.root.data.nrows)
            h5.root.data.removeRows(0, row0)
            h5.copyFile(test_msid_files['stats'].abs, overwrite=True)
            newtimes = (h5.root.data.col('index') + 0.5) * dt
            #print len(newtimes), DateTime(newtimes[0]).date, DateTime(newtimes[-1]).date
            h5.close()
            os.unlink(test_msid_files['stats.tmp'].abs)
Example #10
def main():
    global opt
    opt = get_opt()
    info = {'date': opt.stop,
            'start': opt.start,
            'stop': opt.stop,
            'box_duration_months': opt.box_duration}

    asol_aimpoint = get_asol(info)

    asol_monthly = AsolBinnedStats(asol_aimpoint, 365.25 / 12)
    for det in ('ACIS-S', 'ACIS-I'):
        asol_monthly.det = det
        det_title = asol_monthly.det_title
        info[det_title] = asol_monthly.get_chip_x_y_info()
        asol_monthly.plot_chip_x_y(info[det_title])

    asol_monthly.plot_intra_obs_dy_dz()

    plot_housing_temperature()

    info_file = os.path.join(opt.data_root, 'info.json')
    with open(info_file, 'w') as fh:
        logger.info('Writing info file {}'.format(info_file))
        json.dump(make_pure_python(info), fh, indent=4, sort_keys=True)
Example #11
def make_msid_file(colname, content, content_def):
    ft['content'] = content
    ft['msid'] = colname
    filename = msid_files['data'].abs
    if os.path.exists(filename):
        return

    logger.info('Making MSID data file %s', filename)

    if colname == 'TIME':
        dp_vals, indexes = derived.times_indexes(opt.start, opt.stop,
                                                 content_def['time_step'])
    else:
        dp = content_def['classes'][colname]()
        dataset = dp.fetch(opt.start, opt.stop)
        dp_vals = np.asarray(dp.calc(dataset), dtype=dp.dtype)

    # Finally make the actual MSID data file
    filters = tables.Filters(complevel=5, complib='zlib')
    h5 = tables.openFile(filename, mode='w', filters=filters)
    
    n_rows = int(20 * 3e7 / content_def['time_step'])
    h5shape = (0,) 
    h5type = tables.Atom.from_dtype(dp_vals.dtype)
    h5.createEArray(h5.root, 'data', h5type, h5shape, title=colname,
                    expectedrows=n_rows)
    h5.createEArray(h5.root, 'quality', tables.BoolAtom(), (0,), title='Quality',
                    expectedrows=n_rows)

    logger.info('Made {} shape={} with n_rows(1e6)={}'.format(colname, h5shape, n_rows / 1.0e6))
    h5.close()
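
A self-contained sketch of creating the extendable data and quality arrays with
the current PyTables API (the example above uses the 2.x names
openFile/createEArray); the file name, dtype and row estimate are made up.

import numpy as np
import tables

n_rows = int(20 * 3e7 / 32.8)                   # rough 20-year row estimate
filters = tables.Filters(complevel=5, complib='zlib')

with tables.open_file('demo_msid.h5', mode='w', filters=filters) as h5:
    atom = tables.Atom.from_dtype(np.dtype('float32'))
    h5.create_earray(h5.root, 'data', atom, (0,), title='DEMO_MSID',
                     expectedrows=n_rows)
    h5.create_earray(h5.root, 'quality', tables.BoolAtom(), (0,), title='Quality',
                     expectedrows=n_rows)
    # EArrays grow along the zero-length axis via append()
    h5.root.data.append(np.zeros(100, dtype='float32'))
    h5.root.quality.append(np.zeros(100, dtype=bool))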
Example #12
def main():
    global opt
    opt = get_opt()
    info = {
        'date': opt.stop,
        'start': opt.start,
        'stop': opt.stop,
        'box_duration_months': opt.box_duration
    }

    asol_aimpoint = get_asol(info)

    asol_monthly = AsolBinnedStats(asol_aimpoint, 365.25 / 12)
    for det in ('ACIS-S', 'ACIS-I'):
        asol_monthly.det = det
        det_title = asol_monthly.det_title
        info[det_title] = asol_monthly.get_chip_x_y_info()
        asol_monthly.plot_chip_x_y(info[det_title])

    asol_monthly.plot_intra_obs_dy_dz()

    plot_housing_temperature()

    info_file = os.path.join(opt.data_root, 'info.json')
    with open(info_file, 'w') as fh:
        logger.info('Writing info file {}'.format(info_file))
        json.dump(make_pure_python(info), fh, indent=4, sort_keys=True)
Example #13
def get_idx_cmds(cmds, pars_dict):
    """
    For the input `cmds` (list of dicts), convert to the indexed command format where
    parameters are specified as an index into `pars_dict`, a dict of unique parameter
    values.

    Returns `idx_cmds` as a list of tuples:
       (par_idx, date, type, tlmsid, scs, step, timeline_id, vcdu)
    """
    idx_cmds = []

    for i, cmd in enumerate(cmds):
        if i % 10000 == 9999:
            logger.info('   Iteration {}'.format(i))

        # Define a consistently ordered tuple that has all command parameter information
        pars = cmd['params']
        keys = set(pars.keys()) - set(('SCS', 'STEP', 'TLMSID'))
        if cmd['tlmsid'] == 'AOSTRCAT':
            # Skip star catalog command because that has many (uninteresting) parameters
            # and increases the file size and load speed by an order of magnitude.
            pars_tup = ()
        else:
            pars_tup = tuple((key.lower(), pars[key]) for key in sorted(keys))

        try:
            par_idx = pars_dict[pars_tup]
        except KeyError:
            par_idx = len(pars_dict)
            pars_dict[pars_tup] = par_idx

        idx_cmds.append((par_idx, cmd['date'], cmd['type'], cmd.get('tlmsid'),
                         cmd['scs'], cmd['step'], cmd['timeline_id'], cmd['vcdu']))

    return idx_cmds
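
A minimal sketch of the parameter de-duplication above: each unique, sorted
parameter tuple maps to a small integer index in pars_dict.  The command
contents are made up and dict.setdefault stands in for the try/except.

pars_dict = {}
cmds = [{'params': {'POS': 92904, 'SCS': 131}},
        {'params': {'POS': 92904, 'SCS': 132}},     # same POS -> same par_idx
        {'params': {'POS': -99616, 'SCS': 131}}]

par_idxs = []
for cmd in cmds:
    pars = cmd['params']
    keys = set(pars) - set(('SCS', 'STEP', 'TLMSID'))
    pars_tup = tuple((key.lower(), pars[key]) for key in sorted(keys))
    par_idxs.append(pars_dict.setdefault(pars_tup, len(pars_dict)))

print(par_idxs)      # [0, 0, 1]
print(pars_dict)     # {(('pos', 92904),): 0, (('pos', -99616),): 1}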
Example #14
def main():
    filetypes = Ska.Table.read_ascii_table('filetypes.dat')
    if len(sys.argv) == 2:
        filetypes = filetypes[ filetypes['content'] == sys.argv[1].upper() ]

    loglevel = pyyaks.logger.INFO
    logger = pyyaks.logger.get_logger(level=loglevel, format="%(message)s")

    for filetype in filetypes:
        ft.content = filetype.content.lower()

        orig_files_glob = os.path.join(orig_arch_files['contentdir'].abs, filetype['fileglob'])
        logger.info('orig_files_glob=%s', orig_files_glob)
        for f in glob.glob(orig_files_glob):
            ft.basename = os.path.basename(f)
            tstart = re.search(r'(\d+)', ft.basename).group(1)
            datestart = DateTime(tstart).date
            ft.year, ft.doy = re.search(r'(\d\d\d\d):(\d\d\d)', datestart).groups()

            archdir = arch_files['archdir'].abs
            archfile = arch_files['archfile'].abs

            if not os.path.exists(archdir):
                print 'Making dir', archdir
                os.makedirs(archdir)
                
            if not os.path.exists(archfile):
                # logger.info('mv %s %s' % (f, archfile))
                shutil.move(f, archfile)
Example #15
def plot_n_kalman(obsid, plot_dir, save=False):
    """
    Fetch and plot number of Kalman stars as function of time for
    the requested obsid.
    """
    d = events.dwells.filter(obsid=obsid)[0]
    start = d.start
    stop = d.stop
    n_kalman = get_n_kalman(start, stop)

    plt.figure(figsize=(8, 2.5))

    t0 = n_kalman.times[0]

    # The Kalman vals are strings, so these can be out of order on y axis
    # if not handled as ints.
    plot_cxctime(n_kalman.times, n_kalman.vals.astype(int), color='k')
    plot_cxctime([t0, t0 + 1000], [0.5, 0.5], lw=3, color='orange')

    plt.text(DateTime(t0).plotdate, 0.7, "1 ksec")
    plt.ylabel('# Kalman stars')
    ylims = plt.ylim()
    plt.ylim(-0.2, ylims[1] + 0.2)
    plt.grid(ls=':')

    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.25, top=0.95)

    if save:
        outroot = os.path.join(plot_dir, f'n_kalman_{obsid}')
        logger.info(f'Writing plot file {outroot}.png')
        plt.savefig(outroot + '.png')
        plt.close()
Example #16
def main():
    filetypes = Ska.Table.read_ascii_table('filetypes.dat')
    if len(sys.argv) == 2:
        filetypes = filetypes[filetypes['content'] == sys.argv[1].upper()]

    loglevel = pyyaks.logger.INFO
    logger = pyyaks.logger.get_logger(level=loglevel, format="%(message)s")

    for filetype in filetypes:
        ft.content = filetype.content.lower()

        orig_files_glob = os.path.join(orig_arch_files['contentdir'].abs,
                                       filetype['fileglob'])
        logger.info('orig_files_glob=%s', orig_files_glob)
        for f in glob.glob(orig_files_glob):
            ft.basename = os.path.basename(f)
            tstart = re.search(r'(\d+)', ft.basename).group(1)
            datestart = DateTime(tstart).date
            ft.year, ft.doy = re.search(r'(\d\d\d\d):(\d\d\d)',
                                        datestart).groups()

            archdir = arch_files['archdir'].abs
            archfile = arch_files['archfile'].abs

            if not os.path.exists(archdir):
                print 'Making dir', archdir
                os.makedirs(archdir)

            if not os.path.exists(archfile):
                # logger.info('mv %s %s' % (f, archfile))
                shutil.move(f, archfile)
Example #17
        def new_func(*args, **kwargs):
            runval = run(func.__name__) if isinstance(run, collections.Callable) else run
            if runval is False:
                return
            elif runval is True:
                pass
            elif runval is None:
                if status['fail']:
                    return
            else:
                raise ValueError('run value = %s but must be True, False, or None' % runval)

            logger.verbose('')
            logger.verbose('-' * 60)
            logger.info(' Running task: %s at %s' % (func.__name__, time.ctime()))
            logger.verbose('-' * 60)

            try:
                func(*args, **kwargs)
                pyyaks.context.store_context(status.get('context_file'))
            except KeyboardInterrupt:
                raise
            except TaskSkip:
                pass
            except:
                if status['fail'] is False:
                    logger.error('%s: %s\n\n' % (func.__name__, traceback.format_exc()))
                    status['fail'] = True
Example #18
def try4times(func, *arg, **kwarg):
    """
    Work around problems with sqlite3 database getting locked out from writing,
    presumably due to read activity.  Not completely understood.

    This function will try to run func(*arg, **kwarg) a total of 4 times with an
    increasing sequence of wait times between tries.  It catches only a database
    locked error.
    """
    from django.db.utils import OperationalError

    for delay in 0, 5, 10, 60:
        if delay > 0:
            time.sleep(delay)

        try:
            func(*arg, **kwarg)
        except OperationalError as err:
            if 'database is locked' in str(err):
                # Locked DB, issue informational warning
                logger.info('Warning: locked database, waiting {} seconds'.format(delay))
            else:
                # Something else so just re-raise
                raise
        else:
            # Success, jump out of loop
            break

    else:
        # After 4 tries bail out with an exception
        raise OperationalError('database is locked')
Example #19
def del_stats(colname, time0, interval):
    """Delete all rows in ``interval`` stats file for column ``colname`` that
    occur after time ``time0`` - ``interval``.  This is used to fix problems
    that result from a file misorder.  Subsequent runs of update_stats will
    refresh the values correctly.
    """
    dt = {'5min': 328,
          'daily': 86400}[interval]

    ft['msid'] = colname
    ft['interval'] = interval
    stats_file = msid_files['stats'].abs
    if not os.path.exists(stats_file):
        raise IOError('Stats file {} not found'.format(stats_file))

    logger.info('Fixing stats file %s after time %s', stats_file, DateTime(time0).date)

    stats = tables.open_file(stats_file, mode='a',
                            filters=tables.Filters(complevel=5, complib='zlib'))
    index0 = time0 // dt - 1
    indexes = stats.root.data.col('index')[:]
    row0 = np.searchsorted(indexes, [index0])[0] - 1
    if opt.dry_run:
        n_del = len(stats.root.data) - row0
    else:
        n_del = stats.root.data.remove_rows(row0, len(stats.root.data))
    logger.info('Deleted %d rows from row %s (%s) to end', n_del, row0,
                DateTime(indexes[row0] * dt).date)
    stats.close()
Example #20
def get_obsid(obsid, dt=3.0):
    """
    Get an obsid
    """
    obsids = events.obsids.filter(obsid__exact=obsid)
    if len(obsids) == 0:
        raise ValueError('No obsid={} in kadi database'.format(obsid))

    dwells = events.dwells.filter(obsids[0].start, obsids[0].stop)
    obsid_dwells = [dwell for dwell in dwells if dwell.start > obsids[0].start]
    logger.info('Using obsid dwell(s): {}'
                .format(','.join(str(dwell) for dwell in obsid_dwells)))

    scs107s = events.scs107s.filter(obsid=obsid)
    tstart = DateTime(obsid_dwells[0].start).secs
    if len(scs107s) > 0:
        tstop = scs107s[0].tstop - 200
    else:
        tstop = DateTime(obsid_dwells[-1].stop).secs
    if tstop - tstart < 2000:
        raise ValueError('Observation interval too short {}'.format(tstop - tstart))
    telems, slots = get_archive_data(tstart, tstop)
    out = telems_to_struct(telems, slots)
    out['obsid'] = obsid

    return out
Example #21
def create_content_dir():
    """
    Make empty files for colnames.pkl, colnames_all.pkl and archfiles.db3
    for the current content type ft['content'].

    This only works within the development (git) directory in conjunction
    with the --create option.
    """
    dirname = msid_files['contentdir'].abs
    if not os.path.exists(dirname):
        logger.info('Making directory {}'.format(dirname))
        os.makedirs(dirname)

    empty = set()
    if not os.path.exists(msid_files['colnames'].abs):
        with open(msid_files['colnames'].abs, 'w') as f:
            pickle.dump(empty, f)
    if not os.path.exists(msid_files['colnames_all'].abs):
        with open(msid_files['colnames_all'].abs, 'w') as f:
            pickle.dump(empty, f)

    if not os.path.exists(msid_files['archfiles'].abs):
        archfiles_def = open('archfiles_def.sql').read()
        filename = msid_files['archfiles'].abs
        logger.info('Creating db {}'.format(filename))
        db = Ska.DBI.DBI(dbi='sqlite', server=filename, autocommit=False)
        db.execute(archfiles_def)
        db.commit()
Example #22
def main():
    opt = get_opt()
    logger.info('Centroid dashboard, started')
    update_observed_metrics(obsid=opt.obsid, start=opt.start, stop=opt.stop,
                            force=opt.force, data_root=opt.data_root,
                            make_plots=opt.make_plots, save=opt.save)
    logger.info('Centroid dashboard, ended')
Example #23
def make_archfiles_db(filename, content_def):
    # Do nothing if it is already there
    if os.path.exists(filename):
        return

    datestart = DateTime(DateTime(opt.start).secs - 60)
    tstart = datestart.secs
    tstop = tstart
    year, doy = datestart.date.split(':')[:2]
    times, indexes = derived.times_indexes(tstart, tstop,
                                           content_def['time_step'])

    logger.info('Creating db {}'.format(filename))
    archfiles_def = open(Path(__file__).parent / 'archfiles_def.sql').read()
    db = Ska.DBI.DBI(dbi='sqlite', server=filename)
    db.execute(archfiles_def)
    archfiles_row = dict(
        filename='{}:0:1'.format(content_def['content']),
        filetime=0,
        year=year,
        doy=doy,
        tstart=tstart,
        tstop=tstop,
        rowstart=0,
        rowstop=0,
        startmjf=indexes[0],  # really index0
        stopmjf=indexes[-1],  # really index1
        date=datestart.date)
    db.insert(archfiles_row, 'archfiles')
Example #24
def create_content_dir():
    """
    Make empty files for colnames.pkl, colnames_all.pkl and archfiles.db3
    for the current content type ft['content'].

    This only works within the development (git) directory in conjunction
    with the --create option.
    """
    dirname = msid_files['contentdir'].abs
    if not os.path.exists(dirname):
        logger.info('Making directory {}'.format(dirname))
        os.makedirs(dirname)

    empty = set()
    if not os.path.exists(msid_files['colnames'].abs):
        with open(msid_files['colnames'].abs, 'wb') as f:
            pickle.dump(empty, f, protocol=0)
    if not os.path.exists(msid_files['colnames_all'].abs):
        with open(msid_files['colnames_all'].abs, 'wb') as f:
            pickle.dump(empty, f, protocol=0)

    if not os.path.exists(msid_files['archfiles'].abs):
        archfiles_def = open('archfiles_def.sql').read()
        filename = msid_files['archfiles'].abs
        logger.info('Creating db {}'.format(filename))
        db = Ska.DBI.DBI(dbi='sqlite', server=filename, autocommit=False)
        db.execute(archfiles_def)
        db.commit()
Example #25
def make_archfiles_db(filename, content_def):
    # Do nothing if it is already there
    if os.path.exists(filename):
        return

    datestart = DateTime(DateTime(opt.start).secs - 60)
    tstart = datestart.secs
    tstop = tstart
    year, doy = datestart.date.split(':')[:2]
    times, indexes = derived.times_indexes(tstart, tstop, content_def['time_step'])

    logger.info('Creating db {}'.format(filename))
    archfiles_def = open('archfiles_def.sql').read()
    db = Ska.DBI.DBI(dbi='sqlite', server=filename)
    db.execute(archfiles_def)
    archfiles_row = dict(filename='{}:0:1'.format(content_def['content']),
                         filetime=0,
                         year=year,
                         doy=doy,
                         tstart=tstart,
                         tstop=tstop,
                         rowstart=0,
                         rowstop=0,
                         startmjf=indexes[0], # really index0
                         stopmjf=indexes[-1],  # really index1
                         date=datestart.date)
    db.insert(archfiles_row, 'archfiles')
Example #26
def move_archive_files(filetype, archfiles):
    ft['content'] = filetype.content.lower()

    stagedir = arch_files['stagedir'].abs
    if not os.path.exists(stagedir):
        os.makedirs(stagedir)

    for f in archfiles:
        if not os.path.exists(f):
            continue
        ft['basename'] = os.path.basename(f)
        tstart = re.search(r'(\d+)', str(ft['basename'])).group(1)
        datestart = DateTime(tstart).date
        ft['year'], ft['doy'] = re.search(r'(\d\d\d\d):(\d\d\d)', datestart).groups()

        archdir = arch_files['archdir'].abs
        archfile = arch_files['archfile'].abs

        if not os.path.exists(archdir):
            os.makedirs(archdir)

        if not os.path.exists(archfile):
            logger.info('mv %s %s' % (os.path.abspath(f), archfile))
            if not opt.dry_run:
                if not opt.occ:
                    shutil.copy2(f, stagedir)
                shutil.move(f, archfile)

        if os.path.exists(f):
            logger.verbose('Unlinking %s' % os.path.abspath(f))
            os.unlink(f)
Example #27
def update_sync_repo(opt, logger, content):
    """

    :param opt: argparse options
    :param logger: logger instance
    :param content: content type
    :return:
    """
    # File types context dict
    ft = fetch.ft
    ft['content'] = content

    index_file = Path(sync_files['index'].abs)
    index_tbl = update_index_file(index_file, opt, logger)

    if index_tbl is None:
        # Index table was not created, nothing more to do here
        logger.warning(f'No index table for {content}')
        return

    for row in index_tbl:
        ft = fetch.ft
        ft['date_id'] = row['date_id']

        update_sync_data_full(content, logger, row)
        update_sync_data_stat(content, logger, row, '5min')
        update_sync_data_stat(content, logger, row, 'daily')

    remove_mask = remove_outdated_sync_files(opt, logger, index_tbl)
    if np.any(remove_mask):
        index_tbl = index_tbl[~remove_mask]
        logger.info(
            f'Writing {len(index_tbl)} row(s) to index file {index_file}')
        index_tbl.write(index_file, format='ascii.ecsv')
Example #28
def get_idx_cmds(cmds, pars_dict):
    """
    For the input `cmds` (list of dicts), convert to the indexed command format where
    parameters are specified as an index into `pars_dict`, a dict of unique parameter
    values.

    Returns `idx_cmds` as a list of tuples:
       (par_idx, date, type, tlmsid, scs, step, timeline_id)
    """
    idx_cmds = []

    for i, cmd in enumerate(cmds):
        if i % 10000 == 9999:
            logger.info('   Iteration {}'.format(i))

        # Define a consistently ordered tuple that has all command parameter information
        pars = cmd['params']
        keys = set(pars.keys()) - set(('SCS', 'STEP', 'TLMSID'))
        if cmd['tlmsid'] == 'AOSTRCAT':
            # Skip star catalog command because that has many (uninteresting) parameters
            # and increases the file size and load speed by an order of magnitude.
            pars_tup = ()
        else:
            pars_tup = tuple((key.lower(), pars[key]) for key in sorted(keys))

        try:
            par_idx = pars_dict[pars_tup]
        except KeyError:
            par_idx = len(pars_dict)
            pars_dict[pars_tup] = par_idx

        idx_cmds.append((par_idx, cmd['date'], cmd['type'], cmd.get('tlmsid'),
                         cmd['scs'], cmd['step'], cmd['timeline_id']))

    return idx_cmds
Example #29
def make_h5_col_file(dats, colname):
    """Make a new h5 table to hold column from ``dat``."""
    filename = msid_files['msid'].abs
    filedir = os.path.dirname(filename)
    if not os.path.exists(filedir):
        os.makedirs(filedir)

    # Estimate the number of rows for 20 years based on available data
    times = np.hstack([x['TIME'] for x in dats])
    dt = np.median(times[1:] - times[:-1])
    n_rows = int(86400 * 365 * 20 / dt)

    filters = tables.Filters(complevel=5, complib='zlib')
    h5 = tables.openFile(filename, mode='w', filters=filters)

    col = dats[0][colname]
    h5shape = (0,) + col.shape[1:]
    h5type = tables.Atom.from_dtype(col.dtype)
    h5.createEArray(h5.root, 'data', h5type, h5shape, title=colname,
                    expectedrows=n_rows)
    h5.createEArray(h5.root, 'quality', tables.BoolAtom(), (0,), title='Quality',
                    expectedrows=n_rows)
    logger.info('Made {} shape={} with n_rows(1e6)={}'
                .format(colname, h5shape, n_rows / 1.0e6))
    h5.close()
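
A quick numpy sketch of the 20-year row estimate above; the sample times are
made up and include a gap to show that the median spacing is robust to it.

import numpy as np

times = np.hstack([np.arange(0.0, 3280.0, 32.8),
                   np.arange(100000.0, 103280.0, 32.8)])   # two chunks with a gap
dt = np.median(times[1:] - times[:-1])                     # ~32.8 s
n_rows = int(86400 * 365 * 20 / dt)
print(dt, n_rows)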
Example #30
def _auto_file_discovery(ingest_type, source_type):
    """ Automatically discover and return a list of files to ingest in the staging area.

    Parameters
    ==========
        ingest_type : str
            the type of file to match in the file search. 
            examples of valid values are: h5, hdf, csv, txt
        source_type : str
            the source type of the data e.g. (FILE_PREFIX, Directory Path or Sim vs. Flight)
            An empty source_type matches every file of ingest_type in the default staging area.

    Returns
    =======
        A Python list of filenames based on the type of files to search for given by the
        discovery parameters.

    """
    logger.info(f"Attempting automatic file discovery in {STAGING_DIRECTORY} with ingest type {ingest_type}... ")

    ingest_files = []
    ingest_files.extend(sorted(glob.glob(f"{STAGING_DIRECTORY}/{source_type}*.{ingest_type}")))

    logger.info(f"{len(ingest_files)} file(s) staged in {STAGING_DIRECTORY} ...")

    return ingest_files
Example #31
def _sync_stat_archive(opt, msid_files, logger, content, stat, index_tbl):
    """
    Actual worker for syncing the stat archive for ``content``.
    """
    # Get the last row of data from the length of the TIME.col (or archfiles?)
    ft = fetch.ft
    ft['content'] = content
    ft['interval'] = stat

    stats_dir = Path(msid_files['statsdir'].abs)
    if not stats_dir.exists():
        logger.debug(f'Skipping {stat} data for {content}: no directory')
        return

    logger.info('')
    logger.info(f'Processing {stat} data for {content}')

    # Get the MSIDs that are in client archive
    msids = [str(fn.name)[:-3] for fn in stats_dir.glob('*.h5')]
    if not msids:
        logger.debug(f'Skipping {stat} data for {content}: no stats h5 files')
        return
    else:
        logger.debug(f'Stat msids are {msids}')

    last_date_id, last_date_id_file = get_last_date_id(
        msid_files, msids, stat, logger)
    logger.verbose(f'Got {last_date_id} as last date_id that was applied to archive')

    # Get list of applicable dat objects (new data, before opt.date_stop).  Also
    # return ``date_id`` which is the date_id of the final data set in the list.
    # This will be written as the new ``last_date_id``.
    try:
        dats, date_id = get_stat_data_sets(ft, index_tbl, last_date_id, logger, opt)
    except urllib.error.URLError as err:
        if 'timed out' in str(err):
            msg = f'  ERROR: timed out getting {stat} data for {content}'
            logger.error(msg)
            process_errors.append(msg)
            return
        else:
            raise

    if not dats:
        return

    dat, msids = concat_data_sets(dats, ['data'])
    with DelayedKeyboardInterrupt(logger):
        with timing_logger(logger, f'Applying updates to {len(msids)} h5 files'):
            for msid in msids:
                fetch.ft['msid'] = msid
                stat_file = msid_files['stats'].abs
                if os.path.exists(stat_file):
                    append_stat_col(dat, stat_file, msid, date_id, opt, logger)

            logger.debug(f'Updating {last_date_id_file} with {date_id}')
            if not opt.dry_run:
                with open(last_date_id_file, 'w') as fh:
                    fh.write(f'{date_id}')
Example #32
def plot_att_errors_per_obsid(obsid, plot_dir, coord='dr', att_errors=None,
                              save=False, on_the_fly=False):
    """
    Make png plot of att errors vs time per obsid.

    :param obsid: obsid
    :param att_errors: dictionary with keys including at minimum a coordinate ('dr',
                       'dy' or 'dp' for roll, yaw and pitch) and 'time' (default 'dr')
    :param on_the_fly: default False, if True then ignore param att_errors and derive
                       attitude errors for the requested obsid.
    """

    if coord not in ('dr', 'dp', 'dy'):
        raise ValueError('Coordinate for att error should be dr, dp or dy')

    if on_the_fly:
        att_errors = get_observed_att_errors(obsid, on_the_fly=on_the_fly)
        if att_errors is None:
            return None
    else:
        if att_errors is None:
            raise ValueError('Need to provide att_errors if on_the_fly is False')

    errs = att_errors[coord]
    dates = DateTime(att_errors['time'])

    plt.figure(figsize=(8, 2.5))

    # Skip the first 5 min for observations with duration > 5 min
    dur = dates.secs[-1] - dates.secs[0]
    if dur > 5 * 60:
        ok = dates.secs > dates.secs[0] + 5 * 60
    else:
        ok = np.ones_like(dates.secs, dtype=bool)

    plt.plot(dates.secs[ok] - dates.secs[ok][0], errs[ok],
             '-', lw=2, color='k')

    ylims = plt.ylim()

    if max(ylims) > 100:
        plt.ylim(-max(ylims) - 10, max(ylims) + 10)
    else:
        plt.ylim(-100, 100)

    plt.ylabel(f'{coord} (arcsec)')
    plt.xlabel('Time (sec)')
    plt.grid(ls=':')

    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.25, top=0.95)

    if save:
        outroot = os.path.join(plot_dir, f'observed_{coord}s_{obsid}')
        logger.info(f'Writing plot file {outroot}.png')
        plt.savefig(outroot + '.png')
        plt.close()

    return att_errors
Example #33
def check_filetype(filetype):
    ft['content'] = filetype.content.lower()

    if not os.path.exists(msid_files['archfiles'].abs):
        logger.info('No archfiles.db3 for %s - skipping' % ft['content'])
        return

    logger.info('Checking {} content type, archfiles {}'.format(
        ft['content'], msid_files['archfiles'].abs))

    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs)
    archfiles = db.fetchall('select * from archfiles')
    db.conn.close()

    if opt.check_order:
        for archfile0, archfile1 in zip(archfiles[:-1], archfiles[1:]):
            exception = (archfile0['startmjf'] == 77826
                         and archfile0['year'] == 2004
                         and archfile0['doy'] == 309)
            if archfile1['tstart'] < archfile0['tstart'] and not exception:
                logger.info(
                    'ERROR: archfile order inconsistency\n {}\n{}'.format(
                        archfile0, archfile1))

    if not opt.check_lengths:
        colnames = ['TIME']
    else:
        colnames = [
            x for x in pickle.load(open(msid_files['colnames'].abs))
            if x not in fetch.IGNORE_COLNAMES
        ]

    lengths = set()
    for colname in colnames:
        ft['msid'] = colname

        h5 = tables.open_file(msid_files['msid'].abs, mode='r')
        length = len(h5.root.data)
        h5.root.data[length - 1]
        h5.close()

        logger.verbose('MSID {} has length {}'.format(colname, length))
        lengths.add(length)
        if len(lengths) != 1:
            logger.info('ERROR: inconsistent MSID length {} {} {}'.format(
                ft['content'], colname, lengths))
            return  # Other checks don't make sense now

    length = lengths.pop()

    archfile = archfiles[-1]
    if archfile['rowstop'] != length:
        logger.info('ERROR: inconsistent archfile {}: '
                    'last rowstop={} MSID length={}'.format(
                        ft['content'], archfile['rowstop'], length))
        if opt.find_glitch:
            find_glitch()
Example #34
def get_stats_over_time(start, stop=None, sp=False, dp=None, ir=False, ms=None,
                        slots='combined', t_samp=1000):
    """
    Equivalent to get_stats_per_interval, but concatenate the results for all
    obsids within the specified time interval.
    """
    # Get obsids in time range and collect all the per-interval statistics
    obsids = events.obsids.filter(start, stop, dur__gt=2000)
    stats_list = []
    for obsid in obsids:
        set_FILES_context(obsid.obsid, sp, dp, ir, ms, t_samp, slots)

        # First check that there is the raw dat file for this obsid.  Nothing
        # can be done without this.
        dat_file = FILES['dat.pkl'].rel
        if not os.path.exists(dat_file):
            logger.info('Skipping {}: {} not in archive'.format(obsid, dat_file))
            continue

        # Now get the stats for this obsid.  Hopefully it has already been computed and
        # is cached as a file.  If not, try to compute the stats (and cache).  If that
        # fails then press on but touch a file to indicate failure so subsequent attempts
        # don't bother.
        logger.info('Processing obsid {}'.format(obsid))
        try:
            stats = get_cached_stats()  # depends on the context set previously
        except FailedStatsFile:
            # Previously failed
            logger.info('  Skipping {}: failed statistics'.format(obsid.obsid))
            continue
        except NoStatsFile:
            logger.info('  Reading pickled data file {}'.format(dat_file))
            dat = pickle.load(open(dat_file, 'r'))
            try:
                logger.info('  Computing statistics')
                if slots == 'combined':
                    stats = get_stats_per_interval_combined(dat, sp, dp, ir, ms, t_samp)
                else:
                    stats = get_stats_per_interval_per_slot(dat, sp, dp, ir, ms, slots, t_samp)
            except ValueError as err:
                open(FILES['stats.ERR'].rel, 'w')  # touch file to indicate failure to compute stats
                logger.warn('  ERROR: {}'.format(err))
                continue

        stats['obsid'] = obsid.obsid
        stats_list.append(stats)

    stats = {}
    for case in STAT_CASES:
        stats[case] = {}
        for stat_type in STAT_TYPES:
            stats[case][stat_type] = np.hstack([x[case][stat_type] for x in stats_list])

    # Set corresponding array of obsids for back-tracing outliers etc
    stats['obsid'] = np.hstack([np.ones(len(x['obc']['std']), dtype=int) * x['obsid']
                                for x in stats_list])

    return stats
Example #35
def main():
    global opt, ft, msid_files, logger

    opt, args = get_options()
    ft = fetch.ft
    msid_files = pyyaks.context.ContextDict('add_derived.msid_files',
                                            basedir=opt.data_root)
    msid_files.update(file_defs.msid_files)
    logger = pyyaks.logger.get_logger(name='engarchive',
                                      level=pyyaks.logger.VERBOSE,
                                      format="%(asctime)s %(message)s")

    # Get the derived parameter classes
    dp_classes = (getattr(derived, x) for x in dir(derived)
                  if x.startswith('DP_'))
    dp_classes = [
        x for x in dp_classes
        if hasattr(x, '__base__') and issubclass(x, derived.DerivedParameter)
    ]
    content_defs = {}
    for dp_class in dp_classes:
        colname = dp_class.__name__.upper()
        dp = dp_class()
        content = dp.content
        if opt.content == [] or any(
                re.match(x + r'\d+', content) for x in opt.content):
            dpd = content_defs.setdefault(content, {})
            dpd.setdefault('classes', {'TIME': None})
            dpd['content'] = content
            dpd['classes'][colname] = dp_class
            dpd['mnf_step'] = dp.mnf_step
            dpd['time_step'] = dp.time_step

    for content, content_def in content_defs.items():
        ft['content'] = content
        logger.info('CONTENT = {}'.format(content))

        # Make content directory
        if not os.path.exists(msid_files['contentdir'].rel):
            logger.info('Making directory {}'.format(
                msid_files['contentdir'].rel))
            os.mkdir(msid_files['contentdir'].rel)

        # Make the archfiles.db3 file (if needed)
        make_archfiles_db(msid_files['archfiles'].abs, content_def)

        for colname in content_def['classes']:
            ft['msid'] = colname
            logger.debug('MSID = {}'.format(colname))
            # Create colnames and colnames_all pickle files (if needed) and add colname
            add_colname(msid_files['colnames'].rel, colname)
            add_colname(msid_files['colnames_all'].rel, colname)

            make_msid_file(colname, content, content_def)

        add_colname(msid_files['colnames_all'].rel, 'QUALITY')
Example #36
def get_index_tbl(content, logger, opt):
    # Read the index file to know what is available for new data
    with get_readable(opt.sync_root, opt.is_url, sync_files['index']) as (index_input, uri):
        if index_input is None:
            # If index_file is not found then get_readable returns None
            logger.info(f'No new sync data for {content}: {uri} not found')
            return None
        logger.info(f'Reading index file {uri}')
        index_tbl = Table.read(index_input, format='ascii.ecsv')
    return index_tbl
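
A self-contained sketch of the ECSV index-file round trip used above; the file
name and columns are made up.

from astropy.table import Table

index_tbl = Table({'date_id': ['2021-01-01T0000z', '2021-01-08T0000z'],
                   'filetime0': [6.947e8, 6.953e8],
                   'row0': [0, 12000]})
index_tbl.write('demo_index.ecsv', format='ascii.ecsv', overwrite=True)

index_tbl = Table.read('demo_index.ecsv', format='ascii.ecsv')
print(index_tbl)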
Example #37
def read_archfile(i, f, filetype, row, colnames, archfiles, db):
    """Read filename ``f`` with index ``i`` (position within list of filenames).  The
    file has type ``filetype`` and will be added to MSID file at row index ``row``.
    ``colnames`` is the list of column names for the content type (not used here).
    """
    # Check if filename is already in archfiles.  If so then abort further processing.
    filename = os.path.basename(f)
    if db.fetchall('SELECT filename FROM archfiles WHERE filename=?',
                   (filename, )):
        logger.verbose(
            'File %s already in archfiles - unlinking and skipping' % f)
        os.unlink(f)
        return None, None

    # Read FITS archive file and accumulate data into dats list and header into headers dict
    logger.info('Reading (%d / %d) %s' % (i, len(archfiles), filename))
    hdus = pyfits.open(f)
    hdu = hdus[1]

    try:
        dat = converters.convert(hdu.data, filetype['content'])

    except converters.NoValidDataError:
        # When creating files allow NoValidDataError
        hdus.close()
        logger.warning(
            'WARNING: no valid data in data file {}'.format(filename))
        return None, None

    except converters.DataShapeError as err:
        hdus.close()
        logger.warning(
            'WARNING: skipping file {} with bad data shape: ASCDSVER={} {}'.
            format(filename, hdu.header['ASCDSVER'], err))
        return None, None

    # Accumulate relevant info about archfile that will be ingested into
    # MSID h5 files.  Commit info before h5 ingest so if there is a failure
    # the needed info will be available to do the repair.
    archfiles_row = dict(
        (x, hdu.header.get(x.upper())) for x in archfiles_hdr_cols)
    archfiles_row['checksum'] = hdu.header.get('checksum') or hdu._checksum
    archfiles_row['rowstart'] = row
    archfiles_row['rowstop'] = row + len(dat)
    archfiles_row['filename'] = filename
    archfiles_row['filetime'] = int(
        re.search(r'(\d+)', archfiles_row['filename']).group(1))
    filedate = DateTime(archfiles_row['filetime']).date
    year, doy = (int(x)
                 for x in re.search(r'(\d\d\d\d):(\d\d\d)', filedate).groups())
    archfiles_row['year'] = year
    archfiles_row['doy'] = doy
    hdus.close()

    return dat, archfiles_row
Example #38
def get_obsid_data(obsid):
    filename = os.path.join('data', str(obsid) + '.pkl')
    if os.path.exists(filename):
        dat = pickle.load(open(filename, 'r'))
    else:
        import update_flags_archive
        dat = update_flags_archive.get_obsid(obsid)
        pickle.dump(dat, open(filename, 'w'), protocol=-1)
        logger.info('Wrote data for {}'.format(obsid))

    return dat
Example #39
def cut_stars(ai):
    starfiles = glob(os.path.join(ai['outdir'],
                                  "*stars.txt"))
    shutil.copy(starfiles[0], starfiles[0] + ".orig")
    starlines = open(starfiles[0]).read().split("\n")
    for slot in ai['skip_slot']:
        starlines = [i for i in starlines
                     if not re.match(r"^\s+{}\s+1.*".format(slot), i)]
    logger.info('Cutting stars by updating {}'.format(starfiles[0]))
    with open(starfiles[0], "w") as newlist:
        newlist.write("\n".join(starlines))
Example #40
def main(args=None):
    global logger

    opt = get_opt(args)

    logger = pyyaks.logger.get_logger(name='kadi', level=opt.log_level,
                                      format="%(asctime)s %(message)s")

    log_run_info(logger.info, opt)

    # Set the global root data directory.  This gets used in ..paths to
    # construct file names.  The use of an env var is needed to allow
    # configurability of the root data directory within django.
    os.environ['KADI'] = os.path.abspath(opt.data_root)
    idx_cmds_path = IDX_CMDS_PATH()
    pars_dict_path = PARS_DICT_PATH()

    try:
        with open(pars_dict_path, 'rb') as fh:
            pars_dict = pickle.load(fh)
        logger.info('Read {} pars_dict values from {}'.format(len(pars_dict), pars_dict_path))
    except IOError:
        logger.info('No pars_dict file {} found, starting from empty dict'
                    .format(pars_dict_path))
        pars_dict = {}

    if not opt.mp_dir:
        for prefix in ('/', os.environ['SKA']):
            pth = Path(prefix, 'data', 'mpcrit1', 'mplogs')
            if pth.exists():
                opt.mp_dir = str(pth)
                break
        else:
            raise FileNotFoundError('no mission planning directories found (need --mp-dir)')
    logger.info(f'Using mission planning files at {opt.mp_dir}')

    # Recast as dict subclass that remembers if any element was updated
    pars_dict = UpdatedDict(pars_dict)

    stop = DateTime(opt.stop) if opt.stop else DateTime() + 21
    start = DateTime(opt.start) if opt.start else stop - 42

    cmds = get_cmds(start, stop, opt.mp_dir)
    idx_cmds = get_idx_cmds(cmds, pars_dict)
    add_h5_cmds(idx_cmds_path, idx_cmds)

    if pars_dict.n_updated > 0:
        with open(pars_dict_path, 'wb') as fh:
            pickle.dump(pars_dict, fh, protocol=2)
            logger.info('Wrote {} pars_dict values ({} new) to {}'
                        .format(len(pars_dict), pars_dict.n_updated, pars_dict_path))
    else:
        logger.info('pars_dict was unmodified, not writing')
Example #41
def add_asol_to_h5(filename, asol):
    asol = asol.as_array()
    h5 = tables.openFile(filename, mode='a',
                         filters=tables.Filters(complevel=5, complib='zlib'))
    try:
        logger.info('Appending {} records to {}'.format(len(asol), filename))
        h5.root.data.append(asol)
    except tables.NoSuchNodeError:
        logger.info('Creating {}'.format(filename))
        h5.createTable(h5.root, 'data', asol, "Aimpoint drift", expectedrows=1e6)
    h5.root.data.flush()
    h5.close()
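
A self-contained sketch of the append-or-create pattern above, written with the
current PyTables API (the example uses the 2.x openFile/createTable names); the
file name and record layout are made up.

import numpy as np
import tables

asol = np.zeros(5, dtype=[('time', 'f8'), ('dy', 'f4'), ('dz', 'f4')])
filters = tables.Filters(complevel=5, complib='zlib')

with tables.open_file('demo_asol.h5', mode='a', filters=filters) as h5:
    try:
        h5.root.data.append(asol)              # append if the table already exists
    except tables.NoSuchNodeError:
        h5.create_table(h5.root, 'data', asol, 'Aimpoint drift',
                        expectedrows=int(1e6))
    h5.root.data.flush()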
Example #42
def check_filetype(filetype):
    ft['content'] = filetype.content.lower()

    if not os.path.exists(msid_files['archfiles'].abs):
        logger.info('No archfiles.db3 for %s - skipping' % ft['content'])
        return

    logger.info('Checking {} content type, archfiles {}'.format(
        ft['content'], msid_files['archfiles'].abs))

    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs)
    archfiles = db.fetchall('select * from archfiles')
    db.conn.close()

    if opt.check_order:
        for archfile0, archfile1 in zip(archfiles[:-1], archfiles[1:]):
            exception = (archfile0['startmjf'] == 77826 and
                         archfile0['year'] == 2004 and archfile0['doy'] == 309)
            if archfile1['tstart'] < archfile0['tstart'] and not exception:
                logger.info('ERROR: archfile order inconsistency\n {}\n{}'
                            .format(archfile0, archfile1))

    if not opt.check_lengths:
        colnames = ['TIME']
    else:
        colnames = [x for x in pickle.load(open(msid_files['colnames'].abs))
                    if x not in fetch.IGNORE_COLNAMES]

    lengths = set()
    for colname in colnames:
        ft['msid'] = colname

        h5 = tables.openFile(msid_files['msid'].abs, mode='r')
        length = len(h5.root.data)
        h5.root.data[length - 1]
        h5.close()

        logger.verbose('MSID {} has length {}'.format(colname, length))
        lengths.add(length)
        if len(lengths) != 1:
            logger.info('ERROR: inconsistent MSID length {} {} {}'.format(
                ft['content'], colname, lengths))
            return  # Other checks don't make sense now

    length = lengths.pop()

    archfile = archfiles[-1]
    if archfile['rowstop'] != length:
        logger.info('ERROR: inconsistent archfile {}: '
                    'last rowstop={} MSID length={}'.format(
            ft['content'], archfile['rowstop'], length))
        if opt.find_glitch:
            find_glitch()
Example #43
def copy_dark_image():
    """
    Copy dark cal image from Ska to Mica
    """
    outdir = MICA_FILES['dark_cal_dir'].abs
    if not os.path.exists(outdir):
        logger.info('Making output dark cal directory {}'.format(outdir))
        os.makedirs(outdir)

    infile = SKA_FILES['dark_image.fits'].abs
    outfile = MICA_FILES['dark_image.fits'].abs
    logger.info('Copying {} to {}'.format(infile, outfile))
    shutil.copy(infile, outfile)
Example #44
def get_archive_files(filetype):
    """Update FITS file archive with arc5gl and ingest files into msid (HDF5) archive"""

    # If running on the OCC GRETA network the cwd is a staging directory that
    # could already have files.  Also used in testing.
    # Don't allow an arbitrary number of archive files at once because of memory issues.
    files = sorted(glob.glob(filetype['fileglob']))
    if opt.occ or files:
        return sorted(files)[:opt.max_arch_files]

    # Retrieve CXC archive files in a temp directory with arc5gl
    arc5 = Ska.arc5gl.Arc5gl(echo=True)

    # End time for archive queries (minimum of start + max_query_days and NOW)
    datestop = DateTime(opt.date_now)

    # Get datestart as the most-recent file time from archfiles table.  However,
    # do not look back further than --max-lookback-time
    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs)
    vals = db.fetchone("select max(filetime) from archfiles")
    datestart = DateTime(
        max(vals['max(filetime)'],
            datestop.secs - opt.max_lookback_time * 86400))

    # For *ephem0 the query needs to extend well into the future
    # to guarantee getting all available files.  This is the archive's fault.
    if filetype['level'] == 'L0' and filetype['instrum'] == 'EPHEM':
        datestop = datestop + 50

    # For instrum==EPHEM break queries into time ranges no longer than
    # 100000 sec each.  EPHEM files are at least 7 days long and generated
    # no more often than every ~3 days so this should work.
    n_queries = (1 if filetype['instrum'] != 'EPHEM' else 1 + round(
        (datestop.secs - datestart.secs) / 100000.))
    times = np.linspace(datestart.secs, datestop.secs, n_queries + 1)

    logger.info('********** %s %s **********' % (ft['content'], time.ctime()))

    for t0, t1 in zip(times[:-1], times[1:]):
        if t1 > t0:
            arc5.sendline('tstart=%s' % DateTime(t0).date)
            arc5.sendline('tstop=%s' % DateTime(t1).date)
            arc5.sendline('get %s' % filetype['arc5gl_query'].lower())
        else:
            logger.info(
                'INFO: Skipping archive query because datestop={} < datestart={}'
                .format(DateTime(t1).date,
                        DateTime(t0).date))

    return sorted(glob.glob(filetype['fileglob']))
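
To see how the EPHEM chunking above works out numerically, here is a self-contained sketch with made-up start/stop times and no arc5gl calls; the 100000 s cap and the n_queries arithmetic are the same as in the function.

import numpy as np

# Illustrative only: split a hypothetical 30-day EPHEM span into queries of
# no more than ~100000 s each, mirroring the n_queries logic above.
datestart_secs = 0.0
datestop_secs = 30 * 86400.0          # 30 days = 2,592,000 s

n_queries = 1 + round((datestop_secs - datestart_secs) / 100000.)   # 27 queries
times = np.linspace(datestart_secs, datestop_secs, n_queries + 1)

for t0, t1 in zip(times[:-1], times[1:]):
    print('query from {:.0f} to {:.0f} ({:.0f} s)'.format(t0, t1, t1 - t0))
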
Exemple #45
0
def end(message=None, context_file=None, context_keys=None):
    """End a pipeline sequence."""
    
    if context_file is not None:
        store_context(context_file, context_keys)

    if message is not None:
        logger.info('')
        logger.info('*' * 60)
        logger.info('** %-54s **' % (pyyaks.context.render(message)
                                     + (' FAILED' if status['fail'] else ' SUCCEEDED')))
        logger.info('*' * 60)
        logger.info('')
    status['fail'] = False
def plot_observed_aimpoints(obs_aimpoints):
    """
    Make png and html (mpld3) plot of data in the ``obs_aimpoints`` table.
    """
    plt.close(1)
    fig = plt.figure(1, figsize=(8, 4))

    dates = DateTime(obs_aimpoints['mean_date'])
    years = dates.frac_year
    times = dates.secs
    ok = years > np.max(years) - float(opt.lookback) / 365.25
    obs_aimpoints = obs_aimpoints[ok]
    times = times[ok]

    lolims = {}
    uplims = {}
    for axis in ('dx', 'dy'):
        lolims[axis] = obs_aimpoints[axis] > 10
        uplims[axis] = obs_aimpoints[axis] < -10
        obs_aimpoints[axis] = obs_aimpoints[axis].clip(-10, 10)

    ok = ((np.abs(obs_aimpoints['target_offset_y']) < 100) &
          (np.abs(obs_aimpoints['target_offset_z']) < 100))
    plot_cxctime(times[ok], obs_aimpoints['dx'][ok], 'ob', label='CHIPX')
    plot_cxctime(times[ok], obs_aimpoints['dy'][ok], 'or', label='CHIPY')
    plot_cxctime(times[~ok], obs_aimpoints['dx'][~ok], '*b', label='CHIPX (offset > 100")')
    plot_cxctime(times[~ok], obs_aimpoints['dy'][~ok], '*r', label='CHIPY (offset > 100")')

    for axis in ('dx', 'dy'):
        if np.any(lolims[axis]):
            plt.errorbar(DateTime(times[lolims[axis]]).plotdate,
                         obs_aimpoints[axis][lolims[axis]], marker='.', yerr=1.5, lolims=True)
        if np.any(uplims[axis]):
            plt.errorbar(DateTime(times[uplims[axis]]).plotdate,
                         obs_aimpoints[axis][uplims[axis]], marker='.', yerr=1.5, uplims=True)

    plt.grid()
    ymax = max(12, np.max(np.abs(obs_aimpoints['dx'])), np.max(np.abs(obs_aimpoints['dy'])))
    plt.ylim(-ymax, ymax)
    plt.ylabel('Offset (arcsec)')
    plt.title('Observed aimpoint offsets')

    plt.legend(loc='upper left', fontsize='small', title='', framealpha=0.5)

    outroot = os.path.join(opt.data_root, 'observed_aimpoints')
    logger.info('Writing plot files {}.png,html'.format(outroot))
    mpld3.plugins.connect(fig, mpld3.plugins.MousePosition(fmt='.1f'))
    mpld3.save_html(fig, outroot + '.html')
    fig.patch.set_visible(False)
    plt.savefig(outroot + '.png', facecolor='none')
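
The clip-and-flag pattern above (clamp outliers to the plot range, then mark them with arrow-style error bars) can be tried in isolation; this sketch uses random data and plain Matplotlib, so it only illustrates the technique, not the aimpoint data.

import numpy as np
import matplotlib.pyplot as plt

# Clamp values to +/- 10 and mark clipped points with limit arrows.
x = np.arange(50)
y = np.random.normal(scale=6, size=50)

lolims = y > 10      # clipped from above, drawn as lower-limit arrows
uplims = y < -10     # clipped from below, drawn as upper-limit arrows
y_clip = np.clip(y, -10, 10)

plt.plot(x, y_clip, '.b')
if np.any(lolims):
    plt.errorbar(x[lolims], y_clip[lolims], yerr=1.5, fmt='.', lolims=True)
if np.any(uplims):
    plt.errorbar(x[uplims], y_clip[uplims], yerr=1.5, fmt='.', uplims=True)
plt.ylim(-12, 12)
plt.savefig('clip_demo.png')
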
Exemple #47
0
def run_ai(ais):
    """
    Run aspect pipeline 'flt_run_pipe' over the aspect intervals described
    in the list of dictionaries passed as an argument
    """
    ascds_env = getenv('source /home/ascds/.ascrc -r release', shell='tcsh')
    tcsh_shell("punlearn asp_l1_std", env=ascds_env)
    ocat_env = getenv(
        'source /proj/sot/ska/data/aspect_authorization/set_ascds_ocat_vars.csh',
        shell='tcsh')
    for var in ['ASCDS_OCAT_UNAME', 'ASCDS_OCAT_SERVER', 'ASCDS_OCAT_PWORD']:
        ascds_env[var] = ocat_env[var]
    if opt.param is not None and len(opt.param):
        for param in opt.param:
            cmd = 'pset asp_l1_std {}'.format(param)
            tcsh_shell(cmd,
                       env=ascds_env)
    logger_fh = FilelikeLogger(logger)

    for ai in ais:
        pipe_cmd = 'flt_run_pipe -r {root} -i {indir} -o {outdir} \
-t {pipe_ped} \
-a "INTERVAL_START"={istart} \
-a "INTERVAL_STOP"={istop} \
-a obiroot={obiroot} \
-a revision=1 '.format(**ai)
        if 'pipe_start_at' in ai:
            pipe_cmd = pipe_cmd + " -s {}".format(ai['pipe_start_at'])
        if 'pipe_stop_before' in ai:
            pipe_cmd = pipe_cmd + " -S {}".format(ai['pipe_stop_before'])
        if 'skip_slot' in ai:
            try:
                tcsh_shell(pipe_cmd + " -S check_star_data",
                           env=ascds_env,
                           logfile=logger_fh)
            except ShellError as sherr:
                # if shell error, just check to see if get_star_data completed successfully
                loglines = open(logger_fh.filename).read()
                if not re.search("get_star_data completed successfully", loglines):
                    raise ShellError(sherr)
            cut_stars(ai)
            tcsh_shell(pipe_cmd + " -s check_star_data",
                       env=ascds_env,
                       logfile=logger_fh)
        else:
            logger.info('Running pipe command {}'.format(pipe_cmd))
            tcsh_shell(pipe_cmd,
                       env=ascds_env,
                       logfile=logger_fh)
Exemple #48
0
def main():
    global opt, ft, msid_files, logger

    opt, args = get_options()
    ft = fetch.ft
    msid_files = pyyaks.context.ContextDict('add_derived.msid_files', basedir=opt.data_root)
    msid_files.update(file_defs.msid_files)
    logger = pyyaks.logger.get_logger(name='engarchive', level=pyyaks.logger.VERBOSE, 
                                      format="%(asctime)s %(message)s")

    # Get the derived parameter classes
    dp_classes = (getattr(derived, x) for x in dir(derived) if x.startswith('DP_'))
    dp_classes = [x for x in dp_classes if hasattr(x, '__base__') and
                                           issubclass(x, derived.DerivedParameter)]
    content_defs = {}
    for dp_class in dp_classes:
        colname = dp_class.__name__.upper()
        dp = dp_class()
        content = dp.content
        if opt.content == [] or any(re.match(x + r'\d+', content) for x in opt.content):
            dpd = content_defs.setdefault(content, {})
            dpd.setdefault('classes', {'TIME': None})
            dpd['content'] = content
            dpd['classes'][colname] = dp_class
            dpd['mnf_step'] = dp.mnf_step
            dpd['time_step'] = dp.time_step

    for content, content_def in content_defs.items():
        ft['content'] = content
        logger.info('CONTENT = {}'.format(content))

        # Make content directory
        if not os.path.exists(msid_files['contentdir'].rel):
            logger.info('Making directory {}'.format(msid_files['contentdir'].rel))
            os.mkdir(msid_files['contentdir'].rel)

        # Make the archfiles.db3 file (if needed)
        make_archfiles_db(msid_files['archfiles'].abs, content_def)

        for colname in content_def['classes']:
            ft['msid'] = colname
            logger.debug('MSID = {}'.format(colname))
            # Create colnames and colnames_all pickle files (if needed) and add colname
            add_colname(msid_files['colnames'].rel, colname)
            add_colname(msid_files['colnames_all'].rel, colname)

            make_msid_file(colname, content, content_def)

        add_colname(msid_files['colnames_all'].rel, 'QUALITY')
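
The DP_* discovery in main() (scan a module with dir()/getattr() and keep subclasses of a base class) is a generic pattern; the sketch below builds a throwaway module inline instead of importing the real `derived` module, so every name in it is hypothetical.

import types

# Stand-ins for derived.DerivedParameter and the derived module.
DerivedParameter = type('DerivedParameter', (), {})
fake_derived = types.ModuleType('fake_derived')
fake_derived.DP_EXAMPLE1 = type('DP_EXAMPLE1', (DerivedParameter,), {})
fake_derived.DP_NOT_A_CLASS = 42   # should be filtered out

dp_classes = (getattr(fake_derived, name) for name in dir(fake_derived)
              if name.startswith('DP_'))
dp_classes = [cls for cls in dp_classes
              if isinstance(cls, type) and issubclass(cls, DerivedParameter)]
print([cls.__name__ for cls in dp_classes])    # ['DP_EXAMPLE1']
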
def get_baseline_characteristics_file():
    """
    Get the most recent (presumed to be baseline) OFLS characteristics file on the OCCweb
    configuration directory.

    :returns: file path (including characteristics/ subdirectory)
    """
    # Get the directory of available files.  This is an HTML doc which consists
    # of a single list of links.
    logger.info('Getting baseline characteristics file')
    occweb.URLS['char_constr'] = '/occweb/FOT/configuration/documents/Characteristics_Constraints/'
    html = occweb.get_url('char_constr')
    html = scrape.cleantext(html)
    page = parse_html(html)

    # Matches CHARACTERIS_DDMMMYY
    RE_CHAR_FILE = re.compile(r'CHARACTERIS_ (?P<date> \d\d [A-Z]{3} \d\d) $', re.VERBOSE)

    # Find every link tag in the document and inspect every one with
    # href=<valid characteristics name>
    dates = []
    filenames = []
    links = page.findAll('a')
    for link in links:
        # If link reference matches regex then include for processing
        filename = link.attrs['href']
        match = RE_CHAR_FILE.match(filename)
        if match:
            date = datetime.strptime(match.group('date'), '%d%b%y')
            dates.append(Time(date).yday)
            filenames.append(filename)
            logger.info('  Valid file: {}'.format(filename))
        else:
            logger.info('  Skipping invalid href: {}'.format(filename))

    # Get the filename for the most recent file
    filename = filenames[np.argmax(dates)]
    logger.info('{} is the most recent characteristics file'.format(filename))

    occweb.URLS[filename] = occweb.URLS['char_constr'] + '/' + filename
    logger.info('Fetching {}'.format(occweb.URLS[filename]))
    html = occweb.get_url(filename)

    outfile = os.path.join(opt.data_root, 'characteristics', filename)
    if not os.path.exists(outfile):
        with open(outfile, 'w') as fs:
            fs.write(html)

    return outfile
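
A small aside on the filename regex above: with re.VERBOSE the whitespace inside the pattern is ignored, so it still matches a compact CHARACTERIS_DDMMMYY name. The sketch below uses a hypothetical filename to show the named-group capture and the %d%b%y parse.

import re
from datetime import datetime

RE_CHAR_FILE = re.compile(r'CHARACTERIS_ (?P<date> \d\d [A-Z]{3} \d\d) $', re.VERBOSE)

match = RE_CHAR_FILE.match('CHARACTERIS_01FEB22')   # hypothetical filename
if match:
    # strptime's %b is case-insensitive, so 'FEB' parses fine
    date = datetime.strptime(match.group('date'), '%d%b%y')
    print(date.isoformat())    # 2022-02-01T00:00:00
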
Exemple #50
0
def delete_from_date(EventModel, start, set_update_date=True):
    from .events import models

    date_start = DateTime(start).date
    cls_name = EventModel.__name__

    if set_update_date:
        update = models.Update.objects.get(name=cls_name)
        logger.info("Updating {} date from {} to {}".format(cls_name, update.date, date_start))
        update.date = date_start
        update.save()

    events = EventModel.objects.filter(start__gte=date_start)
    logger.info("Deleting {} {} events after {}".format(events.count(), cls_name, date_start))
    events.delete()
Exemple #51
0
def plot_housing_temperature():
    dat = fetch.Msid('aach1t', '2000:001', stat='daily')
    plt.close(1)
    fig = plt.figure(figsize=(8, 4))
    year = Time(dat.times, format='cxcsec').decimalyear
    plt.plot(year, dat.vals)
    plt.grid()
    plt.xlabel('Year')
    plt.ylabel('Temperature (degF)')
    plt.title('Aspect Camera housing temperature trend')

    outroot = os.path.join(opt.data_root, 'aca_housing_temperature')
    logger.info('Writing plot files {}.png'.format(outroot))
    fig.patch.set_visible(False)
    plt.savefig(outroot + '.png', facecolor="none")
def get_cached_stats(root='stats'):
    stats_file = FILES['{}.pkl'.format(root)].rel
    failfile = FILES['{}.ERR'.format(root)].rel

    # If this was already computed then return the on-disk version
    if os.path.exists(stats_file):
        logger.info('Reading {}'.format(stats_file))
        all_stats = pickle.load(open(stats_file, 'rb'))
        return all_stats

    elif os.path.exists(failfile):
        raise FailedStatsFile('Known fail: file {} exists'.format(failfile))

    else:
        raise NoStatsFile
Exemple #53
0
def get_ending_roll_err(obsid, metrics_file=None):

    # Check for the value in the obsid file, otherwise recalculate
    saved_metrics = []
    if metrics_file is not None and os.path.exists(metrics_file):
        saved_metrics = Table.read(metrics_file, format='ascii.ecsv')
    if len(saved_metrics) and obsid in saved_metrics['obsid']:
        logger.info(f"Getting ending roll for {obsid} from file")
        return saved_metrics[saved_metrics['obsid'] == obsid][0]['ending_roll_err']
    else:
        att_errors = get_observed_att_errors(obsid, on_the_fly=True)
        if att_errors is None:
            return -9999
        else:
            return att_errors['dr'][-1]
Exemple #54
0
def get_cd_dir(obsid, data_root):
    """
    Check if the centroid dashboard directory exists for the requested obsid,
    and create it if needed
    """
    if obsid == -1:
        return ""

    cd_obsid_root = os.path.join(data_root, str(obsid)[:2], f"{obsid}")

    if not os.path.exists(cd_obsid_root):
        os.makedirs(cd_obsid_root)
        logger.info(f'Creating directory {cd_obsid_root}')

    return cd_obsid_root
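
The two-character bucketing used by get_cd_dir is easiest to see with a concrete value; the obsid and data root below are made up and no directories are created.

import os

obsid = 23456
data_root = '/tmp/centroid_dashboard'   # hypothetical path
cd_obsid_root = os.path.join(data_root, str(obsid)[:2], str(obsid))
print(cd_obsid_root)    # /tmp/centroid_dashboard/23/23456
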
Exemple #55
0
def read_metrics_from_file(filename):
    """
    Read in processed guide metrics (dr95, dr50, manvr_angle, ending dr,
    one shot updates, aber corrections) from file
    """
    if os.path.exists(filename):
        logger.info(f'Reading {filename}')
        dat_old = Table.read(filename, format='ascii.ecsv', guess=False)
        processed_obsids = set(dat_old['obsid'])
    else:
        logger.info(f'File {filename} does not exist')
        dat_old = None
        processed_obsids = set()

    return dat_old, processed_obsids
Exemple #56
0
def add_colname(filename, colname):
    """Add ``colname`` to the pickled set() in ``filename``.  Create the pickle
    as needed.
    """
    if not os.path.exists(filename):
        logger.info('Creating colnames pickle {}'.format(filename))
        with open(filename, 'wb') as f:
            pickle.dump(set(), f)

    colnames = pickle.load(open(filename, 'rb'))
    if colname not in colnames:
        logger.info('Adding colname {} to colnames pickle {}'.format(colname, filename))
        colnames.add(colname)
        with open(filename, 'wb') as f:
            pickle.dump(colnames, f)
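
A quick round-trip of the pickled-set idiom that add_colname relies on, written against a temporary file so it is self-contained; the column name is just an example string, and binary mode is used since pickle data is bytes.

import os
import pickle
import tempfile

fd, filename = tempfile.mkstemp(suffix='.pkl')
os.close(fd)

# Create the pickle with an empty set, then add a column name to it.
with open(filename, 'wb') as f:
    pickle.dump(set(), f)

with open(filename, 'rb') as f:
    colnames = pickle.load(f)
colnames.add('TEPHIN')
with open(filename, 'wb') as f:
    pickle.dump(colnames, f)

with open(filename, 'rb') as f:
    print(pickle.load(f))   # {'TEPHIN'}
os.unlink(filename)
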
def get_archive_files(filetype):
    """Update FITS file archive with arc5gl and ingest files into msid (HDF5) archive"""

    # If running on the OCC GRETA network the cwd is a staging directory that
    # could already have files.  Also used in testing.
    # Don't return more than opt.max_arch_files files at once because of memory
    # issues on gretasot.  This only comes up when there has been some problem or stoppage.
    files = sorted(glob.glob(filetype['fileglob']))
    if opt.occ or files:
        return sorted(files)[:opt.max_arch_files]

    # Retrieve CXC archive files in a temp directory with arc5gl
    arc5 = Ska.arc5gl.Arc5gl(echo=True)

    # End time for archive queries (minimum of start + max_query_days and NOW)
    datestop = DateTime(opt.date_now)

    # Get datestart as the most-recent file time from archfiles table.  However,
    # do not look back further than --max-lookback-time
    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs)
    vals = db.fetchone("select max(filetime) from archfiles")
    datestart = DateTime(max(vals['max(filetime)'],
                             datestop.secs - opt.max_lookback_time * 86400))

    # For *ephem0 the query needs to extend well into the future
    # to guarantee getting all available files.  This is the archive's fault.
    if filetype['level'] == 'L0' and filetype['instrum'] == 'EPHEM':
        datestop = datestop + 50

    # For instrum==EPHEM break queries into time ranges no longer than
    # 100000 sec each.  EPHEM files are at least 7 days long and generated
    # no more often than every ~3 days so this should work.
    n_queries = (1 if filetype['instrum'] != 'EPHEM'
                 else 1 + round((datestop.secs - datestart.secs) / 100000.))
    times = np.linspace(datestart.secs, datestop.secs, n_queries + 1)

    logger.info('********** %s %s **********' % (ft['content'], time.ctime()))

    for t0, t1 in zip(times[:-1], times[1:]):
        if t1 > t0:
            arc5.sendline('tstart=%s' % DateTime(t0).date)
            arc5.sendline('tstop=%s' % DateTime(t1).date)
            arc5.sendline('get %s' % filetype['arc5gl_query'].lower())
        else:
            logger.info('INFO: Skipping archive query because datestop={} < datestart={}'
                        .format(DateTime(t1).date, DateTime(t0).date))

    return sorted(glob.glob(filetype['fileglob']))
Exemple #58
0
def read_archfile(i, f, filetype, row, colnames, archfiles, db):
    """Read filename ``f`` with index ``i`` (position within list of filenames).  The
    file has type ``filetype`` and will be added to MSID file at row index ``row``.
    ``colnames`` is the list of column names for the content type (not used here).
    """
    # Check if filename is already in archfiles.  If so then abort further processing.
    filename = os.path.basename(f)
    if db.fetchall('SELECT filename FROM archfiles WHERE filename=?', (filename,)):
        logger.verbose('File %s already in archfiles - unlinking and skipping' % f)
        os.unlink(f)
        return None, None

    # Read FITS archive file and accumulate data into dats list and header into headers dict
    logger.info('Reading (%d / %d) %s' % (i, len(archfiles), filename))
    hdus = pyfits.open(f, character_as_bytes=True)
    hdu = hdus[1]

    try:
        dat = converters.convert(hdu.data, filetype['content'])

    except converters.NoValidDataError:
        # When creating files allow NoValidDataError
        hdus.close()
        logger.warning('WARNING: no valid data in data file {}'.format(filename))
        return None, None

    except converters.DataShapeError as err:
        hdus.close()
        logger.warning('WARNING: skipping file {} with bad data shape: ASCDSVER={} {}'
                       .format(filename, hdu.header['ASCDSVER'], err))
        return None, None

    # Accumulate relevant info about archfile that will be ingested into
    # MSID h5 files.  Commit info before h5 ingest so if there is a failure
    # the needed info will be available to do the repair.
    archfiles_row = dict((x, hdu.header.get(x.upper())) for x in archfiles_hdr_cols)
    archfiles_row['checksum'] = hdu.header.get('checksum') or hdu._checksum
    archfiles_row['rowstart'] = row
    archfiles_row['rowstop'] = row + len(dat)
    archfiles_row['filename'] = filename
    archfiles_row['filetime'] = int(re.search(r'(\d+)', archfiles_row['filename']).group(1))
    filedate = DateTime(archfiles_row['filetime']).date
    year, doy = (int(x) for x in re.search(r'(\d\d\d\d):(\d\d\d)', filedate).groups())
    archfiles_row['year'] = year
    archfiles_row['doy'] = doy
    hdus.close()

    return dat, archfiles_row
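
The bookkeeping at the end of read_archfile is mostly string work; the sketch below derives the filetime/rowstart/rowstop fields from a hypothetical CXC-style filename and a made-up row count. The year/doy fields in the real code additionally need DateTime from Chandra.Time, which is left out here.

import re

filename = 'acisf123456789N001_evt2.fits.gz'   # hypothetical archive file name
row = 1000              # rows already in the MSID HDF5 table
n_rows_in_file = 250    # rows contributed by this file

archfiles_row = {
    'filename': filename,
    'filetime': int(re.search(r'(\d+)', filename).group(1)),   # 123456789
    'rowstart': row,
    'rowstop': row + n_rows_in_file,
}
print(archfiles_row)
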