def move_archive_files(filetype, archfiles):
    ft['content'] = filetype.content.lower()

    stagedir = arch_files['stagedir'].abs
    if not os.path.exists(stagedir):
        os.makedirs(stagedir)

    for f in archfiles:
        if not os.path.exists(f):
            continue

        ft['basename'] = os.path.basename(f)
        tstart = re.search(r'(\d+)', str(ft['basename'])).group(1)
        datestart = DateTime(tstart).date
        ft['year'], ft['doy'] = re.search(r'(\d\d\d\d):(\d\d\d)', datestart).groups()

        archdir = arch_files['archdir'].abs
        archfile = arch_files['archfile'].abs

        if not os.path.exists(archdir):
            os.makedirs(archdir)

        if not os.path.exists(archfile):
            logger.info('mv %s %s' % (os.path.abspath(f), archfile))
            if not opt.dry_run:
                if not opt.occ:
                    shutil.copy2(f, stagedir)
                shutil.move(f, archfile)

        if os.path.exists(f):
            logger.verbose('Unlinking %s' % os.path.abspath(f))
            os.unlink(f)

def get_evt_meta(obsid, detector):
    """
    Get event file metadata (FITS keywords) for ``obsid`` and ``detector``.
    Returns a dict of key=value pairs, or None if there is no data in archive.
    """
    logger.info(f'Getting {obsid} {detector} from archive')
    det = 'hrc' if detector.startswith('HRC') else 'acis'
    arc5gl = Ska.arc5gl.Arc5gl()
    arc5gl.sendline('obsid={}'.format(obsid))
    arc5gl.sendline('get {}2'.format(det) + '{evt2}')
    del arc5gl

    files = glob.glob('{}f{}*_evt2.fits.gz'.format(det, obsid))
    if len(files) == 0:
        raise NoObsidError('No event file found for obsid {}'.format(obsid))
    if len(files) > 1:
        raise TooManyFilesError('Wrong number of files {}'.format(files))

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        evt2 = Table.read(files[0], hdu=1)
    os.unlink(files[0])

    evt = {k.lower(): v for k, v in evt2.meta.items()}
    evt['obs_chipx'], evt['obs_chipy'], evt['obs_chip_id'] = dmcoords_chipx_chipy(evt)

    return evt

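# Usage sketch for get_evt_meta (obsid and detector values are illustrative):
#
#     evt = get_evt_meta(8008, 'ACIS-S')
#     chipx, chipy, chip_id = evt['obs_chipx'], evt['obs_chipy'], evt['obs_chip_id']
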
def copy_statfiles_to_test(stat, dt, tstart, tstop):
    ft['interval'] = stat
    colnames = pickle.load(open(msid_files['colnames'].abs))
    for colname in colnames:
        ft['msid'] = colname
        if os.path.exists(test_msid_files['stats'].abs):
            continue
        if os.path.exists(msid_files['stats'].abs):
            logger.info('Copying {0} stats for MSID {1}'.format(stat, colname))
            statdir = os.path.dirname(test_msid_files['stats.tmp'].abs)
            if not os.path.exists(statdir):
                os.makedirs(statdir)
            shutil.copy(msid_files['stats'].abs, test_msid_files['stats.tmp'].abs)
            h5 = tables.openFile(test_msid_files['stats.tmp'].abs, 'a')
            times = (h5.root.data.col('index') + 0.5) * dt
            row0, row1 = np.searchsorted(times, [tstart, tstop])
            # print colname, row0, row1, len(times), DateTime(times[row0]).date, DateTime(times[row1]).date
            # Remove from row1-1 to end.  The row1-1 is because it is possible
            # to get the daily stat without the rest of the 5min data if
            # tstop is past noon of the day.  This messes up update_archive.
            h5.root.data.removeRows(row1 - 1, h5.root.data.nrows)
            h5.root.data.removeRows(0, row0)
            h5.copyFile(test_msid_files['stats'].abs, overwrite=True)
            newtimes = (h5.root.data.col('index') + 0.5) * dt
            # print len(newtimes), DateTime(newtimes[0]).date, DateTime(newtimes[-1]).date
            h5.close()
            os.unlink(test_msid_files['stats.tmp'].abs)

def del_stats(colname, time0, interval):
    """Delete all rows in ``interval`` stats file for column ``colname`` that
    occur after time ``time0`` - ``interval``.  This is used to fix problems
    that result from a file misorder.  Subsequent runs of update_stats will
    refresh the values correctly.
    """
    dt = {'5min': 328, 'daily': 86400}[interval]

    ft['msid'] = colname
    ft['interval'] = interval
    stats_file = msid_files['stats'].abs
    if not os.path.exists(stats_file):
        raise IOError('Stats file {} not found'.format(stats_file))

    logger.info('Fixing stats file %s after time %s', stats_file, DateTime(time0).date)

    stats = tables.openFile(stats_file, mode='a',
                            filters=tables.Filters(complevel=5, complib='zlib'))
    index0 = time0 // dt - 1
    indexes = stats.root.data.col('index')[:]
    row0 = np.searchsorted(indexes, [index0])[0] - 1
    if opt.dry_run:
        n_del = len(stats.root.data) - row0
    else:
        n_del = stats.root.data.removeRows(row0, len(stats.root.data))
    logger.info('Deleted %d rows from row %s (%s) to end', n_del, row0,
                DateTime(indexes[row0] * dt).date)
    stats.close()

def try4times(func, *arg, **kwarg):
    """
    Work around problems with sqlite3 database getting locked out from writing,
    presumably due to read activity.  Not completely understood.

    This function will try to run func(*arg, **kwarg) a total of 4 times with an
    increasing sequence of wait times between tries.  It catches only a database
    locked error.
    """
    from django.db.utils import OperationalError

    for delay in 0, 5, 10, 60:
        if delay > 0:
            time.sleep(delay)

        try:
            func(*arg, **kwarg)
        except OperationalError as err:
            if 'database is locked' in str(err):
                # Locked DB, issue informational warning
                logger.info('Warning: locked database, waiting {} seconds'.format(delay))
            else:
                # Something else so just re-raise
                raise
        else:
            # Success, jump out of loop
            break
    else:
        # After 4 tries bail out with an exception
        raise OperationalError('database is locked')

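# Usage sketch for try4times (assumes ``event`` is a Django model instance
# whose save() may raise OperationalError('database is locked')):
#
#     try4times(event.save)
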
def read_derived(i, filename, filetype, row, colnames, archfiles, db):
    """Read derived data using eng_archive and derived computation classes.
    ``filename`` has format <content>:<index0>:<index1> where <content>
    is the content type (e.g. "dp_thermal128"), <index0> is the start index for
    the new data and <index1> is the end index (using Python slicing convention
    index0:index1).  Args ``i``, ``filetype``, and ``row`` are as in
    read_archfile().  ``row`` must equal <index0>.  ``colnames`` is the list
    of column names for the content type.
    """
    # Check if filename is already in archfiles.  If so then abort further processing.
    if db.fetchall('SELECT filename FROM archfiles WHERE filename=?', (filename,)):
        logger.verbose('File %s already in archfiles - skipping' % filename)
        return None, None

    # filename has format <content>:<index0>:<index1>
    # <content> has format dp_<content><mnf_step> e.g. dp_thermal128
    content, index0, index1 = filename.split(':')
    index0 = int(index0)
    index1 = int(index1)
    mnf_step = int(re.search(r'(\d+)$', content).group(1))
    time_step = mnf_step * derived.MNF_TIME
    times = time_step * np.arange(index0, index1)

    logger.info('Reading (%d / %d) %s' % (i, len(archfiles), filename))
    vals = {}
    bads = np.zeros((len(times), len(colnames)), dtype=bool)
    for i, colname in enumerate(colnames):
        if colname == 'TIME':
            vals[colname] = times
            bads[:, i] = False
        else:
            dp_class = getattr(Ska.engarchive.derived, colname.upper())
            dp = dp_class()
            dataset = dp.fetch(times[0] - 1000, times[-1] + 1000)
            ok = (index0 <= dataset.indexes) & (dataset.indexes < index1)
            vals[colname] = dp.calc(dataset)[ok]
            bads[:, i] = dataset.bads[ok]

    vals['QUALITY'] = bads
    dat = Ska.Numpy.structured_array(vals, list(colnames) + ['QUALITY'])

    # Accumulate relevant info about archfile that will be ingested into
    # MSID h5 files.  Commit info before h5 ingest so if there is a failure
    # the needed info will be available to do the repair.
    date = DateTime(times[0]).date
    year, doy = date[0:4], date[5:8]
    archfiles_row = dict(filename=filename,
                         filetime=int(index0 * time_step),
                         year=year,
                         doy=doy,
                         tstart=times[0],
                         tstop=times[-1],
                         rowstart=row,
                         rowstop=row + len(dat),
                         startmjf=index0,
                         stopmjf=index1,
                         date=date)

    return dat, archfiles_row

def remove_outdated_sync_files(opt, logger, index_tbl, index_file):
    """
    Remove sync data directories and index file rows so that no more than
    ``opt.max_sync_dirs`` sync directories are retained.

    :param opt: options
    :param logger: logger
    :param index_tbl: table containing sync repo entries
    :param index_file: index file Path
    """
    # If index table is not too long then no action required.
    if len(index_tbl) <= opt.max_sync_dirs:
        return

    # Index before which rows will be deleted.  Note that index_tbl is guaranteed
    # to be sorted in ascending order by row (and thus by time) because of
    # check_index_tbl_consistency()
    idx0 = len(index_tbl) - opt.max_sync_dirs

    # Iterate over rows to be deleted and delete corresponding file directories.
    for row in index_tbl[:idx0]:
        fetch.ft['date_id'] = row['date_id']
        data_dir = sync_files['data_dir'].abs
        if Path(data_dir).exists():
            logger.info(f'Removing sync directory {data_dir}')
            shutil.rmtree(data_dir)

    index_tbl = index_tbl[idx0:]
    logger.info(f'Writing {len(index_tbl)} row(s) to index file {index_file}')
    index_tbl.write(index_file, format='ascii.ecsv', overwrite=True)

def remove_outdated_sync_files(opt, logger, index_tbl):
    """
    Remove the sync data dirs and index file rows which correspond to data
    that is more than opt.max_lookback days older than opt.date_stop (typically NOW).

    :param opt: options
    :param logger: logger
    :param index_tbl: table containing sync repo entries
    :return: mask of rows that were removed
    """
    min_time = (DateTime(opt.date_stop) - opt.max_lookback).secs

    # Ephemeris files are time stamped around a month before current date,
    # so leave them around for a couple of months longer.
    if re.search(r'ephem\d$', str(fetch.ft['content'])):
        min_time -= 60 * 86400

    remove_mask = np.zeros(len(index_tbl), dtype=bool)

    # Iterate over all but the last row of the table, removing any
    # directories for updates from before `min_time`.  Leaving the last
    # row gives a direct record of when the last update occurred, but is
    # benign from the perspective of updating the client archive.
    for idx, row in zip(range(len(index_tbl) - 1), index_tbl):
        if row['filetime0'] < min_time:
            fetch.ft['date_id'] = row['date_id']
            remove_mask[idx] = True
            data_dir = sync_files['data_dir'].abs
            if Path(data_dir).exists():
                logger.info(f'Removing sync directory {data_dir}')
                shutil.rmtree(data_dir)

    return remove_mask

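# Usage sketch: apply the returned mask to trim the index table and rewrite
# the index file, as update_sync_repo does below:
#
#     remove_mask = remove_outdated_sync_files(opt, logger, index_tbl)
#     if np.any(remove_mask):
#         index_tbl = index_tbl[~remove_mask]
#         index_tbl.write(index_file, format='ascii.ecsv')
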
def main():
    global opt
    opt = get_opt()

    info = {'date': opt.stop,
            'start': opt.start,
            'stop': opt.stop,
            'box_duration_months': opt.box_duration}

    asol_aimpoint = get_asol(info)
    asol_monthly = AsolBinnedStats(asol_aimpoint, 365.25 / 12)
    for det in ('ACIS-S', 'ACIS-I'):
        asol_monthly.det = det
        det_title = asol_monthly.det_title
        info[det_title] = asol_monthly.get_chip_x_y_info()
        asol_monthly.plot_chip_x_y(info[det_title])

    asol_monthly.plot_intra_obs_dy_dz()
    plot_housing_temperature()

    info_file = os.path.join(opt.data_root, 'info.json')
    with open(info_file, 'w') as fh:
        logger.info('Writing info file {}'.format(info_file))
        json.dump(make_pure_python(info), fh, indent=4, sort_keys=True)

def make_msid_file(colname, content, content_def):
    ft['content'] = content
    ft['msid'] = colname
    filename = msid_files['data'].abs
    if os.path.exists(filename):
        return

    logger.info('Making MSID data file %s', filename)

    if colname == 'TIME':
        dp_vals, indexes = derived.times_indexes(opt.start, opt.stop,
                                                 content_def['time_step'])
    else:
        dp = content_def['classes'][colname]()
        dataset = dp.fetch(opt.start, opt.stop)
        dp_vals = np.asarray(dp.calc(dataset), dtype=dp.dtype)

    # Finally make the actual MSID data file
    filters = tables.Filters(complevel=5, complib='zlib')
    h5 = tables.openFile(filename, mode='w', filters=filters)

    n_rows = int(20 * 3e7 / content_def['time_step'])
    h5shape = (0,)
    h5type = tables.Atom.from_dtype(dp_vals.dtype)
    h5.createEArray(h5.root, 'data', h5type, h5shape, title=colname,
                    expectedrows=n_rows)
    h5.createEArray(h5.root, 'quality', tables.BoolAtom(), (0,), title='Quality',
                    expectedrows=n_rows)

    logger.info('Made {} shape={} with n_rows(1e6)={}'.format(colname, h5shape,
                                                              n_rows / 1.0e6))
    h5.close()

def get_idx_cmds(cmds, pars_dict):
    """
    For the input `cmds` (list of dicts), convert to the indexed command format
    where parameters are specified as an index into `pars_dict`, a dict of
    unique parameter values.

    Returns `idx_cmds` as a list of tuples:
       (par_idx, date, time, cmd, tlmsid, scs, step, timeline_id, vcdu)
    """
    idx_cmds = []

    for i, cmd in enumerate(cmds):
        if i % 10000 == 9999:
            logger.info(' Iteration {}'.format(i))

        # Define a consistently ordered tuple that has all command parameter information
        pars = cmd['params']
        keys = set(pars.keys()) - set(('SCS', 'STEP', 'TLMSID'))
        if cmd['tlmsid'] == 'AOSTRCAT':
            # Skip star catalog command because that has many (uninteresting) parameters
            # and increases the file size and load speed by an order of magnitude.
            pars_tup = ()
        else:
            pars_tup = tuple((key.lower(), pars[key]) for key in sorted(keys))

        try:
            par_idx = pars_dict[pars_tup]
        except KeyError:
            par_idx = len(pars_dict)
            pars_dict[pars_tup] = par_idx

        idx_cmds.append((par_idx, cmd['date'], cmd['type'], cmd.get('tlmsid'),
                         cmd['scs'], cmd['step'], cmd['timeline_id'], cmd['vcdu']))

    return idx_cmds

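# Usage sketch with a single hypothetical command dict in the format this
# function expects (all values are illustrative, not real telemetry):
#
#     pars_dict = {}
#     cmds = [{'date': '2015:001:00:00:00.000', 'type': 'COMMAND_SW',
#              'tlmsid': 'AOFUNCDS', 'scs': 128, 'step': 10,
#              'timeline_id': 426104285, 'vcdu': 9406939,
#              'params': {'TLMSID': 'AOFUNCDS', 'HEX': '8034301',
#                         'MSID': 'AOFUNCDS'}}]
#     idx_cmds = get_idx_cmds(cmds, pars_dict)
#     # idx_cmds[0][0] is the index of the params tuple in pars_dict
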
def main():
    filetypes = Ska.Table.read_ascii_table('filetypes.dat')
    if len(sys.argv) == 2:
        filetypes = filetypes[filetypes['content'] == sys.argv[1].upper()]

    loglevel = pyyaks.logger.INFO
    logger = pyyaks.logger.get_logger(level=loglevel, format="%(message)s")

    for filetype in filetypes:
        ft.content = filetype.content.lower()

        orig_files_glob = os.path.join(orig_arch_files['contentdir'].abs,
                                       filetype['fileglob'])
        logger.info('orig_files_glob=%s', orig_files_glob)
        for f in glob.glob(orig_files_glob):
            ft.basename = os.path.basename(f)
            tstart = re.search(r'(\d+)', ft.basename).group(1)
            datestart = DateTime(tstart).date
            ft.year, ft.doy = re.search(r'(\d\d\d\d):(\d\d\d)', datestart).groups()

            archdir = arch_files['archdir'].abs
            archfile = arch_files['archfile'].abs
            if not os.path.exists(archdir):
                print('Making dir', archdir)
                os.makedirs(archdir)
            if not os.path.exists(archfile):
                # logger.info('mv %s %s' % (f, archfile))
                shutil.move(f, archfile)

def plot_n_kalman(obsid, plot_dir, save=False):
    """
    Fetch and plot number of Kalman stars as function of time for
    the requested obsid.
    """
    d = events.dwells.filter(obsid=obsid)[0]
    start = d.start
    stop = d.stop
    n_kalman = get_n_kalman(start, stop)

    plt.figure(figsize=(8, 2.5))

    t0 = n_kalman.times[0]
    # The Kalman vals are strings, so these can be out of order on y axis
    # if not handled as ints.
    plot_cxctime(n_kalman.times, n_kalman.vals.astype(int), color='k')
    plot_cxctime([t0, t0 + 1000], [0.5, 0.5], lw=3, color='orange')
    plt.text(DateTime(t0).plotdate, 0.7, "1 ksec")
    plt.ylabel('# Kalman stars')
    ylims = plt.ylim()
    plt.ylim(-0.2, ylims[1] + 0.2)
    plt.grid(ls=':')
    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.25, top=0.95)
    if save:
        outroot = os.path.join(plot_dir, f'n_kalman_{obsid}')
        logger.info(f'Writing plot file {outroot}.png')
        plt.savefig(outroot + '.png')
        plt.close()

def new_func(*args, **kwargs):
    runval = run(func.__name__) if isinstance(run, collections.Callable) else run
    if runval is False:
        return
    elif runval is True:
        pass
    elif runval is None:
        if status['fail']:
            return
    else:
        raise ValueError('run value = %s but must be True, False, or None' % runval)

    logger.verbose('')
    logger.verbose('-' * 60)
    logger.info(' Running task: %s at %s' % (func.__name__, time.ctime()))
    logger.verbose('-' * 60)

    try:
        func(*args, **kwargs)
        pyyaks.context.store_context(status.get('context_file'))
    except KeyboardInterrupt:
        raise
    except TaskSkip:
        pass
    except:
        if status['fail'] is False:
            logger.error('%s: %s\n\n' % (func.__name__, traceback.format_exc()))
            status['fail'] = True

def del_stats(colname, time0, interval):
    """Delete all rows in ``interval`` stats file for column ``colname`` that
    occur after time ``time0`` - ``interval``.  This is used to fix problems
    that result from a file misorder.  Subsequent runs of update_stats will
    refresh the values correctly.
    """
    dt = {'5min': 328, 'daily': 86400}[interval]

    ft['msid'] = colname
    ft['interval'] = interval
    stats_file = msid_files['stats'].abs
    if not os.path.exists(stats_file):
        raise IOError('Stats file {} not found'.format(stats_file))

    logger.info('Fixing stats file %s after time %s', stats_file, DateTime(time0).date)

    stats = tables.open_file(stats_file, mode='a',
                             filters=tables.Filters(complevel=5, complib='zlib'))
    index0 = time0 // dt - 1
    indexes = stats.root.data.col('index')[:]
    row0 = np.searchsorted(indexes, [index0])[0] - 1
    if opt.dry_run:
        n_del = len(stats.root.data) - row0
    else:
        n_del = stats.root.data.remove_rows(row0, len(stats.root.data))
    logger.info('Deleted %d rows from row %s (%s) to end', n_del, row0,
                DateTime(indexes[row0] * dt).date)
    stats.close()

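# Usage sketch (MSID name and time are illustrative):
#
#     del_stats('AACCCDPT', DateTime('2020:100:00:00:00').secs, 'daily')
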
def get_obsid(obsid, dt=3.0):
    """
    Get observation telemetry data for ``obsid`` as a dict structure.
    """
    obsids = events.obsids.filter(obsid__exact=obsid)
    if len(obsids) == 0:
        raise ValueError('No obsid={} in kadi database'.format(obsid))

    dwells = events.dwells.filter(obsids[0].start, obsids[0].stop)
    obsid_dwells = [dwell for dwell in dwells if dwell.start > obsids[0].start]
    logger.info('Using obsid dwell(s): {}'
                .format(','.join(str(dwell) for dwell in obsid_dwells)))

    scs107s = events.scs107s.filter(obsid=obsid)

    tstart = DateTime(obsid_dwells[0].start).secs
    if len(scs107s) > 0:
        tstop = scs107s[0].tstop - 200
    else:
        tstop = DateTime(obsid_dwells[-1].stop).secs
    if tstop - tstart < 2000:
        raise ValueError('Observation interval too short {}'.format(tstop - tstart))

    telems, slots = get_archive_data(tstart, tstop)
    out = telems_to_struct(telems, slots)
    out['obsid'] = obsid

    return out

def create_content_dir():
    """
    Make empty files for colnames.pkl, colnames_all.pkl and archfiles.db3
    for the current content type ft['content'].

    This only works within the development (git) directory in conjunction
    with the --create option.
    """
    dirname = msid_files['contentdir'].abs
    if not os.path.exists(dirname):
        logger.info('Making directory {}'.format(dirname))
        os.makedirs(dirname)

    empty = set()
    if not os.path.exists(msid_files['colnames'].abs):
        with open(msid_files['colnames'].abs, 'w') as f:
            pickle.dump(empty, f)
    if not os.path.exists(msid_files['colnames_all'].abs):
        with open(msid_files['colnames_all'].abs, 'w') as f:
            pickle.dump(empty, f)

    if not os.path.exists(msid_files['archfiles'].abs):
        archfiles_def = open('archfiles_def.sql').read()
        filename = msid_files['archfiles'].abs
        logger.info('Creating db {}'.format(filename))
        db = Ska.DBI.DBI(dbi='sqlite', server=filename, autocommit=False)
        db.execute(archfiles_def)
        db.commit()

def main():
    opt = get_opt()
    logger.info('Centroid dashboard, started')
    update_observed_metrics(obsid=opt.obsid,
                            start=opt.start, stop=opt.stop,
                            force=opt.force, data_root=opt.data_root,
                            make_plots=opt.make_plots, save=opt.save)
    logger.info('Centroid dashboard, ended')

def make_archfiles_db(filename, content_def):
    # Do nothing if it is already there
    if os.path.exists(filename):
        return

    datestart = DateTime(DateTime(opt.start).secs - 60)
    tstart = datestart.secs
    tstop = tstart
    year, doy = datestart.date.split(':')[:2]
    times, indexes = derived.times_indexes(tstart, tstop, content_def['time_step'])

    logger.info('Creating db {}'.format(filename))
    archfiles_def = open(Path(__file__).parent / 'archfiles_def.sql').read()
    db = Ska.DBI.DBI(dbi='sqlite', server=filename)
    db.execute(archfiles_def)
    archfiles_row = dict(filename='{}:0:1'.format(content_def['content']),
                         filetime=0,
                         year=year,
                         doy=doy,
                         tstart=tstart,
                         tstop=tstop,
                         rowstart=0,
                         rowstop=0,
                         startmjf=indexes[0],  # really index0
                         stopmjf=indexes[-1],  # really index1
                         date=datestart.date)
    db.insert(archfiles_row, 'archfiles')

def create_content_dir():
    """
    Make empty files for colnames.pkl, colnames_all.pkl and archfiles.db3
    for the current content type ft['content'].

    This only works within the development (git) directory in conjunction
    with the --create option.
    """
    dirname = msid_files['contentdir'].abs
    if not os.path.exists(dirname):
        logger.info('Making directory {}'.format(dirname))
        os.makedirs(dirname)

    empty = set()
    if not os.path.exists(msid_files['colnames'].abs):
        with open(msid_files['colnames'].abs, 'wb') as f:
            pickle.dump(empty, f, protocol=0)
    if not os.path.exists(msid_files['colnames_all'].abs):
        with open(msid_files['colnames_all'].abs, 'wb') as f:
            pickle.dump(empty, f, protocol=0)

    if not os.path.exists(msid_files['archfiles'].abs):
        archfiles_def = open('archfiles_def.sql').read()
        filename = msid_files['archfiles'].abs
        logger.info('Creating db {}'.format(filename))
        db = Ska.DBI.DBI(dbi='sqlite', server=filename, autocommit=False)
        db.execute(archfiles_def)
        db.commit()

def make_archfiles_db(filename, content_def):
    # Do nothing if it is already there
    if os.path.exists(filename):
        return

    datestart = DateTime(DateTime(opt.start).secs - 60)
    tstart = datestart.secs
    tstop = tstart
    year, doy = datestart.date.split(':')[:2]
    times, indexes = derived.times_indexes(tstart, tstop, content_def['time_step'])

    logger.info('Creating db {}'.format(filename))
    archfiles_def = open('archfiles_def.sql').read()
    db = Ska.DBI.DBI(dbi='sqlite', server=filename)
    db.execute(archfiles_def)
    archfiles_row = dict(filename='{}:0:1'.format(content_def['content']),
                         filetime=0,
                         year=year,
                         doy=doy,
                         tstart=tstart,
                         tstop=tstop,
                         rowstart=0,
                         rowstop=0,
                         startmjf=indexes[0],  # really index0
                         stopmjf=indexes[-1],  # really index1
                         date=datestart.date)
    db.insert(archfiles_row, 'archfiles')

def update_sync_repo(opt, logger, content):
    """
    Update the sync repository for ``content``.

    :param opt: argparse options
    :param logger: logger instance
    :param content: content type
    :return: None
    """
    # File types context dict
    ft = fetch.ft
    ft['content'] = content

    index_file = Path(sync_files['index'].abs)
    index_tbl = update_index_file(index_file, opt, logger)

    if index_tbl is None:
        # Index table was not created, nothing more to do here
        logger.warning(f'No index table for {content}')
        return

    for row in index_tbl:
        ft = fetch.ft
        ft['date_id'] = row['date_id']

        update_sync_data_full(content, logger, row)
        update_sync_data_stat(content, logger, row, '5min')
        update_sync_data_stat(content, logger, row, 'daily')

    remove_mask = remove_outdated_sync_files(opt, logger, index_tbl)
    if np.any(remove_mask):
        index_tbl = index_tbl[~remove_mask]
        logger.info(f'Writing {len(index_tbl)} row(s) to index file {index_file}')
        index_tbl.write(index_file, format='ascii.ecsv')

def get_idx_cmds(cmds, pars_dict):
    """
    For the input `cmds` (list of dicts), convert to the indexed command format
    where parameters are specified as an index into `pars_dict`, a dict of
    unique parameter values.

    Returns `idx_cmds` as a list of tuples:
       (par_idx, date, time, cmd, tlmsid, scs, step, timeline_id)
    """
    idx_cmds = []

    for i, cmd in enumerate(cmds):
        if i % 10000 == 9999:
            logger.info(' Iteration {}'.format(i))

        # Define a consistently ordered tuple that has all command parameter information
        pars = cmd['params']
        keys = set(pars.keys()) - set(('SCS', 'STEP', 'TLMSID'))
        if cmd['tlmsid'] == 'AOSTRCAT':
            # Skip star catalog command because that has many (uninteresting) parameters
            # and increases the file size and load speed by an order of magnitude.
            pars_tup = ()
        else:
            pars_tup = tuple((key.lower(), pars[key]) for key in sorted(keys))

        try:
            par_idx = pars_dict[pars_tup]
        except KeyError:
            par_idx = len(pars_dict)
            pars_dict[pars_tup] = par_idx

        idx_cmds.append((par_idx, cmd['date'], cmd['type'], cmd.get('tlmsid'),
                         cmd['scs'], cmd['step'], cmd['timeline_id']))

    return idx_cmds

def make_h5_col_file(dats, colname):
    """Make a new h5 table to hold column from ``dat``."""
    filename = msid_files['msid'].abs
    filedir = os.path.dirname(filename)
    if not os.path.exists(filedir):
        os.makedirs(filedir)

    # Estimate the number of rows for 20 years based on available data
    times = np.hstack([x['TIME'] for x in dats])
    dt = np.median(times[1:] - times[:-1])
    n_rows = int(86400 * 365 * 20 / dt)

    filters = tables.Filters(complevel=5, complib='zlib')
    h5 = tables.openFile(filename, mode='w', filters=filters)

    col = dats[0][colname]
    h5shape = (0,) + col.shape[1:]
    h5type = tables.Atom.from_dtype(col.dtype)
    h5.createEArray(h5.root, 'data', h5type, h5shape, title=colname,
                    expectedrows=n_rows)
    h5.createEArray(h5.root, 'quality', tables.BoolAtom(), (0,), title='Quality',
                    expectedrows=n_rows)

    logger.info('Made {} shape={} with n_rows(1e6)={}'
                .format(colname, h5shape, n_rows / 1.0e6))
    h5.close()

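# Usage sketch: the 'data' and 'quality' EArrays created above grow by
# appending (``dat`` and ``bad_mask`` are hypothetical: a structured array
# with a column for this MSID and a matching boolean quality array):
#
#     h5 = tables.openFile(msid_files['msid'].abs, mode='a')
#     h5.root.data.append(dat[colname])
#     h5.root.quality.append(bad_mask)
#     h5.close()
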
def _auto_file_discovery(ingest_type, source_type):
    """ Automatically discover and return a list of files to ingest in the staging area.

    Parameters
    ==========
    ingest_type : str
        the type of file to match in the file search.  Examples of valid
        values are: h5, hdf, csv, txt
    source_type : str
        the source type of the data, e.g. (FILE_PREFIX, directory path, or Sim
        vs. Flight).  An empty string means sources of ``ingest_type`` in the
        default staging area.

    Returns
    =======
    A Python list of filenames matching the discovery parameters.
    """
    logger.info(f"Attempting automatic file discovery in {STAGING_DIRECTORY} "
                f"with ingest type {ingest_type}... ")

    ingest_files = []
    ingest_files.extend(sorted(glob.glob(f"{STAGING_DIRECTORY}/{source_type}*.{ingest_type}")))

    logger.info(f"{len(ingest_files)} file(s) staged in {STAGING_DIRECTORY} ...")

    return ingest_files

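# Usage sketch (default staging area, no source prefix):
#
#     h5_files = _auto_file_discovery(ingest_type='h5', source_type='')
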
def _sync_stat_archive(opt, msid_files, logger, content, stat, index_tbl):
    """
    Actual worker for syncing the stat archive for ``content``.
    """
    # Get the last row of data from the length of the TIME.col (or archfiles?)
    ft = fetch.ft
    ft['content'] = content
    ft['interval'] = stat

    stats_dir = Path(msid_files['statsdir'].abs)
    if not stats_dir.exists():
        logger.debug(f'Skipping {stat} data for {content}: no directory')
        return

    logger.info('')
    logger.info(f'Processing {stat} data for {content}')

    # Get the MSIDs that are in client archive
    msids = [str(fn.name)[:-3] for fn in stats_dir.glob('*.h5')]
    if not msids:
        logger.debug(f'Skipping {stat} data for {content}: no stats h5 files')
        return
    else:
        logger.debug(f'Stat msids are {msids}')

    last_date_id, last_date_id_file = get_last_date_id(msid_files, msids, stat, logger)
    logger.verbose(f'Got {last_date_id} as last date_id that was applied to archive')

    # Get list of applicable dat objects (new data, before opt.date_stop).  Also
    # return ``date_id`` which is the date_id of the final data set in the list.
    # This will be written as the new ``last_date_id``.
    try:
        dats, date_id = get_stat_data_sets(ft, index_tbl, last_date_id, logger, opt)
    except urllib.error.URLError as err:
        if 'timed out' in str(err):
            msg = f' ERROR: timed out getting {stat} data for {content}'
            logger.error(msg)
            process_errors.append(msg)
            return
        else:
            raise

    if not dats:
        return

    dat, msids = concat_data_sets(dats, ['data'])
    with DelayedKeyboardInterrupt(logger):
        with timing_logger(logger, f'Applying updates to {len(msids)} h5 files'):
            for msid in msids:
                fetch.ft['msid'] = msid
                stat_file = msid_files['stats'].abs
                if os.path.exists(stat_file):
                    append_stat_col(dat, stat_file, msid, date_id, opt, logger)

            logger.debug(f'Updating {last_date_id_file} with {date_id}')
            if not opt.dry_run:
                with open(last_date_id_file, 'w') as fh:
                    fh.write(f'{date_id}')

def plot_att_errors_per_obsid(obsid, plot_dir, coord='dr', att_errors=None,
                              save=False, on_the_fly=False):
    """
    Make png plot of att errors vs time per obsid.

    :param obsid: obsid
    :param att_errors: dictionary with keys including at minimum a coordinate
                       ('dr', 'dy' or 'dp' for roll, yaw and pitch) and 'time'
                       (default 'dr')
    :param on_the_fly: default False, if True then ignore param att_errors and
                       derive attitude errors for the requested obsid.
    """
    if coord not in ('dr', 'dp', 'dy'):
        raise ValueError('Coordinate for att error should be dr, dp or dy')

    if on_the_fly:
        att_errors = get_observed_att_errors(obsid, on_the_fly=on_the_fly)
        if att_errors is None:
            return None
    else:
        if att_errors is None:
            raise ValueError('Need to provide att_errors if on_the_fly is False')

    errs = att_errors[coord]
    dates = DateTime(att_errors['time'])

    plt.figure(figsize=(8, 2.5))

    # Skip the first 5 min for observations with duration > 5 min
    dur = dates.secs[-1] - dates.secs[0]
    if dur > 5 * 60:
        ok = dates.secs > dates.secs[0] + 5 * 60
    else:
        ok = np.ones_like(dates.secs, dtype=bool)

    plt.plot(dates.secs[ok] - dates.secs[ok][0], errs[ok], '-', lw=2, color='k')
    ylims = plt.ylim()
    if max(ylims) > 100:
        plt.ylim(-max(ylims) - 10, max(ylims) + 10)
    else:
        plt.ylim(-100, 100)
    plt.ylabel(f'{coord} (arcsec)')
    plt.xlabel('Time (sec)')
    plt.grid(ls=':')
    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.25, top=0.95)

    if save:
        outroot = os.path.join(plot_dir, f'observed_{coord}s_{obsid}')
        logger.info(f'Writing plot file {outroot}.png')
        plt.savefig(outroot + '.png')
        plt.close()

    return att_errors

def check_filetype(filetype):
    ft['content'] = filetype.content.lower()

    if not os.path.exists(msid_files['archfiles'].abs):
        logger.info('No archfiles.db3 for %s - skipping' % ft['content'])
        return

    logger.info('Checking {} content type, archfiles {}'.format(
        ft['content'], msid_files['archfiles'].abs))

    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs)
    archfiles = db.fetchall('select * from archfiles')
    db.conn.close()

    if opt.check_order:
        for archfile0, archfile1 in zip(archfiles[:-1], archfiles[1:]):
            exception = (archfile0['startmjf'] == 77826 and
                         archfile0['year'] == 2004 and
                         archfile0['doy'] == 309)
            if archfile1['tstart'] < archfile0['tstart'] and not exception:
                logger.info('ERROR: archfile order inconsistency\n {}\n{}'
                            .format(archfile0, archfile1))

    if not opt.check_lengths:
        colnames = ['TIME']
    else:
        colnames = [x for x in pickle.load(open(msid_files['colnames'].abs))
                    if x not in fetch.IGNORE_COLNAMES]

    lengths = set()
    for colname in colnames:
        ft['msid'] = colname

        h5 = tables.open_file(msid_files['msid'].abs, mode='r')
        length = len(h5.root.data)
        h5.root.data[length - 1]
        h5.close()

        logger.verbose('MSID {} has length {}'.format(colname, length))
        lengths.add(length)
        if len(lengths) != 1:
            logger.info('ERROR: inconsistent MSID length {} {} {}'.format(
                ft['content'], colname, lengths))
            return  # Other checks don't make sense now

    length = lengths.pop()
    archfile = archfiles[-1]
    if archfile['rowstop'] != length:
        logger.info('ERROR: inconsistent archfile {}: '
                    'last rowstop={} MSID length={}'.format(
                        ft['content'], archfile['rowstop'], length))
        if opt.find_glitch:
            find_glitch()

def get_stats_over_time(start, stop=None, sp=False, dp=None, ir=False, ms=None,
                        slots='combined', t_samp=1000):
    """
    Equivalent to get_stats_per_interval, but concatenate the results for
    all obsids within the specified time interval.
    """
    # Get obsids in time range and collect all the per-interval statistics
    obsids = events.obsids.filter(start, stop, dur__gt=2000)
    stats_list = []
    for obsid in obsids:
        set_FILES_context(obsid.obsid, sp, dp, ir, ms, t_samp, slots)

        # First check that there is the raw dat file for this obsid.  Nothing
        # can be done without this.
        dat_file = FILES['dat.pkl'].rel
        if not os.path.exists(dat_file):
            logger.info('Skipping {}: {} not in archive'.format(obsid, dat_file))
            continue

        # Now get the stats for this obsid.  Hopefully it has already been computed and
        # is cached as a file.  If not, try to compute the stats (and cache).  If that
        # fails then press on but touch a file to indicate failure so subsequent attempts
        # don't bother.
        logger.info('Processing obsid {}'.format(obsid))
        try:
            stats = get_cached_stats()  # depends on the context set previously
        except FailedStatsFile:
            # Previously failed
            logger.info(' Skipping {}: failed statistics'.format(obsid.obsid))
            continue
        except NoStatsFile:
            logger.info(' Reading pickled data file {}'.format(dat_file))
            dat = pickle.load(open(dat_file, 'r'))
            try:
                logger.info(' Computing statistics')
                if slots == 'combined':
                    stats = get_stats_per_interval_combined(dat, sp, dp, ir, ms, t_samp)
                else:
                    stats = get_stats_per_interval_per_slot(dat, sp, dp, ir, ms,
                                                            slots, t_samp)
            except ValueError as err:
                open(FILES['stats.ERR'].rel, 'w')  # touch file to indicate failure to compute stats
                logger.warn(' ERROR: {}'.format(err))

        stats['obsid'] = obsid.obsid
        stats_list.append(stats)

    stats = {}
    for case in STAT_CASES:
        stats[case] = {}
        for stat_type in STAT_TYPES:
            stats[case][stat_type] = np.hstack([x[case][stat_type] for x in stats_list])

    # Set corresponding array of obsids for back-tracing outliers etc
    stats['obsid'] = np.hstack([np.ones(len(x['obc']['std']), dtype=int) * x['obsid']
                                for x in stats_list])

    return stats

def main():
    global opt, ft, msid_files, logger

    opt, args = get_options()
    ft = fetch.ft
    msid_files = pyyaks.context.ContextDict('add_derived.msid_files',
                                            basedir=opt.data_root)
    msid_files.update(file_defs.msid_files)
    logger = pyyaks.logger.get_logger(name='engarchive', level=pyyaks.logger.VERBOSE,
                                      format="%(asctime)s %(message)s")

    # Get the derived parameter classes
    dp_classes = (getattr(derived, x) for x in dir(derived) if x.startswith('DP_'))
    dp_classes = [x for x in dp_classes
                  if hasattr(x, '__base__') and issubclass(x, derived.DerivedParameter)]
    content_defs = {}
    for dp_class in dp_classes:
        colname = dp_class.__name__.upper()
        dp = dp_class()
        content = dp.content
        if opt.content == [] or any(re.match(x + r'\d+', content)
                                    for x in opt.content):
            dpd = content_defs.setdefault(content, {})
            dpd.setdefault('classes', {'TIME': None})
            dpd['content'] = content
            dpd['classes'][colname] = dp_class
            dpd['mnf_step'] = dp.mnf_step
            dpd['time_step'] = dp.time_step

    for content, content_def in content_defs.items():
        ft['content'] = content
        logger.info('CONTENT = {}'.format(content))

        # Make content directory
        if not os.path.exists(msid_files['contentdir'].rel):
            logger.info('Making directory {}'.format(msid_files['contentdir'].rel))
            os.mkdir(msid_files['contentdir'].rel)

        # Make the archfiles.db3 file (if needed)
        make_archfiles_db(msid_files['archfiles'].abs, content_def)

        for colname in content_def['classes']:
            ft['msid'] = colname
            logger.debug('MSID = {}'.format(colname))
            # Create colnames and colnames_all pickle files (if needed) and add colname
            add_colname(msid_files['colnames'].rel, colname)
            add_colname(msid_files['colnames_all'].rel, colname)

            make_msid_file(colname, content, content_def)

        add_colname(msid_files['colnames_all'].rel, 'QUALITY')

def get_index_tbl(content, logger, opt):
    # Read the index file to know what is available for new data
    with get_readable(opt.sync_root, opt.is_url, sync_files['index']) as (index_input, uri):
        if index_input is None:
            # If index_file is not found then get_readable returns None
            logger.info(f'No new sync data for {content}: {uri} not found')
            return None
        logger.info(f'Reading index file {uri}')
        index_tbl = Table.read(index_input, format='ascii.ecsv')

    return index_tbl

def read_archfile(i, f, filetype, row, colnames, archfiles, db):
    """Read filename ``f`` with index ``i`` (position within list of filenames).
    The file has type ``filetype`` and will be added to MSID file at row index
    ``row``.  ``colnames`` is the list of column names for the content type
    (not used here).
    """
    # Check if filename is already in archfiles.  If so then abort further processing.
    filename = os.path.basename(f)
    if db.fetchall('SELECT filename FROM archfiles WHERE filename=?', (filename,)):
        logger.verbose('File %s already in archfiles - unlinking and skipping' % f)
        os.unlink(f)
        return None, None

    # Read FITS archive file and accumulate data into dats list and header into headers dict
    logger.info('Reading (%d / %d) %s' % (i, len(archfiles), filename))
    hdus = pyfits.open(f)
    hdu = hdus[1]

    try:
        dat = converters.convert(hdu.data, filetype['content'])
    except converters.NoValidDataError:
        # When creating files allow NoValidDataError
        hdus.close()
        logger.warning('WARNING: no valid data in data file {}'.format(filename))
        return None, None
    except converters.DataShapeError as err:
        hdus.close()
        logger.warning('WARNING: skipping file {} with bad data shape: ASCDSVER={} {}'
                       .format(filename, hdu.header['ASCDSVER'], err))
        return None, None

    # Accumulate relevant info about archfile that will be ingested into
    # MSID h5 files.  Commit info before h5 ingest so if there is a failure
    # the needed info will be available to do the repair.
    archfiles_row = dict((x, hdu.header.get(x.upper())) for x in archfiles_hdr_cols)
    archfiles_row['checksum'] = hdu.header.get('checksum') or hdu._checksum
    archfiles_row['rowstart'] = row
    archfiles_row['rowstop'] = row + len(dat)
    archfiles_row['filename'] = filename
    archfiles_row['filetime'] = int(re.search(r'(\d+)', archfiles_row['filename']).group(1))
    filedate = DateTime(archfiles_row['filetime']).date
    year, doy = (int(x) for x in re.search(r'(\d\d\d\d):(\d\d\d)', filedate).groups())
    archfiles_row['year'] = year
    archfiles_row['doy'] = doy

    hdus.close()

    return dat, archfiles_row

def get_obsid_data(obsid):
    filename = os.path.join('data', str(obsid) + '.pkl')
    if os.path.exists(filename):
        dat = pickle.load(open(filename, 'r'))
    else:
        import update_flags_archive
        dat = update_flags_archive.get_obsid(obsid)
        pickle.dump(dat, open(filename, 'w'), protocol=-1)
        logger.info('Wrote data for {}'.format(obsid))

    return dat

def cut_stars(ai):
    starfiles = glob(os.path.join(ai['outdir'], "*stars.txt"))
    shutil.copy(starfiles[0], starfiles[0] + ".orig")
    starlines = open(starfiles[0]).read().split("\n")
    for slot in ai['skip_slot']:
        starlines = [i for i in starlines
                     if not re.match(r"^\s+{}\s+1.*".format(slot), i)]
    logger.info('Cutting stars by updating {}'.format(starfiles[0]))
    with open(starfiles[0], "w") as newlist:
        newlist.write("\n".join(starlines))

def main(args=None):
    global logger

    opt = get_opt(args)

    logger = pyyaks.logger.get_logger(name='kadi', level=opt.log_level,
                                      format="%(asctime)s %(message)s")
    log_run_info(logger.info, opt)

    # Set the global root data directory.  This gets used in ..paths to
    # construct file names.  The use of an env var is needed to allow
    # configurability of the root data directory within django.
    os.environ['KADI'] = os.path.abspath(opt.data_root)
    idx_cmds_path = IDX_CMDS_PATH()
    pars_dict_path = PARS_DICT_PATH()

    try:
        with open(pars_dict_path, 'rb') as fh:
            pars_dict = pickle.load(fh)
        logger.info('Read {} pars_dict values from {}'.format(len(pars_dict), pars_dict_path))
    except IOError:
        logger.info('No pars_dict file {} found, starting from empty dict'
                    .format(pars_dict_path))
        pars_dict = {}

    if not opt.mp_dir:
        for prefix in ('/', os.environ['SKA']):
            pth = Path(prefix, 'data', 'mpcrit1', 'mplogs')
            if pth.exists():
                opt.mp_dir = str(pth)
                break
        else:
            raise FileNotFoundError('no mission planning directories found (need --mp-dir)')
    logger.info(f'Using mission planning files at {opt.mp_dir}')

    # Recast as dict subclass that remembers if any element was updated
    pars_dict = UpdatedDict(pars_dict)

    stop = DateTime(opt.stop) if opt.stop else DateTime() + 21
    start = DateTime(opt.start) if opt.start else stop - 42

    cmds = get_cmds(start, stop, opt.mp_dir)
    idx_cmds = get_idx_cmds(cmds, pars_dict)
    add_h5_cmds(idx_cmds_path, idx_cmds)

    if pars_dict.n_updated > 0:
        with open(pars_dict_path, 'wb') as fh:
            pickle.dump(pars_dict, fh, protocol=2)
        logger.info('Wrote {} pars_dict values ({} new) to {}'
                    .format(len(pars_dict), pars_dict.n_updated, pars_dict_path))
    else:
        logger.info('pars_dict was unmodified, not writing')

def add_asol_to_h5(filename, asol):
    asol = asol.as_array()

    h5 = tables.openFile(filename, mode='a',
                         filters=tables.Filters(complevel=5, complib='zlib'))
    try:
        logger.info('Appending {} records to {}'.format(len(asol), filename))
        h5.root.data.append(asol)
    except tables.NoSuchNodeError:
        logger.info('Creating {}'.format(filename))
        h5.createTable(h5.root, 'data', asol, "Aimpoint drift", expectedrows=1e6)
    h5.root.data.flush()
    h5.close()

def check_filetype(filetype):
    ft['content'] = filetype.content.lower()

    if not os.path.exists(msid_files['archfiles'].abs):
        logger.info('No archfiles.db3 for %s - skipping' % ft['content'])
        return

    logger.info('Checking {} content type, archfiles {}'.format(
        ft['content'], msid_files['archfiles'].abs))

    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs)
    archfiles = db.fetchall('select * from archfiles')
    db.conn.close()

    if opt.check_order:
        for archfile0, archfile1 in zip(archfiles[:-1], archfiles[1:]):
            exception = (archfile0['startmjf'] == 77826 and
                         archfile0['year'] == 2004 and
                         archfile0['doy'] == 309)
            if archfile1['tstart'] < archfile0['tstart'] and not exception:
                logger.info('ERROR: archfile order inconsistency\n {}\n{}'
                            .format(archfile0, archfile1))

    if not opt.check_lengths:
        colnames = ['TIME']
    else:
        colnames = [x for x in pickle.load(open(msid_files['colnames'].abs))
                    if x not in fetch.IGNORE_COLNAMES]

    lengths = set()
    for colname in colnames:
        ft['msid'] = colname

        h5 = tables.openFile(msid_files['msid'].abs, mode='r')
        length = len(h5.root.data)
        h5.root.data[length - 1]
        h5.close()

        logger.verbose('MSID {} has length {}'.format(colname, length))
        lengths.add(length)
        if len(lengths) != 1:
            logger.info('ERROR: inconsistent MSID length {} {} {}'.format(
                ft['content'], colname, lengths))
            return  # Other checks don't make sense now

    length = lengths.pop()
    archfile = archfiles[-1]
    if archfile['rowstop'] != length:
        logger.info('ERROR: inconsistent archfile {}: '
                    'last rowstop={} MSID length={}'.format(
                        ft['content'], archfile['rowstop'], length))
        if opt.find_glitch:
            find_glitch()

def copy_dark_image():
    """
    Copy dark cal image from Ska to Mica
    """
    outdir = MICA_FILES['dark_cal_dir'].abs
    if not os.path.exists(outdir):
        logger.info('Making output dark cal directory {}'.format(outdir))
        os.makedirs(outdir)

    infile = SKA_FILES['dark_image.fits'].abs
    outfile = MICA_FILES['dark_image.fits'].abs
    logger.info('Copying {} to {}'.format(infile, outfile))
    shutil.copy(infile, outfile)

def get_archive_files(filetype):
    """Update FITS file archive with arc5gl and ingest files into msid (HDF5) archive"""

    # If running on the OCC GRETA network the cwd is a staging directory that
    # could already have files.  Also used in testing.
    # Don't return more than opt.max_arch_files files at once because of memory issues.
    files = sorted(glob.glob(filetype['fileglob']))
    if opt.occ or files:
        return sorted(files)[:opt.max_arch_files]

    # Retrieve CXC archive files in a temp directory with arc5gl
    arc5 = Ska.arc5gl.Arc5gl(echo=True)

    # End time for archive queries (minimum of start + max_query_days and NOW)
    datestop = DateTime(opt.date_now)

    # Get datestart as the most-recent file time from archfiles table.  However,
    # do not look back further than --max-lookback-time
    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs)
    vals = db.fetchone("select max(filetime) from archfiles")
    datestart = DateTime(max(vals['max(filetime)'],
                             datestop.secs - opt.max_lookback_time * 86400))

    # For *ephem0 the query needs to extend well into the future
    # to guarantee getting all available files.  This is the archive's fault.
    if filetype['level'] == 'L0' and filetype['instrum'] == 'EPHEM':
        datestop = datestop + 50

    # For instrum==EPHEM break queries into time ranges no longer than
    # 100000 sec each.  EPHEM files are at least 7 days long and generated
    # no more often than every ~3 days so this should work.
    n_queries = (1 if filetype['instrum'] != 'EPHEM'
                 else 1 + round((datestop.secs - datestart.secs) / 100000.))
    times = np.linspace(datestart.secs, datestop.secs, n_queries + 1)

    logger.info('********** %s %s **********' % (ft['content'], time.ctime()))

    for t0, t1 in zip(times[:-1], times[1:]):
        if t1 > t0:
            arc5.sendline('tstart=%s' % DateTime(t0).date)
            arc5.sendline('tstop=%s' % DateTime(t1).date)
            arc5.sendline('get %s' % filetype['arc5gl_query'].lower())
        else:
            logger.info('INFO: Skipping archive query because datestop={} < datestart={}'
                        .format(DateTime(t1).date, DateTime(t0).date))

    return sorted(glob.glob(filetype['fileglob']))

def end(message=None, context_file=None, context_keys=None):
    """End a pipeline sequence."""
    if context_file is not None:
        store_context(context_file, context_keys)
    if message is not None:
        logger.info('')
        logger.info('*' * 60)
        logger.info('** %-54s **' % (pyyaks.context.render(message)
                                     + (' FAILED' if status['fail'] else ' SUCCEEDED')))
        logger.info('*' * 60)
        logger.info('')
    status['fail'] = False

def plot_observed_aimpoints(obs_aimpoints):
    """
    Make png and html (mpld3) plot of data in the ``obs_aimpoints`` table.
    """
    plt.close(1)
    fig = plt.figure(1, figsize=(8, 4))

    dates = DateTime(obs_aimpoints['mean_date'])
    years = dates.frac_year
    times = dates.secs
    ok = years > np.max(years) - float(opt.lookback) / 365.25
    obs_aimpoints = obs_aimpoints[ok]
    times = times[ok]

    lolims = {}
    uplims = {}
    for axis in ('dx', 'dy'):
        lolims[axis] = obs_aimpoints[axis] > 10
        uplims[axis] = obs_aimpoints[axis] < -10
        obs_aimpoints[axis] = obs_aimpoints[axis].clip(-10, 10)

    ok = ((np.abs(obs_aimpoints['target_offset_y']) < 100) &
          (np.abs(obs_aimpoints['target_offset_z']) < 100))

    plot_cxctime(times[ok], obs_aimpoints['dx'][ok], 'ob', label='CHIPX')
    plot_cxctime(times[ok], obs_aimpoints['dy'][ok], 'or', label='CHIPY')

    plot_cxctime(times[~ok], obs_aimpoints['dx'][~ok], '*b',
                 label='CHIPX (offset > 100")')
    plot_cxctime(times[~ok], obs_aimpoints['dy'][~ok], '*r',
                 label='CHIPY (offset > 100")')

    for axis in ('dx', 'dy'):
        if np.any(lolims[axis]):
            plt.errorbar(DateTime(times[lolims[axis]]).plotdate,
                         obs_aimpoints[axis][lolims[axis]],
                         marker='.', yerr=1.5, lolims=True)
        if np.any(uplims[axis]):
            plt.errorbar(DateTime(times[uplims[axis]]).plotdate,
                         obs_aimpoints[axis][uplims[axis]],
                         marker='.', yerr=1.5, uplims=True)

    plt.grid()
    ymax = max(12, np.max(np.abs(obs_aimpoints['dx'])),
               np.max(np.abs(obs_aimpoints['dy'])))
    plt.ylim(-ymax, ymax)
    plt.ylabel('Offset (arcsec)')
    plt.title('Observed aimpoint offsets')

    plt.legend(loc='upper left', fontsize='small', title='', framealpha=0.5)

    outroot = os.path.join(opt.data_root, 'observed_aimpoints')
    logger.info('Writing plot files {}.png,html'.format(outroot))
    mpld3.plugins.connect(fig, mpld3.plugins.MousePosition(fmt='.1f'))
    mpld3.save_html(fig, outroot + '.html')
    fig.patch.set_visible(False)
    plt.savefig(outroot + '.png', frameon=False)

def run_ai(ais):
    """
    Run aspect pipeline 'flt_run_pipe' over the aspect intervals described in
    the list of dictionaries passed as an argument
    """
    ascds_env = getenv('source /home/ascds/.ascrc -r release', shell='tcsh')
    tcsh_shell("punlearn asp_l1_std", env=ascds_env)
    ocat_env = getenv('source /proj/sot/ska/data/aspect_authorization/'
                      'set_ascds_ocat_vars.csh', shell='tcsh')
    for var in ['ASCDS_OCAT_UNAME', 'ASCDS_OCAT_SERVER', 'ASCDS_OCAT_PWORD']:
        ascds_env[var] = ocat_env[var]

    if opt.param is not None and len(opt.param):
        for param in opt.param:
            cmd = 'pset asp_l1_std {}'.format(param)
            tcsh_shell(cmd, env=ascds_env)

    logger_fh = FilelikeLogger(logger)

    for ai in ais:
        pipe_cmd = ('flt_run_pipe -r {root} -i {indir} -o {outdir} '
                    '-t {pipe_ped} '
                    '-a "INTERVAL_START"={istart} '
                    '-a "INTERVAL_STOP"={istop} '
                    '-a obiroot={obiroot} '
                    '-a revision=1 ').format(**ai)
        if 'pipe_start_at' in ai:
            pipe_cmd = pipe_cmd + " -s {}".format(ai['pipe_start_at'])
        if 'pipe_stop_before' in ai:
            pipe_cmd = pipe_cmd + " -S {}".format(ai['pipe_stop_before'])
        if 'skip_slot' in ai:
            try:
                tcsh_shell(pipe_cmd + " -S check_star_data",
                           env=ascds_env, logfile=logger_fh)
            except ShellError as sherr:
                # if shell error, just check to see if get_star_data completed successfully
                loglines = open(logger_fh.filename).read()
                if not re.search("get_star_data completed successfully", loglines):
                    raise ShellError(sherr)
            cut_stars(ai)
            tcsh_shell(pipe_cmd + " -s check_star_data",
                       env=ascds_env, logfile=logger_fh)
        else:
            logger.info('Running pipe command {}'.format(pipe_cmd))
            tcsh_shell(pipe_cmd, env=ascds_env, logfile=logger_fh)

def get_baseline_characteristics_file():
    """
    Get the most recent (presumed to be baseline) OFLS characteristics file
    from the OCCweb configuration directory.

    :returns: file path (including characteristics/ subdirectory)
    """
    # Get the directory of available files.  This is an HTML doc which consists
    # of a single list of links.
    logger.info('Getting baseline characteristics file')
    occweb.URLS['char_constr'] = '/occweb/FOT/configuration/documents/Characteristics_Constraints/'
    html = occweb.get_url('char_constr')
    html = scrape.cleantext(html)
    page = parse_html(html)

    # Matches CHARACTERIS_DDMMMYY
    RE_CHAR_FILE = re.compile(r'CHARACTERIS_ (?P<date> \d\d [A-Z]{3} \d\d) $', re.VERBOSE)

    # Find every link tag in the document and inspect every one with
    # href=<valid characteristics name>
    dates = []
    filenames = []
    links = page.findAll('a')
    for link in links:
        # If link reference matches regex then include for processing
        filename = link.attrs['href']
        match = RE_CHAR_FILE.match(filename)
        if match:
            date = datetime.strptime(match.group('date'), '%d%b%y')
            dates.append(Time(date).yday)
            filenames.append(filename)
            logger.info(' Valid file: {}'.format(filename))
        else:
            logger.info(' Skipping invalid href: {}'.format(filename))

    # Get the filename for the most recent file
    filename = filenames[np.argmax(dates)]
    logger.info('{} is the most recent characteristics file'.format(filename))

    occweb.URLS[filename] = occweb.URLS['char_constr'] + '/' + filename
    logger.info('Fetching {}'.format(occweb.URLS[filename]))
    html = occweb.get_url(filename)

    outfile = os.path.join(opt.data_root, 'characteristics', filename)
    if not os.path.exists(outfile):
        with open(outfile, 'w') as fs:
            fs.write(html)

    return outfile

def delete_from_date(EventModel, start, set_update_date=True):
    from .events import models

    date_start = DateTime(start).date
    cls_name = EventModel.__name__

    if set_update_date:
        update = models.Update.objects.get(name=cls_name)
        logger.info("Updating {} date from {} to {}"
                    .format(cls_name, update.date, date_start))
        update.date = date_start
        update.save()

    events = EventModel.objects.filter(start__gte=date_start)
    logger.info("Deleting {} {} events after {}"
                .format(events.count(), cls_name, date_start))
    events.delete()

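# Usage sketch (assumes a kadi event model class, e.g. models.Manvr, and an
# illustrative date):
#
#     from kadi.events import models
#     delete_from_date(models.Manvr, '2015:200')
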
def plot_housing_temperature():
    dat = fetch.Msid('aach1t', '2000:001', stat='daily')
    plt.close(1)
    fig = plt.figure(figsize=(8, 4))
    year = Time(dat.times, format='cxcsec').decimalyear
    plt.plot(year, dat.vals)
    plt.grid()
    plt.xlabel('Year')
    plt.ylabel('Temperature (degF)')
    plt.title('Aspect Camera housing temperature trend')

    outroot = os.path.join(opt.data_root, 'aca_housing_temperature')
    logger.info('Writing plot files {}.png'.format(outroot))
    fig.patch.set_visible(False)
    plt.savefig(outroot + '.png', facecolor="none")

def get_cached_stats(root='stats'):
    stats_file = FILES['{}.pkl'.format(root)].rel
    failfile = FILES['{}.ERR'.format(root)].rel

    # If this was already computed then return the on-disk version
    if os.path.exists(stats_file):
        logger.info('Reading {}'.format(stats_file))
        all_stats = pickle.load(open(stats_file, 'r'))
        return all_stats
    elif os.path.exists(failfile):
        raise FailedStatsFile('Known fail: file {} exists'.format(failfile))
    else:
        raise NoStatsFile

def get_ending_roll_err(obsid, metrics_file=None):
    # Check for the value in the obsid file, otherwise recalculate
    saved_metrics = []
    if metrics_file is not None and os.path.exists(metrics_file):
        saved_metrics = Table.read(metrics_file, format='ascii.ecsv')

    if len(saved_metrics) and obsid in saved_metrics['obsid']:
        logger.info(f"Getting ending roll for {obsid} from file")
        return saved_metrics[saved_metrics['obsid'] == obsid][0]['ending_roll_err']
    else:
        att_errors = get_observed_att_errors(obsid, on_the_fly=True)
        if att_errors is None:
            return -9999
        else:
            return att_errors['dr'][-1]

def get_cd_dir(obsid, data_root):
    """
    Check if the centroid dashboard directory exists for the requested obsid,
    and create it if needed
    """
    if obsid == -1:
        return ""

    cd_obsid_root = os.path.join(data_root, str(obsid)[:2], f"{obsid}")

    if not os.path.exists(cd_obsid_root):
        os.makedirs(cd_obsid_root)
        logger.info(f'Creating directory {cd_obsid_root}')

    return cd_obsid_root

def read_metrics_from_file(filename):
    """
    Read in processed guide metrics (dr95, dr50, manvr_angle, ending dr,
    one shot updates, aber corrections) from file
    """
    if os.path.exists(filename):
        logger.info(f'Reading {filename}')
        dat_old = Table.read(filename, format='ascii.ecsv', guess=False)
        processed_obsids = set(dat_old['obsid'])
    else:
        logger.info(f'File {filename} does not exist')
        dat_old = None
        processed_obsids = set()

    return dat_old, processed_obsids

def add_colname(filename, colname):
    """Add ``colname`` to the pickled set() in ``filename``.  Create the pickle
    as needed.
    """
    if not os.path.exists(filename):
        logger.info('Creating colnames pickle {}'.format(filename))
        with open(filename, 'w') as f:
            pickle.dump(set(), f)

    colnames = pickle.load(open(filename, 'r'))
    if colname not in colnames:
        logger.info('Adding colname {} to colnames pickle {}'.format(colname, filename))
        colnames.add(colname)
        with open(filename, 'w') as f:
            pickle.dump(colnames, f)

def get_archive_files(filetype):
    """Update FITS file archive with arc5gl and ingest files into msid (HDF5) archive"""

    # If running on the OCC GRETA network the cwd is a staging directory that
    # could already have files.  Also used in testing.
    # Don't return more than opt.max_arch_files files at once because of memory
    # issues on gretasot.  This only comes up when there has been some problem or stoppage.
    files = sorted(glob.glob(filetype['fileglob']))
    if opt.occ or files:
        return sorted(files)[:opt.max_arch_files]

    # Retrieve CXC archive files in a temp directory with arc5gl
    arc5 = Ska.arc5gl.Arc5gl(echo=True)

    # End time for archive queries (minimum of start + max_query_days and NOW)
    datestop = DateTime(opt.date_now)

    # Get datestart as the most-recent file time from archfiles table.  However,
    # do not look back further than --max-lookback-time
    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs)
    vals = db.fetchone("select max(filetime) from archfiles")
    datestart = DateTime(max(vals['max(filetime)'],
                             datestop.secs - opt.max_lookback_time * 86400))

    # For *ephem0 the query needs to extend well into the future
    # to guarantee getting all available files.  This is the archive's fault.
    if filetype['level'] == 'L0' and filetype['instrum'] == 'EPHEM':
        datestop = datestop + 50

    # For instrum==EPHEM break queries into time ranges no longer than
    # 100000 sec each.  EPHEM files are at least 7 days long and generated
    # no more often than every ~3 days so this should work.
    n_queries = (1 if filetype['instrum'] != 'EPHEM'
                 else 1 + round((datestop.secs - datestart.secs) / 100000.))
    times = np.linspace(datestart.secs, datestop.secs, n_queries + 1)

    logger.info('********** %s %s **********' % (ft['content'], time.ctime()))

    for t0, t1 in zip(times[:-1], times[1:]):
        if t1 > t0:
            arc5.sendline('tstart=%s' % DateTime(t0).date)
            arc5.sendline('tstop=%s' % DateTime(t1).date)
            arc5.sendline('get %s' % filetype['arc5gl_query'].lower())
        else:
            logger.info('INFO: Skipping archive query because datestop={} < datestart={}'
                        .format(DateTime(t1).date, DateTime(t0).date))

    return sorted(glob.glob(filetype['fileglob']))

def read_archfile(i, f, filetype, row, colnames, archfiles, db):
    """Read filename ``f`` with index ``i`` (position within list of filenames).
    The file has type ``filetype`` and will be added to MSID file at row index
    ``row``.  ``colnames`` is the list of column names for the content type
    (not used here).
    """
    # Check if filename is already in archfiles.  If so then abort further processing.
    filename = os.path.basename(f)
    if db.fetchall('SELECT filename FROM archfiles WHERE filename=?', (filename,)):
        logger.verbose('File %s already in archfiles - unlinking and skipping' % f)
        os.unlink(f)
        return None, None

    # Read FITS archive file and accumulate data into dats list and header into headers dict
    logger.info('Reading (%d / %d) %s' % (i, len(archfiles), filename))
    hdus = pyfits.open(f, character_as_bytes=True)
    hdu = hdus[1]

    try:
        dat = converters.convert(hdu.data, filetype['content'])
    except converters.NoValidDataError:
        # When creating files allow NoValidDataError
        hdus.close()
        logger.warning('WARNING: no valid data in data file {}'.format(filename))
        return None, None
    except converters.DataShapeError as err:
        hdus.close()
        logger.warning('WARNING: skipping file {} with bad data shape: ASCDSVER={} {}'
                       .format(filename, hdu.header['ASCDSVER'], err))
        return None, None

    # Accumulate relevant info about archfile that will be ingested into
    # MSID h5 files.  Commit info before h5 ingest so if there is a failure
    # the needed info will be available to do the repair.
    archfiles_row = dict((x, hdu.header.get(x.upper())) for x in archfiles_hdr_cols)
    archfiles_row['checksum'] = hdu.header.get('checksum') or hdu._checksum
    archfiles_row['rowstart'] = row
    archfiles_row['rowstop'] = row + len(dat)
    archfiles_row['filename'] = filename
    archfiles_row['filetime'] = int(re.search(r'(\d+)', archfiles_row['filename']).group(1))
    filedate = DateTime(archfiles_row['filetime']).date
    year, doy = (int(x) for x in re.search(r'(\d\d\d\d):(\d\d\d)', filedate).groups())
    archfiles_row['year'] = year
    archfiles_row['doy'] = doy

    hdus.close()

    return dat, archfiles_row