def are_we_in_comm(verbose=False, cadence=2, fake_comm=False, in_comm_counter=None):
    """Report whether VCDU frames have arrived from MAUDE in the last 60 s.

    Parameters
    ----------
    verbose : bool
        If True, print a one-line status to stdout.
    cadence : int or float
        Seconds to sleep before querying, to slow down polling loops.
    fake_comm : bool
        If True, report being in comm regardless of the fetch result.
    in_comm_counter : int or None
        Optional pass counter shown in the verbose "IN COMM" line. It is
        omitted from the line when None.

    Returns
    -------
    bool
        True if in comm (or ``fake_comm`` is True), False otherwise.
    """
    # Always be fetching from MAUDE
    fetch.data_source.set('maude allow_subset=True')

    # These fetches are really fast. Slow the cadence a bit.
    time.sleep(cadence)  # cadence is in seconds here

    # If there are VCDU frame values within the last 60 seconds, this will
    # not be empty.
    ref_vcdu = fetch.Msid('CVCDUCTR', start=CxoTime.now() - 60 * u.s)
    have_frames = len(ref_vcdu) > 0

    in_comm = have_frames
    if fake_comm is True:
        in_comm = True

    if verbose:
        timestamp = CxoTime.now().strftime("%m/%d/%Y %H:%M:%S")
        if in_comm:
            # With fake_comm the fetch may be empty, so there may be no
            # latest VCDU value to show.
            latest_vcdu = ref_vcdu.vals[-1] if have_frames else 'N/A'
            # The counter lives in the caller; it used to be read here as an
            # undefined name.
            counter_text = '' if in_comm_counter is None else f' | #{in_comm_counter}'
            print(
                f'({timestamp} | VCDU {latest_vcdu}{counter_text}) IN COMM!',
                end='\r')
        else:
            print(f'({timestamp}) Not in Comm. ', end='\r\r\r')

    return in_comm
def process_obsids(obsids, update=True, retry=False):
    """Build a report for each obsid, recording failures next to the output.

    For every obsid the output directory is ``REPORT_ROOT/<first two digits
    of the zero-padded obsid>/<zero-padded obsid>``. ``main(obsid)`` does the
    real work. If it raises, this function writes an empty ``proc_err``
    marker, a ``trace.txt`` traceback, a ``notes.json`` carrying the error
    message in ``last_sched``, and a stub ``index.html``, then stores the
    notes via ``save_state_in_db``.

    Parameters
    ----------
    obsids : iterable of int
        Obsids to process.
    update : bool
        If False, skip obsids whose output directory already exists.
    retry : bool
        If False, skip obsids that have a ``proc_err`` marker from an
        earlier failed run.
    """
    import traceback

    report_root = REPORT_ROOT

    for obsid in obsids:
        strobs = "%05d" % obsid
        chunk_dir = strobs[0:2]
        topdir = os.path.join(report_root, chunk_dir)
        outdir = os.path.join(topdir, strobs)

        if os.path.exists(outdir) and not update:
            logger.info("Skipping {}, output dir exists.".format(obsid))
            continue
        if not retry and os.path.exists(os.path.join(outdir, "proc_err")):
            logger.warning("Skipping {}, previous processing error.".format(obsid))
            continue

        os.makedirs(outdir, exist_ok=True)

        # Delete files from old failure if reprocessing
        for failfile in ['proc_err', 'trace.txt']:
            failpath = os.path.join(outdir, failfile)
            if os.path.exists(failpath):
                os.unlink(failpath)

        try:
            main(obsid)
        except Exception as err:
            # Exception rather than a bare except, so that Ctrl-C and
            # SystemExit stop the batch instead of being recorded as a
            # processing error for this obsid.
            logger.warning("Failed report on {}".format(obsid))

            # Make an empty file to record the error status
            with open(os.path.join(outdir, 'proc_err'), 'w'):
                pass

            # Write out the traceback too
            with open(os.path.join(outdir, 'trace.txt'), 'w') as trace_file:
                traceback.print_tb(err.__traceback__, file=trace_file)

            # Write out a notes json file
            notes = {'report_version': REPORT_VERSION,
                     'obsid': obsid,
                     'checked_date': CxoTime.now().date,
                     'last_sched': "{}".format(str(err)),
                     'vv_version': None,
                     'vv_revision': None,
                     'aspect_1_id': None,
                     'ocat_status': None,
                     'long_term': None,
                     'short_term': None,
                     'starcheck': None}
            with open(os.path.join(outdir, 'notes.json'), 'w') as f:
                f.write(json.dumps(notes, sort_keys=True, indent=4))

            # Make a stub html page
            proc_date = CxoTime.now().date
            jinja_env = jinja2.Environment(
                loader=jinja2.PackageLoader('mica.report'))
            jinja_env.line_comment_prefix = '##'
            jinja_env.line_statement_prefix = '#'
            template = jinja_env.get_template('proc_error.html')
            page = template.render(obsid=obsid,
                                   proc_date=proc_date,
                                   version=version)
            full_report_file = os.path.join(outdir, 'index.html')
            logger.info("Writing out error stub report to {}".format(full_report_file))
            with open(full_report_file, 'w') as f:
                f.write(page)

            # Save the bad state in the database
            save_state_in_db(obsid, notes)
def update():
    """Reprocess the report for every obsid found in the recent states.

    States are requested from ``get_states`` starting at
    ``CxoTime.now() - 7`` and each returned obsid is handed to
    ``process_obsids`` on its own.
    """
    # NOTE(review): the bare 7 is presumably days; confirm how the time
    # class interprets a unitless offset.
    window_start = CxoTime.now() - 7
    states = get_states(start=window_start,
                        state_keys=['obsid'],
                        merge_identical=True)
    for raw_obsid in states['obsid']:
        # the int() is here to keep json happy downstream
        obsid = int(raw_obsid)
        process_obsids([obsid])
def main(obsid):
    """Build the HTML report pages and ``notes.json`` for one obsid.

    Output goes to ``REPORT_ROOT/<first two digits>/<zero-padded obsid>``.
    The function gathers the target summary, links, starcheck catalog,
    acquisition/track statistics, temperatures and (for non-ER obsids) V&V
    data, renders the jinja templates from the ``mica.report`` package, and
    saves the resulting state with ``save_state_in_db``.

    If no starcheck catalog is available (a ``LookupError`` in the starcheck
    section), only an OCAT-level ``index.html`` and ``notes.json`` are
    written and the function returns early.

    Parameters
    ----------
    obsid : int
        Observation id to report on.

    Raises
    ------
    ValueError
        If the obsid has no target summary and is not treated as an ER
        (``obsid > 40000``).
    """
    report_root = REPORT_ROOT
    strobs = "%05d" % obsid
    chunk_dir = strobs[0:2]
    topdir = os.path.join(report_root, chunk_dir)
    outdir = os.path.join(topdir, strobs)

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    jinja_env = jinja2.Environment(
        loader=jinja2.PackageLoader('mica.report'))
    jinja_env.line_comment_prefix = '##'
    jinja_env.line_statement_prefix = '#'

    logger.info("Making report for {}".format(obsid))
    logger.debug("Getting target info from axafapstat")
    summary = target_summary(obsid)

    # Links
    logger.debug("Looking up obsid links")

    # Not read again below; kept as a record of the processing stages.
    all_progress = {'science': ['ocat',
                                'long_term', 'short_term',
                                'starcheck', 'observed',
                                'engineering', 'aspect_1',
                                'cxcds_vv', 'released'],
                    'er': ['starcheck', 'observed',
                           'engineering', 'cxcds_vv']}
    report_status = {}

    er = summary is None and obsid > 40000
    progress = all_progress['er' if er else 'science']
    if er:
        links = obs_links(obsid)
    else:
        if summary is None:
            raise ValueError("Obsid not found in target table")
        report_status['ocat'] = summary['status']
        links = obs_links(obsid, summary['seq_nbr'], summary['lts_lt_plan'])

    if not er and (summary['status'] in ['canceled', 'unobserved', 'untriggered']):
        logger.debug(
            "Obsid {obsid} has status {status}".format(
                obsid=obsid, status=summary['status']))

    if summary is not None:
        if summary['lts_lt_plan'] is not None:
            report_status['long_term'] = summary['lts_lt_plan']
        if summary['soe_st_sched_date']:
            report_status['short_term'] = summary['soe_st_sched_date']

    last_sched = ''
    if not er:
        if summary['lts_lt_plan']:
            last_sched = "in LTS for {}".format(
                str(summary['lts_lt_plan']))
        if summary['soe_st_sched_date']:
            last_sched = "in ST sched for {}".format(
                str(summary['soe_st_sched_date']))

    ## Starcheck
    logger.debug("Fetching starcheck catalog")
    try:
        if summary is not None and summary['lts_lt_plan'] is not None:
            plan_date = Time(summary['lts_lt_plan'])
            if plan_date.cxcsec > (CxoTime.now() + 21).secs:
                # The message used to reference an undefined name `plan`,
                # which turned this LookupError into a NameError.
                raise LookupError(
                    "No starcheck expected for {} lts date".format(
                        str(summary['lts_lt_plan'])))
        # Results are overwritten by get_starcheck() just below. The call is
        # kept because it may raise LookupError for a missing mission-planning
        # directory.
        mp_dir, status, mp_date = starcheck.get_mp_dir(obsid)
        obs_sc, mp_dir, status = get_starcheck(obsid)
        logger.debug("Plotting starcheck catalog to {}".format(
            os.path.join(outdir, 'starcheck.png')))
        if obs_sc['obs']['point_ra'] is None:
            raise LookupError("Observation has no pointing.")
        if len(obs_sc['cat']) == 0:
            raise LookupError("Observation has no catalog")
        fig, cat, obs = catalog.plot(obsid, mp_dir)
        sc = starcheck.get_starcheck_catalog(obsid, mp_dir)
        fig.savefig(os.path.join(outdir, 'starcheck.png'))
        plt.close('all')
    except LookupError as detail:
        logger.info("No starcheck catalog.  Writing out OCAT info only")
        logger.info(detail)
        template = jinja_env.get_template('report.html')
        page = template.render(obsid=obsid,
                               target=summary,
                               links=links,
                               temps=None,
                               pred_temp=None,
                               cat_table=None,
                               er=er if er else None,
                               last_sched=last_sched,
                               obs=None,
                               version=version)
        full_report_file = os.path.join(outdir, 'index.html')
        logger.info("Writing out full report to {}".format(full_report_file))
        _write_report_file(full_report_file, page)

        notes = {'report_version': REPORT_VERSION,
                 'vv_version': None,
                 'vv_revision': None,
                 'aspect_1_id': None,
                 'last_sched': last_sched,
                 'ocat_status': report_status.get('ocat'),
                 'long_term': str(report_status.get('long_term')),
                 'short_term': str(report_status.get('short_term')),
                 'starcheck': report_status.get('starcheck'),
                 'obsid': obsid,
                 'checked_date': CxoTime.now().date}
        _write_report_file(os.path.join(outdir, 'notes.json'),
                           json.dumps(notes, sort_keys=True, indent=4))
        save_state_in_db(obsid, notes)
        return

    if not er and 'shortterm' in links:
        # NOTE(review): an mp_dir that does not match this pattern makes
        # dir_match None and raises AttributeError on the next line.
        dir_match = re.match(r'/\d{4}/(\w{3}\d{4})/ofls(\w)', mp_dir)
        mp_label = "{}{}".format(dir_match.group(1),
                                 dir_match.group(2).upper())
        last_sched = 'in <A HREF="{}">{}</A> at {}'.format(
            links['shortterm']['link'], mp_label, str(obs_sc['obs']['mp_starcat_time']))

    report_status['starcheck'] = mp_dir

    # engineering data available
    logger.debug("Getting acq and trak stats")
    acqs = get_obs_acq_stats(obsid)
    trak = get_obs_trak_stats(obsid)
    temps = get_obs_temps(obsid, outdir)
    pred_temp = sc.get('pred_temp')
    if acqs or trak:
        last_sched = "eng. data available"

    er_status = None
    if er:
        stat_map = {'ran': 'ran on',
                    'approved': 'approved for',
                    'ran_pretimelines': 'ran on',
                    'planned': 'planned for'}
        er_status = "{} {}".format(stat_map[status], obs_sc['obs']['mp_starcat_time'])
        run_obspar = None
        vv = None
        logger.info("Processing ER; no V&V available")
    else:
        # obspar ingested
        try:
            run_obspar = obspar.get_obspar(obsid, version='last')
        except Exception:
            # Best effort: the report is still useful without an obspar.
            # Exception (not a bare except) lets Ctrl-C / SystemExit through.
            run_obspar = None

        # v&v available
        try:
            vv = get_vv(obsid, version='last')
        except LookupError:
            vv = None

        try:
            if vv is None or 'vv_version' not in vv or vv['vv_version'] < WANT_VV_VERSION:
                mica.vv.process.process(obsid, version="last")
                vv = get_vv(obsid, version='last')
            for slot in vv['slots']:
                # A list-valued dy_med triggers one reprocessing pass.
                if isinstance(vv['slots'][slot]['dy_med'], list):
                    mica.vv.process.process(obsid, version="last")
                    vv = get_vv(obsid, version='last')
                    break
            vv_files = get_vv_files(obsid, version='last')
            last_sched = "complete through mica v&v"
        except LookupError:
            logger.info("No V&V available")
            vv = None

    if vv is not None:
        report_status['mica_vv'] = True
        for file in vv_files:
            newfile = os.path.join(outdir, os.path.basename(file))
            if not os.path.exists(newfile):
                logger.debug("linking {} into {}".format(file, outdir))
                os.symlink(file, newfile)
        asp_dir = asp_l1.get_obs_dirs(obsid)['last']
        asp_logs = sorted(glob(os.path.join(asp_dir, "asp_l1_f*log*gz")))
        for log, interval in zip(asp_logs, vv['intervals']):
            logmatch = re.search(r'(.*log)\.gz', os.path.basename(log))
            if logmatch:
                newlogname = "{}.txt".format(logmatch.group(1))
                newlog = os.path.join(outdir, newlogname)
                if not os.path.exists(newlog):
                    logger.debug("copying/gunzipping asp log {}".format(newlog))
                    with gzip.open(log, 'rt') as gz_log:
                        logtext = gz_log.readlines()
                    with open(newlog, 'w') as f:
                        f.writelines(logtext)
                interval['loglink'] = newlogname

        aiprops = get_aiprops(obsid)
        aiprops_template = jinja_env.get_template('aiprops.html')
        aiprops_page = aiprops_template.render(obsid=obsid, aiprops=aiprops)
        aiprops_page_file = os.path.join(outdir, 'aiprops.html')
        logger.debug("AIPROPS report to {}".format(aiprops_page_file))
        _write_report_file(aiprops_page_file, aiprops_page)

        props_template = jinja_env.get_template('props.html')
        props_page = props_template.render(obsid=obsid, vv=vv)
        props_page_file = os.path.join(outdir, 'props.html')
        logger.debug("GS/FIDPROPS report to {}".format(props_page_file))
        _write_report_file(props_page_file, props_page)

        for slot in vv['slots']:
            if 'n_pts' not in vv['slots'][slot]:
                continue
            slot_template = jinja_env.get_template('vv_slots_single.html')
            slot_page = slot_template.render(obsid=obsid,
                                             vv=vv,
                                             slot=slot)
            slot_page_file = os.path.join(outdir, "slot_{}.html".format(slot))
            logger.debug("VV SLOT report to {}".format(slot_page_file))
            _write_report_file(slot_page_file, slot_page)

        official_notes = official_vv_notes(obsid, summary)
        if official_notes:
            for rep in official_notes:
                if rep['comments'] == 'Hidden':
                    rep['comments'] = (
                        ' <A target="_blank" HREF="{}">{}</A><BR>'
                        '(<A target="_blank" '
                        'HREF="https://icxc.cfa.harvard.edu/soft/vv/vv_login.html">'
                        'LOGIN</A> once first)</BR>'
                    ).format(links['vv']['link'], links['vv']['label'])
        vv_template = jinja_env.get_template('vv.html')
        vv['has_errors'] = (('errors' in vv) and (len(vv['errors']))) or None
        vv_page = vv_template.render(obsid=obsid,
                                     vv=vv,
                                     obspar=run_obspar,
                                     official_vv_notes=official_notes,
                                     )
        vv_page_file = os.path.join(outdir, 'vv.html')
        logger.debug("VV report to {}".format(vv_page_file))
        _write_report_file(vv_page_file, vv_page)

    cat_table = catalog_info(obs_sc['cat'], acqs, trak, vv)

    for row, cat_row in zip(obs_sc['cat'], cat_table):
        if row['type'] != 'FID':
            if row['id'] is not None:
                s = star_info(row['id'])
                star_template = jinja_env.get_template('star.html')
                star_page = star_template.render(
                    star=s['agasc_info'],
                    acqs=s['acqs'],
                    traks=s['traks'],
                    agg_acq=s['agg_acq'],
                    agg_trak=s['agg_trak'])
                star_page_file = os.path.join(outdir, 'star_%d.html' % int(row['id']))
                logger.debug("Writing out star info to {}".format(star_page_file))
                _write_report_file(star_page_file, star_page)
                cat_row['idlink'] = (
                    '<A HREF="star_{id}.html" STYLE="text-decoration: none;"'
                    'ONMOUSEOVER="return overlib '
                    '(\'ACQ total:{n_acq} noid:{n_noid} <BR /> '
                    'GUI total:{n_gui} bad:{n_bad} fail:{n_fail} obc_bad:{n_obc_bad} '
                    '<BR /> Avg Mag {avg_mag:4.2f}\', WIDTH, 220);", ONMOUSEOUT="return nd();"> '
                    '{id}</A>'.format(id=int(row['id']),
                                      n_acq=s['agg_acq']['n_acqs'],
                                      n_noid=s['agg_acq']['n_acq_noid'],
                                      n_gui=s['agg_trak']['n_guis'],
                                      n_bad=s['agg_trak']['n_bad'],
                                      n_fail=s['agg_trak']['n_fail'],
                                      n_obc_bad=s['agg_trak']['n_obc_bad'],
                                      avg_mag=(s['agg_trak']['avg_mag']
                                               or s['agg_acq']['avg_mag']
                                               or 13.94)))
            else:
                cat_row['idlink'] = " "
        else:
            if 'id' in row:
                cat_row['idlink'] = int(row['id'])
            else:
                cat_row['idlink'] = ''

    template = jinja_env.get_template('report.html')
    page = template.render(cat_table=cat_table,
                           obs=obs,
                           sc=obs_sc,
                           vv=vv,
                           links=links,
                           target=summary,
                           temps=temps,
                           pred_temp=pred_temp,
                           er=er if er else None,
                           er_status=er_status,
                           last_sched=last_sched,
                           obsid=obsid,
                           version=version)
    full_report_file = os.path.join(outdir, 'index.html')
    logger.info("Writing out full report to {}".format(full_report_file))
    _write_report_file(full_report_file, page)

    cat_file = os.path.join(outdir, 'star_table.json')
    _write_report_file(cat_file,
                       json.dumps(cat_table, sort_keys=True, indent=4))

    notes = {'report_version': REPORT_VERSION,
             'vv_version': None,
             'vv_revision': None,
             'aspect_1_id': None,
             'last_sched': last_sched,
             'ocat_status': report_status.get('ocat'),
             'long_term': str(report_status.get('long_term')),
             'short_term': str(report_status.get('short_term')),
             'starcheck': report_status.get('starcheck'),
             'obsid': obsid,
             'checked_date': CxoTime.now().date}
    if vv:
        notes['vv_version'] = vv.get('vv_version')
        notes['vv_revision'] = vv.get('revision')
        notes['aspect_1_id'] = vv.get('aspect_1_id')
    _write_report_file(os.path.join(outdir, 'notes.json'),
                       json.dumps(notes, sort_keys=True, indent=4))
    save_state_in_db(obsid, notes)


def _write_report_file(path, text):
    """Write *text* to *path*, closing the file even if the write fails."""
    with open(path, 'w') as handle:
        handle.write(text)
def grab_critical_telemetry(start=None):
    """Fetch the latest values of the critical MSIDs and summarize them.

    Parameters
    ----------
    start : time-like or None
        Start of the fetch window passed to ``fetch.get_telem``. When None,
        the window starts 60 seconds before the time of the call. It is
        computed per call because a default written in the signature would
        be frozen at import time.

    Returns
    -------
    dict
        Human-readable telemetry summary keyed by label ('Format',
        'Shield Rate', 'HRC-I Status', ...). Each value is derived from the
        last sample of the corresponding MSID in the fetch window.
    """
    if start is None:
        start = CxoTime.now() - 60 * u.s

    critical_msidlist = [
        'CCSDSTMF', '2SHEV1RT', '2PRBSCR', '2FHTRMZT', '2IMTPAST', '2IMBPAST',
        '2SPTPAST', '2SPBPAST', '2TLEV1RT', '2VLEV1RT'
    ]
    critical_msids = fetch.get_telem(critical_msidlist,
                                     start=start,
                                     quiet=True,
                                     unit_system='eng')

    def latest(msid):
        """Return the most recent sample of *msid* in the fetch window."""
        return critical_msids[msid].vals[-1]

    tm_format = latest('CCSDSTMF')

    shield_rate = latest('2SHEV1RT')
    if shield_rate > 0:
        shield_state = 'UP'
    elif shield_rate == 0:
        shield_state = 'DOWN'
    else:
        # A negative or NaN rate used to leave shield_state unbound.
        shield_state = 'UNKNOWN'

    bus_current_in_amps = np.round(latest('2PRBSCR'), 2)
    bus_current_in_dn = convert_bus_current_to_dn(bus_current_in_amps)

    fea_temp = np.round(latest('2FHTRMZT'), 2)

    # Voltage steps as (top, bottom).
    # HRC-I: HALF voltage is 42/53, FULL voltage is 79/91.
    hrc_i_voltage = (latest('2IMTPAST'), latest('2IMBPAST'))
    # HRC-S: HALF voltage is 43/54, FULL voltage is 95/107.
    hrc_s_voltage = (latest('2SPTPAST'), latest('2SPBPAST'))

    # Set statuses
    if tm_format == 'FMT1':
        hrc_observing_status = 'OBSERVING'
    else:
        hrc_observing_status = 'NOT observing'

    # Both tables are in order of off, half, full, matching expected_status.
    expected_hrc_i_states = [(0, 0), (42, 53), (79, 91)]
    expected_hrc_s_states = [(0, 0), (43, 54), (95, 107)]
    expected_status = ['OFF', 'at HALF voltage', 'at FULL voltage']

    def describe(voltage, expected_states):
        """Map a (top, bottom) step pair to its status text."""
        try:
            return expected_status[expected_states.index(voltage)]
        except ValueError:
            return 'in a POTENTIALLY UNEXPECTED state ({}). CHECK THIS!'.format(
                voltage)

    hrc_i_status = describe(hrc_i_voltage, expected_hrc_i_states)
    hrc_s_status = describe(hrc_s_voltage, expected_hrc_s_states)

    te_rate = latest('2TLEV1RT')
    ve_rate = latest('2VLEV1RT')

    telem = {
        'HRC observing status': hrc_observing_status,
        'Format': tm_format,
        'Shield Rate': shield_rate,
        'Shield State': shield_state,
        'Bus Current (DN)': bus_current_in_dn,
        'Bus Current (A)': bus_current_in_amps,
        'FEA Temp': fea_temp,
        'HRC-I Voltage Steps': hrc_i_voltage,
        'HRC-I Status': hrc_i_status,
        'HRC-S Voltage Steps': hrc_s_voltage,
        'HRC-S Status': hrc_s_status,
        'TE Rate': te_rate,
        'VE Rate': ve_rate
    }

    return telem
def main():
    """Run the comm-pass monitoring loop forever.

    Polls ``are_we_in_comm`` and posts Slack messages via
    ``send_slack_message``:

    * after 5 consecutive in-comm passes, an "IN COMM" announcement with the
      critical telemetry;
    * after 10 passes, ``audit_telemetry`` is run over the pass so far;
    * when comm ends (confirmed by a second check 60 s later), an end-of-comm
      summary.

    Any exception inside one pass is reported to stdout (verbosely when
    ``--report_errors`` was given) and the loop presses on.
    """
    fetch.data_source.set('maude allow_subset=True')

    args = get_args()
    fake_comm = args.fake_comm
    chatty = args.report_errors  # Will be True if user set --report_errors

    if fake_comm:
        bot_slack_channel = '#bot-testing'
    else:
        bot_slack_channel = '#comm_passes'

    # Initial settings
    recently_in_comm = False
    in_comm_counter = 0

    # Loop infinitely :)
    while True:
        # Tracks whether THIS pass bumped the counter, so the error handler
        # only undoes an increment that actually happened.
        counted_this_pass = False

        try:
            in_comm = are_we_in_comm(verbose=False,
                                     cadence=2,
                                     fake_comm=fake_comm)

            if not in_comm:
                if recently_in_comm:
                    # We might have just had a loss in telemetry. Try again
                    # after waiting for a minute.
                    time.sleep(60)
                    in_comm = are_we_in_comm(verbose=False, cadence=2)
                    if in_comm:
                        continue

                    # Assuming the end of comm is real, then comm has
                    # recently ended and we need to report that.
                    telem = grab_critical_telemetry(
                        start=CxoTime.now() - 1800 * u.s)
                    message = (
                        f"It appears that COMM has ended as of `{CxoTime.now().strftime('%m/%d/%Y %H:%M:%S')}` \n\n"
                        f" HRC was *{telem['HRC observing status']}* \n"
                        f" Last telemetry was in `{telem['Format']}` \n\n"
                        f" *HRC-I* was {telem['HRC-I Status']} \n"
                        f" *HRC-S* was {telem['HRC-S Status']} \n\n"
                        f" *Shields were {telem['Shield State']}* with a count rate of `{telem['Shield Rate']} cps` \n\n"
                        f" *HRC-I* Voltage Steps were (Top/Bottom) = `{telem['HRC-I Voltage Steps'][0]}/{telem['HRC-I Voltage Steps'][1]}` \n"
                        f" *HRC-S* Voltage Steps were (Top/Bottom) = `{telem['HRC-S Voltage Steps'][0]}/{telem['HRC-S Voltage Steps'][1]}` \n\n"
                        f" *Bus Current* was `{telem['Bus Current (DN)']} DN` (`{telem['Bus Current (A)']} A`) \n\n"
                        f" *FEA Temperature* was `{telem['FEA Temp']} C`"
                    )
                    send_slack_message(message, channel=bot_slack_channel)

                recently_in_comm = False
                in_comm_counter = 0
                print(
                    f'({CxoTime.now().strftime("%m/%d/%Y %H:%M:%S")}) Not in Comm. ',
                    end='\r\r\r')

            if in_comm:
                if fake_comm:
                    # two days to make sure we grab previous comm
                    start_time = CxoTime.now() - 2 * u.d
                else:
                    # 300 sec to make the grab really small
                    start_time = CxoTime.now() - 300 * u.s

                if in_comm_counter == 0:
                    # Then start the clock on the comm pass
                    comm_start_timestamp = CxoTime.now()

                recently_in_comm = True
                in_comm_counter += 1
                counted_this_pass = True

                time.sleep(5)  # Wait a few seconds for MAUDE to refresh
                latest_vcdu = fetch.Msid('CVCDUCTR', start=start_time).vals[-1]

                print(
                    f'({CxoTime.now().strftime("%m/%d/%Y %H:%M:%S")} | VCDU {latest_vcdu} | #{in_comm_counter}) In Comm.',
                    end='\r')

                if in_comm_counter == 5:
                    # Now we've waited ~half a minute or so for MAUDE to update
                    telem = grab_critical_telemetry(
                        start=CxoTime.now() - 8 * u.h)

                    # Craft a message string using this latest telemetry
                    message = (
                        f"We are now *IN COMM* as of `{CxoTime.now().strftime('%m/%d/%Y %H:%M:%S')}` (_Chandra_ time). \n\n"
                        f" HRC is *{telem['HRC observing status']}* \n"
                        f" Telemetry Format = `{telem['Format']}` \n\n"
                        f" *HRC-I* is {telem['HRC-I Status']} \n"
                        f" *HRC-S* is {telem['HRC-S Status']} \n\n"
                        f" *Shields are {telem['Shield State']}* with a count rate of `{telem['Shield Rate']} cps` \n\n"
                        f" *HRC-I* Voltage Steps (Top/Bottom) = `{telem['HRC-I Voltage Steps'][0]}/{telem['HRC-I Voltage Steps'][1]}` \n"
                        f" *HRC-S* Voltage Steps (Top/Bottom) = `{telem['HRC-S Voltage Steps'][0]}/{telem['HRC-S Voltage Steps'][1]}` \n \n"
                        f" *Total Event* Rate = `{telem['TE Rate']} cps` \n"
                        f" *Valid Event* Rate = `{telem['VE Rate']} cps` \n \n"
                        f" *Bus Current* is `{telem['Bus Current (DN)']} DN` (`{telem['Bus Current (A)']} A`) \n \n"
                        f" *FEA Temperature* is `{telem['FEA Temp']} C`"
                    )
                    # Send the message using our slack bot
                    send_slack_message(message, channel=bot_slack_channel)

                if in_comm_counter == 10:
                    # Now we've waited a minute. Let's audit the telemetry
                    # and send a message.
                    audit_telemetry(start=comm_start_timestamp,
                                    channel=bot_slack_channel)

        except Exception as e:
            # MAUDE queries fail regularly as TM is streaming in (mismatched
            # array sizes as data is being populated), 404s, etc. The
            # solution is almost always to simply try again. Therefore this
            # script just presses on in the event of an Exception.
            if chatty:
                # Then we want a verbose error message, because we're
                # obviously in testing mode
                print(
                    f'({CxoTime.now().strftime("%m/%d/%Y %H:%M:%S")}) ERROR: {e}'
                )
                print("Heres the traceback:")
                print(traceback.format_exc())
                print("Pressing on...")
            else:
                # Then we're likely in operational mode. Ignore the errors on
                # the command line.
                print(
                    f'({CxoTime.now().strftime("%m/%d/%Y %H:%M:%S")}) ERROR encountered! Use --report_errors to display them. ',
                    end='\r\r\r')

            if counted_this_pass:
                # Undo this pass's increment so the error "does not count".
                # Decrementing unconditionally could walk the counter back
                # across 5 or 10 and re-send the announcement or audit.
                in_comm_counter -= 1
            continue