def main():
    """Add observations to the Workflow Manager database from the command line.

    Parses the command line, loads the still configuration file
    (``--config_file``; the default is ``"%setc/still.cfg" % basedir``),
    opens and tests the database connection, and then either

    * calls ``ingest_addtional_opsids(sg)`` when ``--generate`` is given, or
    * adds every obsid passed through ``-o`` with status ``NEW``.

    Exits via ``parser.error`` (usage message, status 2) when neither
    ``--generate`` nor ``-o`` is supplied, instead of failing later while
    iterating over ``None``.
    """
    parser = argparse.ArgumentParser(description='MWA - Add observations to Workflow Manager')
    parser.add_argument('--config_file', dest='config_file', required=False,
                        help="Specify the complete path to the config file, by default we'll use etc/still.cfg")
    parser.add_argument('-o', dest='obsnums', required=False, nargs='+',
                        help="List of obervations seperated by spaces")
    # NOTE(review): there is no action='store_true', so --generate consumes a
    # value; any non-empty value enables generation.  Kept for compatibility.
    parser.add_argument('--generate', dest='generate', required=False,
                        help="Generates a list of unprocessed obsnums, note that projID and filecount have not been checked")
    parser.set_defaults(config_file="%setc/still.cfg" % basedir)

    # Unrecognised arguments are tolerated and ignored.
    args, _unknown = parser.parse_known_args()

    # Fail fast with a usage message: without -o, args.obsnums is None and the
    # loop below would raise a TypeError.
    if not args.generate and not args.obsnums:
        parser.error("nothing to do: give observation numbers with -o or use --generate")

    sg = SpawnerClass()
    wf = WorkFlow()
    sg.config_file = args.config_file
    process_client_config_file(sg, wf)

    dbi = get_dbi_from_config(args.config_file)
    dbi.test_db()  # make sure we actually have a database connection

    if args.generate:
        ingest_addtional_opsids(sg)
        return

    for obsid in args.obsnums:
        # Format inside the call: `print(...) % obsid` applies % to print's
        # return value (None) when print is a function.
        print("Obsid: %s" % obsid)
        dbi.add_observation(obsid, obsid, "GPS", None, None, None,
                            outputhost=None, length=None, status='NEW')
def main():
    """Add the observations named on the command line to the Workflow Manager.

    Parses the command line, loads the still configuration file
    (``--config_file``; the default is ``"%setc/still.cfg" % basedir``),
    opens and tests the database connection, and adds every obsid passed
    through ``-o`` with status ``NEW``.

    Exits via ``parser.error`` (usage message, status 2) when ``-o`` is not
    supplied, instead of failing later while iterating over ``None``.
    """
    parser = argparse.ArgumentParser(description="MWA - Add observations to Workflow Manager")
    parser.add_argument(
        "--config_file",
        dest="config_file",
        required=False,
        help="Specify the complete path to the config file, by default we'll use etc/still.cfg",
    )
    parser.add_argument(
        "-o",
        dest="obsnums",
        required=False,
        nargs="+",
        help="List of obervations seperated by spaces",
    )
    parser.set_defaults(config_file="%setc/still.cfg" % basedir)

    # Unrecognised arguments are tolerated and ignored.
    args, _unknown = parser.parse_known_args()

    # -o is optional for argparse, so args.obsnums may be None; report that as
    # a usage error rather than a TypeError from the loop below.
    if not args.obsnums:
        parser.error("no observations given: pass one or more obsids with -o")

    sg = SpawnerClass()
    wf = WorkFlow()
    sg.config_file = args.config_file
    process_client_config_file(sg, wf)

    dbi = get_dbi_from_config(args.config_file)
    dbi.test_db()  # make sure we actually have a database connection

    for obsid in args.obsnums:
        # Format inside the call: `print(...) % obsid` applies % to print's
        # return value (None) when print is a function.
        print("Obsid: %s" % obsid)
        dbi.add_observation(
            obsid, obsid, "GPS", None, None, None,
            outputhost=None, length=None, status="NEW",
        )
def main():
    """Command-line entry point: register observations with the Workflow Manager.

    Steps, in order:

    1. Parse ``--config_file``, ``-o`` and ``--generate`` (unknown arguments
       are ignored).
    2. Load the client configuration into a ``SpawnerClass`` / ``WorkFlow``
       pair and open and test the database connection.
    3. If ``--generate`` was given, call ``ingest_addtional_opsids(sg)``;
       otherwise add each ``-o`` obsid with status ``NEW``.

    Exits via ``parser.error`` (usage message, status 2) when neither
    ``--generate`` nor ``-o`` is supplied.
    """
    parser = argparse.ArgumentParser(
        description='MWA - Add observations to Workflow Manager')
    parser.add_argument(
        '--config_file',
        dest='config_file',
        required=False,
        help="Specify the complete path to the config file, by default we'll use etc/still.cfg")
    parser.add_argument(
        '-o',
        dest='obsnums',
        required=False,
        nargs='+',
        help="List of obervations seperated by spaces")
    # NOTE(review): with argparse's default 'store' action this option takes a
    # value; any non-empty value switches generation on.
    parser.add_argument(
        '--generate',
        dest='generate',
        required=False,
        help="Generates a list of unprocessed obsnums, note that projID and filecount have not been checked")
    parser.set_defaults(config_file="%setc/still.cfg" % basedir)
    args, _unknown = parser.parse_known_args()

    generate_requested = bool(args.generate)
    if not generate_requested and args.obsnums is None:
        # Previously this case reached the loop and raised
        # "TypeError: 'NoneType' object is not iterable".
        parser.error("nothing to do: give observation numbers with -o or use --generate")

    sg = SpawnerClass()
    wf = WorkFlow()
    sg.config_file = args.config_file
    process_client_config_file(sg, wf)

    dbi = get_dbi_from_config(args.config_file)
    dbi.test_db()  # make sure we actually have a database connection

    if generate_requested:
        ingest_addtional_opsids(sg)
    else:
        for obsid in args.obsnums:
            # The % formatting must happen inside print(); applied to the
            # result of print() it fails because that result is None.
            print("Obsid: %s" % obsid)
            dbi.add_observation(obsid, obsid, "GPS", None, None, None,
                                outputhost=None, length=None, status='NEW')
def main(args):
    """Load not-yet-processed Librarian files into the still database.

    ``args`` is the list of command-line arguments handed to ``optparse``.
    The function

    1. loads ``etc/still.cfg`` (relative to ``basedir``) and opens and tests
       the database connection;
    2. asks the Librarian for files from ``librarian_source`` that have no
       ``rtp_processed_key`` history item;
    3. augments each file record with ``jd``, ``pol``, ``store_host``, an
       absolute ``name``, ``djd``, ``obsnum`` and neighbor information;
    4. drops records whose ``obsnum`` is already in the database and submits
       the rest with ``dbi.add_observations``.

    Exits with status 1 if the Librarian RPC fails or its response is not
    usable.  Returns ``None`` early when there is nothing to add.  If the
    database insertion raises, the attempted records are dumped to stderr and
    the exception is re-raised.
    """
    o = optparse.OptionParser()
    o.set_usage('load_observations_librarian.py')
    o.set_description(__doc__)
    o.add_option('--connection',
                 help='the name of the Librarian connection to use (specified in .hl_client.cfg)')
    opts, args = o.parse_args(args)

    # Some boilerplate to set up the database interface ...
    spawner = SpawnerClass()
    workflow = WorkFlow()
    spawner.config_file = os.path.join(basedir, 'etc/still.cfg')
    process_client_config_file(spawner, workflow)
    dbi = get_dbi_from_config(spawner.config_file)
    dbi.test_db()

    # Get the list of potentially-relevant files from the Librarian.
    lc = hera_librarian.LibrarianClient(opts.connection)

    try:
        listing = lc.list_files_without_history_item(librarian_source, rtp_processed_key)
    except hera_librarian.RPCFailedError as e:
        # Prefer the exception's own `message` attribute when it has one;
        # plain Python 3 exceptions do not, so fall back to str(e).
        print('RPC to librarian failed: %s' % getattr(e, 'message', e))
        sys.exit(1)

    try:
        files = listing['files']
        len(files)  # sanity check: the payload must be a sized container
    except Exception as e:
        print('unexpected response from librarian: %s' % e)
        sys.exit(1)

    if not len(files):
        print('No new files.')
        return

    # For each file we should have a dict of at least:
    #
    # name -- something like 2456892/zen.2456892.49664.xx.uv
    # obsid -- the obsid associated with this file
    # create_time -- the Unix timestamp that the file was sent to the Librarian
    # size -- file size in bytes
    # type -- file "type" stored in the Librarian; "uv" for UV data
    # md5 -- the MD5 of the file contents; XXX may be calculated weirdly by Librarian
    # store_ssh_prefix -- the Librarian "ssh_prefix" of the file's storage location
    # store_path_prefix -- the Librarian "path_prefix" of the file's storage location
    #
    # We start by extracting a few useful pieces of meta-information:
    for f in files:
        f['jd'] = float(path_to_jd(f['name']))
        f['pol'] = path_to_pol(f['name'])

        # Extract the hostname of the store on which this file is stored from
        # its store's ssh_prefix. The prefix will look like "user@host",
        # but the "user@" part might not be present.
        f['store_host'] = f['store_ssh_prefix'].split('@', 1)[-1]

        # Meanwhile, the RTP system expects the filenames to be absolute paths.
        f['name'] = f['store_path_prefix'] + '/' + f['name']

    # If at all possible, get a default observation length from the
    # separations between observations, in case we have any funky datasets
    # without the length embedded. We are somewhat recklessly assuming that
    # even if this batch of datasets spans different nights, they all will
    # have the same DJD; this doesn't seem too unreasonable.
    pols = list(set(f['pol'] for f in files))
    bestjds = []

    for pol in pols:
        jds = np.sort([f['jd'] for f in files if f['pol'] == pol])
        if len(jds) > len(bestjds):
            bestjds = jds

    default_djd = None

    if len(bestjds) > 2:
        default_djd = np.median(np.diff(bestjds))
        print('Inferring default djd = %.5f days' % default_djd)

    for f in files:
        f['djd'] = default_djd

    # Buuut let's get djd straight from the data if at all possible. If there
    # are any files for which we have no idea about the djd, we can't add
    # them. For everything else, we can now compute the 'obsnum' magic number
    # (which is not the same as obsid!).
    for f in files:
        djd = try_get_file_djd(f)
        if djd is not None:
            f['djd'] = djd

    files = [f for f in files if f.get('djd') is not None]

    for f in files:
        f['obsnum'] = str(jdpol2obsnum(f['jd'], f['pol'], f['djd']))

    # Now let's fill in the "neighbor" information. XXX: if we only get, say,
    # a random subset of observations from one night, this information will be
    # grievously incomplete! I don't see a way around that given the way that
    # this aspect of things is handled at the moment.
    for pol in pols:
        sfiles = sorted((f for f in files if f['pol'] == pol), key=lambda f: f['jd'])

        # Walk adjacent pairs instead of indexing with xrange (Python 2 only).
        # Each record's own djd decides whether the other one is close enough
        # to count as its neighbor, exactly as in the index-based form.
        for f_prev, f_next in zip(sfiles, sfiles[1:]):
            gap = f_next['jd'] - f_prev['jd']
            if gap < (1.2 * f_next['djd']):
                f_next['neighbor_low'] = f_prev['jd']
            if gap < (1.2 * f_prev['djd']):
                f_prev['neighbor_high'] = f_next['jd']

    # Now that we've computed everything, avoid duplicating files that we
    # already know about.
    from sqlalchemy.orm.exc import NoResultFound

    def not_already_seen(filerec):
        """Return True when no observation with this record's obsnum exists yet."""
        try:
            obs = dbi.get_obs(filerec['obsnum'])
        except NoResultFound:
            return True
        print(repr(obs))
        return False

    n_before = len(files)
    files = [f for f in files if not_already_seen(f)]
    if len(files) != n_before:
        print('Dropping %d already-ingested files.' % (n_before - len(files)))

    if not len(files):
        print('Nothing to add.')
        return

    # Let's go for it.
    try:
        print('Attempting to add %d observations to the still ...' % len(files))
        dbi.add_observations([augmented_file_to_obsinfo(f) for f in files], initial_status)
    except Exception:
        # Dump what we tried to insert so the failure can be diagnosed, then
        # let the original exception propagate.
        print("addition failed! here's what was attempted:", file=sys.stderr)
        for f in files:
            print('', file=sys.stderr)
            print(augmented_file_to_obsinfo(f), file=sys.stderr)
        raise
def main(args):
    """Ingest not-yet-ingested Librarian observing sessions into the RTP.

    ``args`` is the list of command-line arguments handed to ``optparse``
    (``--connection``, ``--config_file``, ``--source``).  The function

    1. loads the RTP configuration file (``opts.config_file`` joined onto
       ``basedir``) and opens and tests the database connection;
    2. asks the Librarian for a session from ``opts.source`` that has no
       ``rtp_ingested_key`` event;
    3. turns each returned record into an obsinfo dict and fills in the
       ``neighbor_low`` / ``neighbor_high`` entries per polarization;
    4. drops records whose ``obsnum`` is already in the database, submits the
       rest with ``dbi.add_observations`` and finally records an
       ``rtp_ingested_key`` event in the Librarian for each submitted file.

    Exits with status 1 if the Librarian RPC fails.  Returns ``None`` early
    when there is nothing to add.  If the database insertion raises, the
    attempted records are dumped to stderr and the exception is re-raised;
    no Librarian events are created in that case.
    """
    o = optparse.OptionParser()
    o.set_usage('load_observations_librarian.py')
    o.set_description(__doc__)
    o.add_option(
        '--connection',
        help='the name of the Librarian connection to use (as in ~/.hl_client.cfg)')
    o.add_option(
        '--config_file',
        help='RTP configuration file default=RTP/etc/still.cfg',
        default='etc/still.cfg')
    o.add_option(
        '--source',
        help='Only load files originating from the named "source" (default "%default")',
        default='correlator')
    opts, args = o.parse_args(args)

    # Some boilerplate to set up the database interface ...
    spawner = SpawnerClass()
    workflow = WorkFlow()
    spawner.config_file = os.path.join(basedir, opts.config_file)
    process_client_config_file(spawner, workflow)
    dbi = get_dbi_from_config(spawner.config_file)
    dbi.test_db()

    # Get the list of potentially-relevant files from the Librarian.
    lc = hera_librarian.LibrarianClient(opts.connection)

    try:
        listing = lc.describe_session_without_event(opts.source, rtp_ingested_key)
    except hera_librarian.RPCError as e:
        # Prefer the exception's own `message` attribute when it has one;
        # plain Python 3 exceptions do not, so fall back to str(e).
        print('RPC to librarian failed: %s' % getattr(e, 'message', e))
        sys.exit(1)

    if not listing['any_matching']:
        print('No new sessions.')
        return

    # For each record we get a dict of at least
    #
    # date -- the start Julian Date of the observation
    # pol -- the polarization of the data ("xx" or "yy")
    # store_path -- the path of a file instance *within* a store
    # path_prefix -- the store's path prefix, used to construct full paths
    # host -- the hostname of the store
    # length -- the duration of the observation in days
    #
    # This is a pretty good start ... because of course the Librarian's API
    # call has been engineered to give us what we need.

    def augment_record(r):
        """Build the obsinfo dict submitted to the database from one record."""
        return {
            'obsnum': os.path.basename(r['store_path']),  # NOTE: this is actually free text
            'date': r['date'],
            'date_type': 'julian',
            'pol': r['pol'],
            'host': r['host'],
            'filename': os.path.join(r['path_prefix'], r['store_path']),
            'path_prefix': r['path_prefix'],
            'outputhost': '',
            'status': initial_status,
            'length': r['length'],
        }

    obsinfos = [augment_record(r) for r in listing['info']]

    # Now we need to fill in the "neighbor" information.
    pols = set(oi['pol'] for oi in obsinfos)

    for pol in pols:
        soi = sorted((oi for oi in obsinfos if oi['pol'] == pol),
                     key=lambda oi: oi['date'])

        # Walk adjacent pairs instead of indexing with xrange (Python 2 only).
        # Each record's own length decides whether the other one is close
        # enough to count as its neighbor, exactly as in the index-based form.
        for oi_prev, oi_next in zip(soi, soi[1:]):
            gap = oi_next['date'] - oi_prev['date']
            if gap < (1.2 * oi_next['length']):
                oi_next['neighbor_low'] = oi_prev['date']
            if gap < (1.2 * oi_prev['length']):
                oi_prev['neighbor_high'] = oi_next['date']

    # Now that we've computed everything, avoid duplicating files that we
    # already know about. We shouldn't end up ever trying to submit
    # duplicates, but in practice ...
    from sqlalchemy.orm.exc import NoResultFound

    def not_already_seen(oi):
        """Return True when no observation with this obsnum exists yet."""
        try:
            dbi.get_obs(oi['obsnum'])
        except NoResultFound:
            return True
        return False

    n_before = len(obsinfos)
    obsinfos = [oi for oi in obsinfos if not_already_seen(oi)]
    if len(obsinfos) != n_before:
        print('Dropping %d already-ingested records.' % (n_before - len(obsinfos)))

    if not len(obsinfos):
        print('Nothing to add.')
        return

    # Try ingesting into the RTP.
    try:
        print('Attempting to add %d observations to the still ...' % len(obsinfos))
        dbi.add_observations(obsinfos, initial_status)
    except Exception:
        # Dump what we tried to insert so the failure can be diagnosed, then
        # let the original exception propagate.
        print("addition failed! here's what was attempted:", file=sys.stderr)
        print('', file=sys.stderr)
        for oi in obsinfos:
            print(oi, file=sys.stderr)
        raise

    # Add events to the Librarian indicating that these files were
    # successfully ingested into the RTP.
    for oi in obsinfos:
        lc.create_file_event(os.path.basename(oi['filename']), rtp_ingested_key)
help='set the observation to this status, default will be the first item in the config workflow_actions') parser.add_argument('--config_file', dest='config_file', required=False, help="Specify the complete path to the config file") parser.add_argument('jds', nargs='+',type=str,metavar='JD', help="List of integer julian dates to reset.") parser.set_defaults(config_file="%setc/still.cfg" % basedir) args = parser.parse_args() sg = SpawnerClass() wf = WorkFlow() sg.config_file = args.config_file process_client_config_file(sg, wf) if args.status == '': args.status = wf.workflow_actions[0] # connect to the database dbi = StillDataBaseInterface(sg.dbhost, sg.dbport, sg.dbtype, sg.dbname, sg.dbuser, sg.dbpasswd, test=False) # Setup logging logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logger = logging.getLogger('reset_observations.py') if args.debug: logger.setLevel(logging.DEBUG) else: logger.setLevel(logging.INFO)
help="Specify the complete path to the config file") parser.add_argument('jds', nargs='+', type=str, metavar='JD', help="List of integer julian dates to reset.") parser.set_defaults(config_file="%setc/still.cfg" % basedir) args = parser.parse_args() sg = SpawnerClass() wf = WorkFlow() sg.config_file = args.config_file process_client_config_file(sg, wf) if args.status == '': args.status = wf.workflow_actions[0] # connect to the database dbi = StillDataBaseInterface(sg.dbhost, sg.dbport, sg.dbtype, sg.dbname, sg.dbuser, sg.dbpasswd, test=False) # Setup logging logging.basicConfig(
def main(args):
    """Print an RTP processing status report for one Julian Date.

    ``args`` is the list of command-line arguments handed to ``optparse``:

    * ``--config_file`` -- RTP configuration file, joined onto ``basedir``
      (default ``etc/rtp_hera_h1c.cfg``);
    * ``--date`` -- integer JD to report on; ``0`` (the default) means the
      current JD minus one.

    Every ``Observation`` whose ``obsnum`` starts with ``zen.<JD>`` is counted
    as complete (``status == "COMPLETE"``), failed
    (``current_stage_in_progress`` is ``"FAILED"`` or ``"KILLED"``) or
    working (everything else), and the totals are printed with whole-number
    percentages.  Returns ``None``.
    """
    # define option parsing function
    o = optparse.OptionParser()
    o.set_usage('rtp_summary.py')
    o.set_description(__doc__)
    o.add_option('--config_file',
                 help='RTP configuration file; default=etc/rtp_hera_h1c.cfg',
                 default='etc/rtp_hera_h1c.cfg', type=str)
    o.add_option('--date',
                 help='JD for which to generate the status report;'
                      ' defaults to the current JD, minus 1 (i.e., the previous night\'s observation)',
                 default=0, type=int)
    opts, args = o.parse_args(args)

    # create a database interface
    spawner = SpawnerClass()
    spawner.config_file = os.path.join(basedir, opts.config_file)
    dbi = get_dbi_from_config(spawner.config_file)
    dbi.test_db()

    # Pick the JD to summarize: the requested one, or yesterday's when the
    # option was left at its 0 default.
    if opts.date == 0:
        jd_of_interest = int(Time.now().jd) - 1
    else:
        jd_of_interest = opts.date
    t = Time(val=jd_of_interest, format='jd')

    # convert JD to unix epoch to create datetime object, for writing human dates
    date = datetime.datetime.fromtimestamp(t.unix)
    datestr = date.strftime('%a %b %d, %Y')

    # Query the database and categorize while the session is open; the
    # finally clause closes the session on every path, including the
    # "no observations" early return below.
    ncomplete = 0
    nworking = 0
    nfailed = 0
    s = dbi.Session()
    try:
        obsnums = s.query(Observation).filter(
            Observation.obsnum.like('zen.{:d}%'.format(jd_of_interest))).all()
        nobs = len(obsnums)
        for obs in obsnums:
            if obs.status == "COMPLETE":
                ncomplete += 1
            elif obs.current_stage_in_progress in ("FAILED", "KILLED"):
                nfailed += 1
            else:
                nworking += 1
    finally:
        s.close()

    if nobs == 0:
        print("No observations for JD {0:d} ({1})\n".format(jd_of_interest, datestr))
        return

    # Every observation increments exactly one counter above, so the three
    # tallies always sum to nobs and no consistency check is needed.

    # Whole-number percentages via integer arithmetic: this gives the same
    # answer under Python 2 and 3 division rules and avoids float round-off
    # pushing a value just below the next integer.
    pct_comp = 100 * ncomplete // nobs
    pct_work = 100 * nworking // nobs
    pct_fail = 100 * nfailed // nobs

    # write out report
    print("RTP report for JD {0:d} ({1})\n".format(jd_of_interest, datestr))
    print("Number of observations: {:d}".format(nobs))
    print("Number finished processing: {0:d} ({1:d}%)".format(ncomplete, pct_comp))
    print("Number currently processing: {0:d} ({1:d}%)".format(nworking, pct_work))
    print("Number failed: {0:d} ({1:d}%)".format(nfailed, pct_fail))
    return
# setup my curses stuff following # https://docs.python.org/2/howto/curses.html stdscr = curses.initscr() curses.noecho() curses.cbreak() stdscr.keypad(1) stdscr.nodelay(1) # setup my db connection # Jon : set this up correctly, read conf file config_file = basedir + 'etc/still.cfg' sg = SpawnerClass() wf = WorkFlow() sg.config_file = config_file process_client_config_file(sg, wf) dbi = get_dbi_from_config(config_file) dbi.test_db() # Testing the database to make sure we made a connection, its fun.. stdscr.addstr("DiStiller Status Board. Monitoring") stdscr.addstr(1, 0, "Press 'q' to exit") statheight = 50 statusscr = curses.newwin(statheight, 400, 5, 0) statusscr.keypad(1) statusscr.nodelay(1) curline = 2 colwidth = 50 obslines = 20 stat = ['\\', '|', '/', '-', '.']
'--overwrite', action='store_true', help= 'Default action is to skip obsrvations already in the db. Setting this option overrides this safety feature and attempts anyway' ) opts, args = o.parse_args(sys.argv[1:]) # connect to the database # Jon : set this up correctly, read conf file config_file = basedir + 'etc/still.cfg' sg = SpawnerClass() wf = WorkFlow() sg.config_file = config_file process_client_config_file(sg, wf) dbi = get_dbi_from_config(config_file) # Testing the database to make sure we made a connection, its fun.. dbi.test_db() # dbi = DataBaseInterface() # check that all files exist for filename in args: print filename assert (filename.startswith('/')) assert (os.path.exists(filename)) # now run through all the files and build the relevant information for the db
def main(args):
    """Ingest not-yet-ingested Librarian observing sessions into the RTP.

    ``args`` is the list of command-line arguments handed to ``optparse``
    (``--connection``, ``--config_file``, ``--source``).

    The configuration file (``opts.config_file`` joined onto ``basedir``) is
    loaded and the database connection tested.  The Librarian is then asked
    for a session from ``opts.source`` without an ``rtp_ingested_key`` event.
    Each returned record becomes an obsinfo dict, gets its ``neighbor_low`` /
    ``neighbor_high`` entries filled in per polarization, and is submitted
    through ``dbi.add_observations`` unless its ``obsnum`` already exists.
    After a successful submission an ``rtp_ingested_key`` event is created in
    the Librarian for every submitted file.

    Exits with status 1 if the Librarian RPC fails.  Returns ``None`` early
    when there is nothing to add.  If the database insertion raises, the
    attempted records are written to stderr and the exception is re-raised.
    """
    o = optparse.OptionParser()
    o.set_usage('load_observations_librarian.py')
    o.set_description(__doc__)
    o.add_option('--connection',
                 help='the name of the Librarian connection to use (as in ~/.hl_client.cfg)')
    o.add_option('--config_file',
                 help='RTP configuration file default=RTP/etc/still.cfg',
                 default='etc/still.cfg')
    o.add_option('--source',
                 help='Only load files originating from the named "source" (default "%default")',
                 default='correlator')
    opts, args = o.parse_args(args)

    # Some boilerplate to set up the database interface ...
    spawner = SpawnerClass()
    workflow = WorkFlow()
    spawner.config_file = os.path.join(basedir, opts.config_file)
    process_client_config_file(spawner, workflow)
    dbi = get_dbi_from_config(spawner.config_file)
    dbi.test_db()

    # Get the list of potentially-relevant files from the Librarian.
    lc = hera_librarian.LibrarianClient(opts.connection)

    try:
        listing = lc.describe_session_without_event(opts.source, rtp_ingested_key)
    except hera_librarian.RPCError as e:
        # Use the exception's `message` attribute when present, else str(e);
        # built-in Python 3 exceptions no longer carry `.message`.
        print('RPC to librarian failed: %s' % getattr(e, 'message', e))
        sys.exit(1)

    if not listing['any_matching']:
        print('No new sessions.')
        return

    # For each record we get a dict of at least
    #
    # date -- the start Julian Date of the observation
    # pol -- the polarization of the data ("xx" or "yy")
    # store_path -- the path of a file instance *within* a store
    # path_prefix -- the store's path prefix, used to construct full paths
    # host -- the hostname of the store
    # length -- the duration of the observation in days
    #
    # This is a pretty good start ... because of course the Librarian's API
    # call has been engineered to give us what we need.

    def augment_record(r):
        """Build the obsinfo dict submitted to the database from one record."""
        return {
            'obsnum': os.path.basename(r['store_path']),  # NOTE: this is actually free text
            'date': r['date'],
            'date_type': 'julian',
            'pol': r['pol'],
            'host': r['host'],
            'filename': os.path.join(r['path_prefix'], r['store_path']),
            'path_prefix': r['path_prefix'],
            'outputhost': '',
            'status': initial_status,
            'length': r['length'],
        }

    obsinfos = [augment_record(r) for r in listing['info']]

    # Now we need to fill in the "neighbor" information.
    pols = set(oi['pol'] for oi in obsinfos)

    for pol in pols:
        soi = sorted((oi for oi in obsinfos if oi['pol'] == pol),
                     key=lambda oi: oi['date'])

        # Adjacent-pair iteration replaces the xrange index loop (xrange does
        # not exist on Python 3).  The earlier record's length gates its
        # neighbor_high; the later record's length gates its neighbor_low.
        for earlier, later in zip(soi, soi[1:]):
            separation = later['date'] - earlier['date']
            if separation < (1.2 * later['length']):
                later['neighbor_low'] = earlier['date']
            if separation < (1.2 * earlier['length']):
                earlier['neighbor_high'] = later['date']

    # Now that we've computed everything, avoid duplicating files that we
    # already know about. We shouldn't end up ever trying to submit
    # duplicates, but in practice ...
    from sqlalchemy.orm.exc import NoResultFound

    def not_already_seen(oi):
        """Return True when no observation with this obsnum exists yet."""
        try:
            dbi.get_obs(oi['obsnum'])
        except NoResultFound:
            return True
        return False

    n_before = len(obsinfos)
    obsinfos = [oi for oi in obsinfos if not_already_seen(oi)]
    if len(obsinfos) != n_before:
        print('Dropping %d already-ingested records.' % (n_before - len(obsinfos)))

    if not len(obsinfos):
        print('Nothing to add.')
        return

    # Try ingesting into the RTP.
    try:
        print('Attempting to add %d observations to the still ...' % len(obsinfos))
        dbi.add_observations(obsinfos, initial_status)
    except Exception:
        # Show what was being inserted, then re-raise the original error.
        print("addition failed! here's what was attempted:", file=sys.stderr)
        print('', file=sys.stderr)
        for oi in obsinfos:
            print(oi, file=sys.stderr)
        raise

    # Add events to the Librarian indicating that these files were
    # successfully ingested into the RTP.
    for oi in obsinfos:
        lc.create_file_event(os.path.basename(oi['filename']), rtp_ingested_key)