def init_subDAG(tag, outdir, logdir, cp):
    # generate job
    dag = pipeline.CondorDAG('%s/%s_%s.log'
                             % (logdir, tag, tempfile.mktemp(dir='', prefix='')))
    dag.set_dag_file(os.path.join(outdir, tag))
    # set max jobs args
    if cp.has_option('condor-max-jobs', tag):
        dag.add_maxjobs_category(tag, cp.getint('condor-max-jobs', tag))
    return dag
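
# Hedged sketch (not part of the original module): finalise_DAG is called on
# each sub-DAG below but its definition is not shown in this excerpt.  A
# minimal version, assuming it only needs to write the sub-DAG and wrap it for
# the uber-DAG, might look like this; the real function may do more.
def finalise_DAG_sketch(dag, parents=()):
    # write the sub-DAG and its submit files to disk
    dag.write_sub_files()
    dag.write_dag()
    # wrap the sub-DAG as a DAGMan job so a parent DAG can schedule it
    dagmanjob = pipeline.CondorDAGManJob(dag.get_dag_file())
    dagmannode = pipeline.CondorDAGManNode(dagmanjob)
    # preserve ordering relative to other sub-DAGs
    for parent in parents:
        dagmannode.add_parent(parent)
    return dagmannode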
try:
    usertag = string.strip(cp.get('pipeline', 'user-tag'))
    inspstr = 'INSPIRAL_' + usertag
except:
    usertag = None
    inspstr = 'INSPIRAL'

# create a log file that the Condor jobs will write to
basename = re.sub(r'\.dag', r'', dag_file_name)
logfile = os.path.join(log_path, basename + '.log')

# create a DAG generation log file
log_fh = open(basename + '.pipeline.log', 'w')

# create the DAG writing the log to the specified file
dag = pipeline.CondorDAG(logfile)
dag.set_dag_file(dag_file_name)

# Build a set of jobs for each ifo
for ifo in ifo_list:
    if not cp.has_option('segments', ifo.lower() + '-analyze'):
        continue

    # decide if we need to segment the data
    available_segments = get_valid_segments(
        cp.get('segfind', 'segment-url'),
        cp.get('framefind', 'base-dir'),
        ifo,
        cp.get('segments', ifo.lower() + '-analyze'),
        gps_start_time, gps_end_time)

    if not available_segments:
        print("No available segments for %s, skipping" % ifo)
def main(rundir, outdir, ifotag, grb, inifile, injfile, verbose=False,\ logdir=None, vetoDir=None,run_combiner=True, run_clustering=True,\ run_injfind=True, run_sbvplotter=True, run_efficiency=True,\ run_injcombiner=True,run_horizon_dist_plot=True,timeSlides=False): # load ini files if verbose: print >> sys.stdout print >>sys.stdout, 'Initialising post processing driver, '+\ 'loading configuration files...' # get directory grbdir = os.path.abspath('%s/GRB%s' % (rundir, grb)) if not os.path.isdir(grbdir): raise ValueError, 'Cannot find directory GRB%s in %s' % (grb, rundir) # generate post processing directory if not os.path.isdir(outdir): os.makedirs(outdir) # generat subdirectories os.chdir(outdir) plotdir = 'output' exedir = 'executables' if not logdir: logdir = '%s/%s' % (outdir, 'logs') for d in [plotdir, exedir, logdir]: if not os.path.isdir(d): os.mkdir(d) # load ini file cp = ConfigParser.ConfigParser() cp.optionxform = str cp.read(inifile) # Add veto directory if needed if vetoDir: if run_sbvplotter: cp.set('sbv_plotter', 'veto-directory', vetoDir) if run_efficiency: cp.set('efficiency', 'veto-directory', vetoDir) cp.set('injection-efficiency', 'veto-directory', vetoDir) # load inj file if injfile: injcp = ConfigParser.ConfigParser() injcp.optionxform = str injcp.read(injfile) # find injection runs injruns = injcp.sections() else: injruns = [] usertag = cp.get('input', 'user-tag') # ========= # get times # ========= # get times from datafind cache datafindstr = '%s/datafind/%s-INSPIRAL_HIPE_GRB%s_DATAFIND-*-*.cache'\ % (grbdir, ifotag, grb) datafindglob = glob.glob(datafindstr) if len(datafindglob) != 1: raise ValueError, 'Cannot find single datafind cache matching %s' % datafindstr datafindcache = datafindglob[0] datastart, dataduration = map(int, os.path.splitext(datafindcache)[0]\ .split('-')[-2:]) pad = cp.getint('data', 'pad-data') start = datastart + pad duration = dataduration - 2 * pad # ================ # find onoff and timeslides cache # ================ onoffcache = '%s/onoff/' % grbdir onoffcache += '%s-INSPIRAL_HIPE_GRB%s_ZERO_LAG_CATEGORY_1-%s-%s.cache'\ % (ifotag, grb, datastart, dataduration) # Get the appropriate tag zlCache = lal.Cache.fromfile(open(onoffcache, 'r')) zlCache = zlCache.sieve(description=usertag) fileDescription = zlCache[0].description.split('_') grbIndex = fileDescription.index('GRB%s' % (grb)) + 1 zlString = '_'.join(fileDescription[grbIndex:]) if timeSlides: timeSlidesCache = '%s/timeslides/' % grbdir timeSlidesCache += \ '%s-INSPIRAL_HIPE_GRB%s_TIME_SLIDES_CATEGORY_1-%s-%s.cache'\ % (ifotag, grb, datastart, dataduration) # Count and store how many long slides they are, and their names # Open cache and remove the TMPLTBANK file slideCache = lal.Cache.fromfile(open(timeSlidesCache, 'r')) slideCache = slideCache.sieve(description=usertag) # Identify and store unique desciriptions slideStrings = [zlString] for entry in slideCache: fileDescription = entry.description.split('_') # Remove user tag and split bank number grbIndex = fileDescription.index('GRB%s' % (grb)) + 1 redDescription = '_'.join(fileDescription[grbIndex:]) if redDescription not in slideStrings: slideStrings.append(redDescription) numLongSlides = len(slideStrings) else: timeSlidesCache = None # ============== # set parameters # ============== universe = cp.get('condor', 'universe') for (job, executable) in cp.items('condor'): if job == 'universe': continue # replace tilde in executable executable = os.path.expanduser(executable) # replace environment variables in executable 
executable = os.path.expandvars(executable) # copy executable to exedir executable2 = os.path.join(outdir, exedir, os.path.basename(executable)) if not os.path.isfile(executable2) or\ not os.path.samefile(executable, executable2): shutil.copy(executable, executable2) cp.set('condor', job, executable2) dirsList = ['trig_combiner', 'efficiency', 'injection-efficiency'] if timeSlides: dirsList.append('trig_combiner_part2') for dir in dirsList: cp.set(dir, 'pad-data', cp.get('data', 'pad-data')) cp.set(dir, 'segment-length', cp.get('data', 'segment-length')) # ========== # write dags # ========== if verbose: print >> sys.stdout print >> sys.stdout, "Generating dag..." # initialise uberdag dagtag = os.path.splitext(os.path.basename(inifile))[0] uberdag = pipeline.CondorDAG("%s/%s_uberdag.log" % (logdir, dagtag)) uberdag.set_dag_file('%s_uberdag' % (dagtag)) DAGManNode = {} # ================== # generate time tags # ================== numtrials = cp.getint('input', 'num-trials') timetags = [ 'ALL_TIMES', 'ONSOURCE', 'OFFSOURCE' ] +\ [ 'OFFTRIAL_%d' % t for t in xrange(1,numtrials+1) ] if timeSlides: timeSlideTags = ['ALL_TIMES', 'OFFSOURCE'] minmemory = 0 if cp.has_option('pipeline', 'minimum-memory'): minmemory = cp.getfloat('pipeline', 'minimum-memory') # ============= # trig_combiner # ============= if run_combiner: tag = 'trig_combiner' exe = cp.get('condor', tag) # generate job dag = init_subDAG(tag, outdir, logdir, cp) job = init_job(exe, universe, tag, outdir, logdir, cp, minmemory) # setup onoff node if timeSlides: node = trig_combiner_setup(job, tag, ifotag, usertag, grb, None,\ grbdir, numtrials, outdir, slidetag=zlString,\ timeslidecache=onoffcache) else: node = trig_combiner_setup(job, tag, ifotag, usertag, grb, onoffcache,\ grbdir, numtrials, outdir) dag.add_node(node) if timeSlides: # setup long slide nodes for slideTag in slideStrings: if slideTag == zlString: continue node = trig_combiner_setup(job, tag, ifotag, usertag, grb, None,\ grbdir, numtrials, outdir,slidetag=slideTag,\ timeslidecache=timeSlidesCache,) dag.add_node(node) # finalise DAG DAGManNode[tag] = finalise_DAG(dag) uberdag.add_node(DAGManNode[tag]) # ============ # trig_cluster # ============ trigfile = {} clstfile = {} for timetype in timetags: trigfile[timetype] = '%s-%s_GRB%s_%s-%s-%s.xml.gz'\ % (ifotag, usertag, grb, timetype, start, duration) clstfile[timetype] = trigfile[timetype].replace(timetype,\ '%s_CLUSTERED' % timetype) if timeSlides: trigslidefile = {} clstslidefile = {} fileList = {} for timetype in timeSlideTags: fileList[timetype] = [] for slideString in slideStrings: trigslidefile[timetype+slideString] = \ '%s-%s_TIMESLIDES_GRB%s_%s_%s-%s-%s.xml.gz'\ % (ifotag,usertag,grb,slideString,timetype,start,duration) clstslidefile[timetype+slideString] = \ trigslidefile[timetype+slideString].replace(\ timetype,'%s_CLUSTERED' % timetype) fileList[timetype].append(clstslidefile[timetype + slideString]) if run_clustering: tag = 'trig_cluster' exe = cp.get('condor', tag) # generate job dag = init_subDAG(tag, outdir, logdir, cp) job = init_job(exe, universe, tag, outdir, logdir, cp, minmemory) # loop time tags for timetype in timetags: node = trig_cluster_setup(job, tag, trigfile[timetype], outdir) dag.add_node(node) if timeSlides: for timetype in timeSlideTags: for slideString in slideStrings: node = trig_cluster_setup(job, tag,\ trigslidefile[timetype+slideString], outdir) dag.add_node(node) # finalise DAG parents = [] if run_combiner: parents.append(DAGManNode['trig_combiner']) DAGManNode[tag] = 
finalise_DAG(dag, parents) uberdag.add_node(DAGManNode[tag]) # ========= # trigcombiner (part 2) # ========= # Only needed if doing time slides and output files are split up at this point # This only combines clustered triggers, as there are too many unclustered if timeSlides: tag = 'trig_combiner_part2' exe = cp.get('condor', 'trig_combiner') # generate job dag = init_subDAG(tag, outdir, logdir, cp) job = init_job(exe, universe, tag, outdir, logdir, cp, minmemory) # Create cache file currTag = 'COMBINED_CLUSTERED_COHPTF_%s' % (usertag) cache_fname = '%s-%s_GRB%s_%s-%s-%s.cache'\ % (ifotag,currTag,grb,'ALL_TIMES',start,duration) output_data_cache = lal.Cache.from_urls(fileList['ALL_TIMES']) output_data_cache.tofile(open(cache_fname, "w")) for timetype in timeSlideTags: clstslidefile[timetype] = \ '%s-%s_TIMESLIDES_GRB%s_%s-%s-%s.xml.gz'\ % (ifotag,usertag,grb,timetype,\ start,duration) node = trig_combiner_setup(job, tag, ifotag, usertag, grb, None,\ grbdir, numtrials, outdir,\ timeslidecache=cache_fname) dag.add_node(node) # finalise DAG parents = [] if run_combiner: parents.append(DAGManNode['trig_cluster']) DAGManNode[tag] = finalise_DAG(dag, parents) uberdag.add_node(DAGManNode[tag]) # ========= # injfinder # ========= if run_injfind: # find buffer segments buffseg = '%s/%s' % (grbdir, 'bufferSeg.txt') if not os.path.isfile(buffseg): raise ValueError, 'Cannot find buffer segment file as %s' % buffseg tag = 'injfinder' exe = cp.get('condor', tag) # generate job dag = init_subDAG(tag, outdir, logdir, cp) job = init_job(exe, universe, tag, outdir, logdir, cp, minmemory) # construct arguments job.add_opt('output-dir', outdir) job.add_opt('exclude-segments', buffseg) for injrun in injruns: node = injfind_setup(job, tag, '%s/%s' %(grbdir,injrun), injrun,\ ifotag, grb, datastart, dataduration) dag.add_node(node) # finalise DAG parents = [] DAGManNode[tag] = finalise_DAG(dag, parents) uberdag.add_node(DAGManNode[tag]) # =========== # injcombiner # =========== tag = 'injcombiner' # get necessary configuration variables injpatterns = cp.get('%s-meta' % tag, 'injection-patterns').split(',') inclinations = map(int, cp.get('%s-meta' % tag,\ 'injection-inclinations').split(',')) if run_injcombiner: exe = cp.get('condor', tag) # generate job dag = init_subDAG(tag, outdir, logdir, cp) job = init_job(exe, universe, tag, outdir, logdir, cp, minmemory) # generate cache of FOUND/MISSED files fmcachefile = '%s/%s-GRB%s_FOUNDMISSED-%s-%s.cache'\ % (outdir, ifotag, grb, start, duration) fmfiles = ['%s/%s-INJECTION_GRB%s_%s_FOUND-%s-%s.xml'\ % (outdir, ifotag, grb, injrun, start, duration)\ for injrun in injruns] + \ ['%s/%s-INJECTION_GRB%s_%s_MISSED-%s-%s.xml'\ % (outdir, ifotag, grb, injrun, start, duration)\ for injrun in injruns] fmcache = lal.Cache.from_urls(fmfiles) fmcache.tofile(open(fmcachefile, 'w')) for injpattern in injpatterns: if injpattern == 'NONE': continue for inc in inclinations: node = injcombiner_setup(job, tag, outdir, fmcachefile, injpattern, inc) dag.add_node(node) # finalise DAG parents = [] if run_injfind: parents.append(DAGManNode['injfinder']) DAGManNode[tag] = finalise_DAG(dag, parents) uberdag.add_node(DAGManNode[tag]) # =========== # sbv_plotter # =========== filteredInjRuns = [] filteredDistRuns = {} # Append runs that have not been split if injruns: for run in injruns: append = True for injpattern in injpatterns: if injpattern in run: append = False break if append: filteredInjRuns.append(run) filteredDistRuns[run] = run # and append the runs that have been split for 
injpattern in injpatterns: if injpattern == 'NONE': continue distRun = None for run in injruns: if injpattern in run: distRun = run break if not distRun: raise BrokenError, "Cannot find any injections matching %s in ini file"\ % (injpattern) for inc in inclinations: injrun = 'injectionsAstro%s_FILTERED_%d' % (injpattern, inc) filteredInjRuns.append(injrun) filteredDistRuns[injrun] = distRun if run_sbvplotter: tag = 'sbv_plotter' exe = cp.get('condor', tag) # generate job dag = init_subDAG(tag, outdir, logdir, cp) job = init_job(exe, universe, tag, outdir, logdir, cp, minmemory) for timetype in ['ALL_TIMES', 'OFFSOURCE']: # setup SBV plotting job sbvoutpath = '%s/%s/%s/plots' % (outdir, plotdir, timetype) node = sbv_setup(job, tag, trigfile[timetype], grb, sbvoutpath, grbdir,\ vetodir=rundir) dag.add_node(node) # setup SBV clustered plotting job sbvoutpath = '%s/%s/%s/plots_clustered' % (outdir, plotdir, timetype) node = sbv_setup(job, tag, clstfile[timetype], grb, sbvoutpath, grbdir,\ vetodir=rundir) dag.add_node(node) if timeSlides: for timetype in timeSlideTags: # setup SBV clustered plotting job sbvoutpath = '%s/%s/%s_slides/plots_clustered'\ % (outdir, plotdir, timetype) node = sbv_setup(job, tag, clstslidefile[timetype], grb, sbvoutpath,\ grbdir, vetodir=rundir) dag.add_node(node) # Make the injection plot with whatever background is available if timeSlides: injclstfile = clstslidefile['OFFSOURCE'] else: injclstfile = clstfile['OFFSOURCE'] for injrun in filteredInjRuns: # setup SBV clusterd injection plots injfile = '%s/%s-INJECTION_GRB%s_%s_FOUND-%d-%d.xml'\ % (outdir, ifotag, grb, injrun, start, duration) sbvoutpath = '%s/%s/%s/plots_clustered' % (outdir, plotdir, injrun) node = sbv_setup(job, tag, injclstfile, grb, sbvoutpath, grbdir,\ vetodir=rundir,injfile=injfile) dag.add_node(node) # finalise DAG parents = [] if run_clustering: parents.append(DAGManNode['trig_cluster']) if run_injcombiner: parents.append(DAGManNode['injcombiner']) if timeSlides: parents.append(DAGManNode['trig_combiner_part2']) DAGManNode[tag] = finalise_DAG(dag, parents) uberdag.add_node(DAGManNode[tag]) # ========== # efficiency # ========== if run_efficiency: tag = 'efficiency' exe = cp.get('condor', tag) # generate job dag = init_subDAG(tag, outdir, logdir, cp) job = init_job(exe, universe, tag, outdir, logdir, cp, minmemory) # construct arguments job.add_opt('segment-dir', grbdir) # Select the appropriate offsource file if timeSlides: offfile = clstslidefile['OFFSOURCE'] else: offfile = clstfile['OFFSOURCE'] for timetype in timetags: if timetype in ['OFFSOURCE', 'ALL_TIMES']: continue # setup onoff efficiency jobs effoutdir = '%s/%s/%s/efficiency' % (outdir, plotdir, timetype) node = onoff_efficiency_setup(job, tag, effoutdir, grbdir,\ offfile, clstfile[timetype],\ vetodir=rundir) dag.add_node(node) # define new job for inejction efficiency tag = 'injection-efficiency' job = init_job(exe, universe, tag, outdir, logdir, cp, minmemory) for timetype in timetags: if timetype in ['OFFSOURCE', 'ALL_TIMES']: continue for injrun in filteredInjRuns: # setup injection efficiency jobs found = '%s/%s-INJECTION_GRB%s_%s_FOUND-%d-%d.xml'\ % (outdir, ifotag, grb, injrun, start, duration) missed = found.replace('FOUND', 'MISSED') effoutdir = '%s/%s/%s/efficiency_%s' % \ (outdir, plotdir, injrun, timetype) node = injection_efficiency_setup(job, tag, effoutdir, grbdir,\ offfile,clstfile[timetype],\ filteredDistRuns[injrun], injcp,found, missed,\ vetodir=rundir) dag.add_node(node) # finalise DAG parents = [] if 
run_clustering: parents.append(DAGManNode['trig_cluster']) if run_injcombiner: parents.append(DAGManNode['injcombiner']) if timeSlides: parents.append(DAGManNode['trig_combiner_part2']) DAGManNode[tag] = finalise_DAG(dag, parents) uberdag.add_node(DAGManNode[tag]) # ================= # Horizon dist plot # ================= if run_horizon_dist_plot: tag = 'horizon_dist' exe = cp.get('condor', tag) # generate job dag = init_subDAG(tag, outdir, logdir, cp) job = init_job(exe, universe, tag, outdir, logdir, cp, minmemory) # setup single node node = horizon_distance_setup(job, tag, ifotag, grb, onoffcache,\ rundir, '%s/%s' %(outdir,plotdir)) dag.add_node(node) # finalise DAG parents = [] if run_combiner: parents.append(DAGManNode['trig_combiner']) DAGManNode[tag] = finalise_DAG(dag, parents=parents) uberdag.add_node(DAGManNode[tag]) # ============= # write uberdag # ============= uberdag.write_sub_files() uberdag.write_dag() # print message print >> sys.stdout print >> sys.stdout, '------------------------------------' print >> sys.stdout, 'Ready. To submit, run:' print >> sys.stdout subcmd = 'condor_submit_dag ' if cp.has_option('pipeline', 'maxjobs'): subcmd += '-maxjobs %s ' % cp.getint('pipeline', 'maxjobs') subcmd += os.path.abspath(uberdag.get_dag_file()) print >> sys.stdout, subcmd print >> sys.stdout print >> sys.stdout, 'Once submitted, to monitor status, run:' print >> sys.stdout print >>sys.stdout, 'lalapps_ihope_status --dag-file %s'\ % (os.path.abspath(uberdag.get_dag_file())) print >> sys.stdout, '------------------------------------' print >> sys.stdout
            m1_min = m1_priors[roq][0]
            m1_max = m1_priors[roq][1]
            m2_min = m2_priors[roq][0]
            m2_max = m2_priors[roq][1]
            this_cp.set('engine', 'mass1-min', str(m1_min))
            this_cp.set('engine', 'mass1-max', str(m1_max))
            this_cp.set('engine', 'mass2-min', str(m2_min))
            this_cp.set('engine', 'mass2-max', str(m2_max))
            yield this_cp
    return  # end of generator; raising StopIteration here is an error under PEP 479

# Create an outer dag to wrap the sub-dags
outerdaglog = os.path.join(daglogdir,
                           'lalinference_multi_' + str(uuid.uuid1()) + '.log')
outerdag = pipeline.CondorDAG(outerdaglog, dax=False)
outerdag.set_dag_file(os.path.join(cp.get('paths', 'basedir'), 'multidag'))

master_cp = cp

# Iterate over variations and generate sub-dags
for cp in generate_variations(master_cp, variations):
    basepath = cp.get('paths', 'basedir')
    # Link injection file into place as paths outside basedir are
    # inaccessible to containerised jobs
    if cp.has_option('input', 'injection-file'):
        injpath = cp.get('input', 'injection-file')
        myinjpath = os.path.join(basepath, os.path.basename(injpath))
        if os.path.abspath(myinjpath) != os.path.abspath(injpath):
            # If the injection file does not exist in the run dir, link it into place
            # Useful for singularity jobs which see only rundir
            if os.path.lexists(myinjpath):
                # If the path exists, see if it is a link to the current file
if not os.path.exists(iniFile):
    print 'Cannot find iniFile: ', os.path.basename(iniFile)
    os.abort()
if not os.path.exists(singleList):
    print 'Cannot find segment list: ', os.path.basename(singleList)
    os.abort()

# Load up the iniFile
cp = ConfigParser.ConfigParser()
cp.read(iniFile)

# Setup DAG
outputPath = os.path.abspath(os.path.normpath(outputPath))
dagLog = os.path.normpath(dagLocks + "/" + outputName + ".LOG")
myDag = pipeline.CondorDAG(os.path.normpath(dagLog))
myDag.set_dag_file(os.path.normpath(str(outputName)))

# Setup SUB
tsHandler = os.path.expanduser(cp.get('condor', 'clustertool'))
tsUniverse = str(cp.get('condor', 'clustertool_universe')).lower()
if not os.path.exists(str(tsHandler)):
    print "ERROR: Can't find tracksearch handler executable."
    os.abort()
myJob = pipeline.CondorDAGJob(tsUniverse, tsHandler)
myJob.set_sub_file(str(outputName) + ".sub")
logDir = os.path.normpath(outputPath + "/logs/")
buildDir(logDir)
myJob.set_stdout_file(os.path.abspath(os.path.normpath(
    logDir + "/log_$(cluster)_$(process).out")))
#
# we already checked that the last 4 characters of the config file are '.ini'
basename = opts.config_file[:-4]
logname = basename + '.dag.log.'

try:
    os.mkdir(opts.log_path)
except:
    pass

tempfile.tempdir = opts.log_path
logfile = tempfile.mktemp(prefix=logname)
fh = open(logfile, "w")
fh.close()

##############################################################################
# create the DAG writing the log to the specified directory
dag = pipeline.CondorDAG(logfile, dax=opts.dax)
dag.set_dag_file(basename)
dag.set_dax_file(basename)

##############################################################################
# Set up the IFOs and get the appropriate segments

ifos = []
for option in ["g1-data", "h1-data", "h2-data", "l1-data", "v1-data"]:
    if cp.has_option("ifo-details", option):
        ifos.append(option[0:2].upper())

if cp.has_option("ifo-details", "analyze-all"):
    print >> sys.stderr, \
        "The inspiral pipeline does not yet support coincidence between"
    print >> sys.stderr, "all five IFOs. Do not use the analyze-all option."
    sys.exit(1)
arg_list = cip_args
exe = which("util_ConstructIntrinsicPosterior_GenericCoordinates.py")
cmd.write('#!/usr/bin/env bash\n')
cmd.write(exe + ' ' + arg_list)
cmd.close()
st = os.stat(cmdname)
import stat
os.chmod(cmdname, st.st_mode | stat.S_IEXEC)

###
### DAG generation
###
log_dir = "%s/logs/" % opts.working_directory  # directory to hold job log files
dag = pipeline.CondorDAG(log=os.getcwd())
mkdir(log_dir)  # Make a directory to hold log files of jobs

###
### Configuration 0: Fit job
###
if opts.workflow == 'single' or opts.workflow == 'fit':
    if opts.workflow == 'fit':
        cip_args += ' --fit-save-gp my_fit.pkl'
    single_job, single_job_name = write_CIP_sub(
        tag='CIP',
        log_dir=log_dir,
        arg_str=cip_args,
        request_memory=opts.request_memory)
    single_job.write_sub_file()
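
    # Illustrative sketch, an assumption rather than code from this script:
    # the fit job defined above would typically be turned into a DAG node and
    # attached to the DAG roughly as follows.
    single_node = pipeline.CondorDAGNode(single_job)
    single_node.set_category('CIP')
    dag.add_node(single_node)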
def main(args=None): parser = create_parser() args = parser.parse_args(args=args) # apply verbosity to logger args.verbose = max(5 - args.verbose, 0) logger.setLevel(args.verbose * 10) # validate command line arguments if args.ifo is None: parser.error("Cannot determine IFO prefix from sytem, " "please pass --ifo on the command line") if args.executable is None: parser.error("Cannot find omicron on path, please pass " "--executable on the command line") # validate processing options if all((args.skip_root_merge, args.skip_hdf5_merge, args.skip_ligolw_add, args.skip_gzip, not args.archive)): args.skip_postprocessing = True if args.archive: argsd = vars(args) for arg in [ 'skip-root-merge', 'skip-hdf5-merge', 'skip-ligolw-add', 'skip-gzip' ]: if argsd[arg.replace('-', '_')]: parser.error("Cannot use --%s with --archive" % arg) # check conflicts if args.gps is None and args.cache_file is not None: parser.error("Cannot use --cache-file in 'online' mode, " "please use --cache-file with --gps") # extract key variables ifo = args.ifo group = args.group online = args.gps is None # format file-tag as underscore-delimited upper-case string filetag = args.file_tag if filetag: filetag = re.sub(r'[:_\s-]', '_', filetag).rstrip('_').strip('_') if const.OMICRON_FILETAG.lower() in filetag.lower(): afiletag = filetag else: afiletag = '%s_%s' % (filetag, const.OMICRON_FILETAG.upper()) filetag = '_%s' % filetag else: filetag = '' afiletag = const.OMICRON_FILETAG.upper() logger.info("--- Welcome to the Omicron processor ---") # set up containers to keep track of files that we create here tempfiles = [] keepfiles = [] # check rescue against --dagman-option force if args.rescue and args.dagman_option.count('force') > 1: parser.error('--rescue is incompatible with --dagman-option force') elif args.rescue: args.dagman_option.pop(0) logger.info( "Running in RESCUE mode - the workflow will be " "re-generated in memory without any files being written", ) # set omicron version for future use omicronv = utils.get_omicron_version(args.executable) const.OMICRON_VERSION = str(omicronv) os.environ.setdefault('OMICRON_VERSION', str(omicronv)) logger.debug('Omicron version: %s' % omicronv) # -- parse configuration file and get parameters -------------------------- cp = configparser.ConfigParser() cp.read(args.config_file) # validate if not cp.has_section(group): raise configparser.NoSectionError(group) # get params channels = cp.get(group, 'channels').strip('\n').rstrip('\n').split('\n') try: # allow two-column 'channel samplerate' format channels, crates = zip(*[c.split(' ', 1) for c in channels]) except ValueError: crates = [] else: crates = set(crates) logger.debug("%d channels read" % len(channels)) for i in range(len(channels) - 1, -1, -1): # remove excluded channels c = channels[i] if c in args.exclude_channel: channels.pop(i) logger.debug(" removed %r" % c) logger.debug("%d channels to process" % len(channels)) cp.set(group, 'channels', '\n'.join(channels)) frametype = cp.get(group, 'frametype') logger.debug("frametype = %s" % frametype) chunkdur = cp.getint(group, 'chunk-duration') logger.debug("chunkdur = %s" % chunkdur) segdur = cp.getint(group, 'segment-duration') logger.debug("segdur = %s" % segdur) overlap = cp.getint(group, 'overlap-duration') logger.debug("overlap = %s" % overlap) padding = int(overlap / 2) logger.debug("padding = %s" % padding) try: frange = tuple(map(float, cp.get(group, 'frequency-range').split())) except configparser.NoOptionError as e: try: flow = cp.getfloat(group, 'flow') fhigh = 
cp.getfloat(group, 'flow') except configparser.NoOptionError: raise e frange = (flow, fhigh) logger.debug('frequencyrange = [%s, %s)' % tuple(frange)) try: sampling = cp.getfloat(group, 'sample-frequency') except configparser.NoOptionError: if len(crates) == 1: sampling = float(crates[0]) elif len(crates) > 1: raise ValueError( "No sample-frequency parameter given, and multiple " "sample frequencies parsed from channels list, " "cannot continue", ) else: sampling = None if sampling: logger.debug('samplingfrequency = %s' % sampling) # get state channel try: statechannel = cp.get(group, 'state-channel') except configparser.NoOptionError: statechannel = None else: try: statebits = list( map( float, cp.get(group, 'state-bits').split(','), )) except configparser.NoOptionError: statebits = [0] try: stateft = cp.get(group, 'state-frametype') except configparser.NoOptionError as e: e.args = ('%s, this must be specified if state-channel is given' % str(e), ) raise # get state flag (if given) try: stateflag = cp.get(group, 'state-flag') except configparser.NoOptionError: stateflag = None else: logger.debug("State flag = %s" % stateflag) if not statechannel: # map state flag to state channel try: statechannel, statebits, stateft = ( segments.STATE_CHANNEL[stateflag]) except KeyError as e: if online or args.no_segdb: # only raise if channel required e.args = ('Cannot map state flag %r to channel' % stateflag, ) raise else: pass if statechannel: logger.debug("State channel = %s" % statechannel) logger.debug("State bits = %s" % ', '.join(map(str, statebits))) logger.debug("State frametype = %s" % stateft) # parse padding for state segments if statechannel or stateflag: try: statepad = cp.get(group, 'state-padding') except configparser.NoOptionError: statepad = (0, 0) else: try: p = int(statepad) except ValueError: statepad = tuple(map(float, statepad.split(',', 1))) else: statepad = (p, p) logger.debug("State padding: %s" % str(statepad)) rundir = utils.get_output_path(args) # convert to omicron parameters format oconfig = parameters.OmicronParameters.from_channel_list_config( cp, group, version=omicronv) # and validate things oconfig.validate() # -- set directories ------------------------------------------------------ rundir.mkdir(exist_ok=True, parents=True) logger.info("Using run directory\n%s" % rundir) cachedir = rundir / "cache" condir = rundir / "condor" logdir = rundir / "logs" pardir = rundir / "parameters" trigdir = rundir / "triggers" for d in [cachedir, condir, logdir, pardir, trigdir]: d.mkdir(exist_ok=True) oconfig.set('OUTPUT', 'DIRECTORY', str(trigdir)) # -- check for an existing process ---------------------------------------- dagpath = condir / "{}.dag".format(DAG_TAG) # check dagman lock file running = condor.dag_is_running(dagpath) if running: msg = "Detected {} already running in {}".format( dagpath, rundir, ) if not args.reattach: raise RuntimeError(msg) logger.info("{}, will reattach".format(msg)) else: args.reattach = False # check dagman rescue files nrescue = len( list(condir.glob("{}.rescue[0-9][0-9][0-9]".format(dagpath.name), ))) if args.rescue and not nrescue: raise RuntimeError( "--rescue given but no rescue DAG files found for {}".format( dagpath, ), ) if nrescue and not args.rescue and "force" not in args.dagman_option: raise RuntimeError( "rescue DAGs found for {} but `--rescue` not given and " "`--dagman-option force` not given, cannot continue".format( dagpath, ), ) newdag = not args.rescue and not args.reattach # -- find run segment 
----------------------------------------------------- segfile = str(rundir / "segments.txt") keepfiles.append(segfile) if newdag and online: # get limit of available data (allowing for padding) end = data.get_latest_data_gps(ifo, frametype) - padding try: # start from where we got to last time start = segments.get_last_run_segment(segfile)[1] except IOError: # otherwise start with a sensible amount of data if args.use_dev_shm: # process one chunk logger.debug("No online segment record, starting with " "%s seconds" % chunkdur) start = end - chunkdur + padding else: # process the last 4000 seconds (arbitrarily) logger.debug("No online segment record, starting with " "4000 seconds") start = end - 4000 else: logger.debug("Online segment record recovered") elif online: start, end = segments.get_last_run_segment(segfile) else: start, end = args.gps duration = end - start datastart = start - padding dataend = end + padding dataduration = dataend - datastart logger.info("Processing segment determined as") logger.info(" %d %d" % (datastart, dataend)) logger.info("Duration = %d seconds" % dataduration) span = (start, end) # -- find segments and frame files ---------------------------------------- # minimum allowed duration is one full chunk minduration = 1 * chunkdur # validate span is long enough if dataduration < minduration and online: logger.info("Segment is too short (%d < %d), please try again later" % (duration, minduration)) clean_exit(0, tempfiles) elif dataduration < minduration: raise ValueError( "Segment [%d, %d) is too short (%d < %d), please " "extend the segment, or shorten the timing parameters." % (start, end, duration, chunkdur - padding * 2), ) # -- find run segments # get segments from state vector if (online and statechannel) or (statechannel and not stateflag) or (statechannel and args.no_segdb): logger.info("Finding segments for relevant state...") if statebits == "guardian": # use guardian segs = segments.get_guardian_segments( statechannel, stateft, datastart, dataend, pad=statepad, ) else: segs = segments.get_state_segments( statechannel, stateft, datastart, dataend, bits=statebits, pad=statepad, ) # get segments from segment database elif stateflag: logger.info("Querying segments for relevant state...") segs = segments.query_state_segments(stateflag, datastart, dataend, pad=statepad) # get segments from frame availability else: segs = segments.get_frame_segments(ifo, frametype, datastart, dataend) # print frame segments recovered if len(segs): logger.info("State/frame segments recovered as") for seg in segs: logger.info(" %d %d [%d]" % (seg[0], seg[1], abs(seg))) logger.info("Duration = %d seconds" % abs(segs)) # if running online, we want to avoid processing up to the extent of # available data, so that the next run doesn't get left with a segment that # is too short to process # There are a few reasons this might be # - the interferometer loses lock a short time after the end of this run # - a restart/other problem means that a frame is missing a short time # after the end of this run # so, work out whether we need to truncate: try: lastseg = segs[-1] except IndexError: truncate = False else: truncate = online and newdag and lastseg[1] == dataend # if final segment is shorter than two chunks, remove it entirely # so that it gets processed next time (when it will either a closed # segment, or long enough to process safely) if truncate and abs(lastseg) < chunkdur * 2: logger.info( "The final segment is too short, but ends at the limit of " "available data, presumably this 
is an active segment. It " "will be removed so that it can be processed properly later", ) segs = type(segs)(segs[:-1]) dataend = lastseg[0] # otherwise, we remove the final chunk (so that the next run has at # least that on which to operate), then truncate to an integer number # of chunks (so that # PSD estimation operates on a consistent amount # of data) elif truncate: logger.info("The final segment touches the limit of available data, " "the end chunk will be removed to guarantee that the next " "online run has enough data over which to operate") t, e = lastseg e -= chunkdur + padding # remove one chunk # now truncate to an integer number of chunks step = chunkdur while t + chunkdur <= e: t += step step = chunkdur - overlap segs[-1] = type(segs[-1])(lastseg[0], t) dataend = segs[-1][1] logger.info("This analysis will now run to %d" % dataend) # recalculate the processing segment dataspan = type(segs)([segments.Segment(datastart, dataend)]) # -- find the frames # find frames under /dev/shm (which creates a cache of temporary files) if args.cache_file: cache = read_cache(str(args.cache_file)) # only cache if we have state segments elif args.use_dev_shm and len(segs): cache = data.find_frames(ifo, frametype, datastart, dataend, on_gaps='warn', tmpdir=cachedir) # remove cached files at end of process tempfiles.extend(filter(lambda p: str(cachedir) in p, cache)) # find frames using datafind else: cache = data.find_frames(ifo, frametype, datastart, dataend, on_gaps='warn') # if not frames for an online run, panic if not online and len(cache) == 0: raise RuntimeError("No frames found for %s-%s" % (ifo[0], frametype)) # work out the segments of data available try: cachesegs = (segments.cache_segments(cache) & dataspan).coalesce() except TypeError: # empty cache cachesegs = type(dataspan)() alldata = False else: try: alldata = cachesegs[-1][1] >= dataspan[-1][1] except IndexError: # no data overlapping span alldata = False # write cache of frames (only if creating a new DAG) cachefile = cachedir / "frames.lcf" keepfiles.append(cachefile) if newdag: data.write_cache(cache, cachefile) oconfig.set('DATA', 'FFL', str(cachefile)) logger.info("Cache of %d frames written to\n%s" % (len(cache), cachefile)) # restrict analysis to available data (and warn about missing data) if segs - cachesegs: logger.warning("Not all state times are available in frames") segs = (cachesegs & segs).coalesce() # apply minimum duration requirement segs = type(segs)(s for s in segs if abs(s) >= segdur) # if all of the data are available, but no analysable segments were found # (i.e. IFO not in right state for all times), record segments.txt if newdag and len(segs) == 0 and online and alldata: logger.info( "No analysable segments found, but up-to-date data are " "available. 
A segments.txt file will be written so we don't " "have to search these data again", ) segments.write_segments(cachesegs, segfile) logger.info("Segments written to\n%s" % segfile) clean_exit(0, tempfiles) # otherwise not all data are available, so elif len(segs) == 0 and online: logger.info("No analysable segments found, please try again later") clean_exit(0, tempfiles) elif len(segs) == 0: raise RuntimeError("No analysable segments found") # and calculate trigger output segments trigsegs = type(segs)(type(s)(*s) for s in segs).contract(padding) # display segments logger.info("Final data segments selected as") for seg in segs: logger.info(" %d %d " % seg + "[%d]" % abs(seg)) logger.info("Duration = %d seconds" % abs(segs)) span = type(trigsegs)([trigsegs.extent()]) logger.info("This will output triggers for") for seg in trigsegs: logger.info(" %d %d " % seg + "[%d]" % abs(seg)) logger.info("Duration = %d seconds" % abs(trigsegs)) # -- config omicron config directory -------------------------------------- tempfiles.append(utils.astropy_config_path(rundir)) # -- make parameters files then generate the DAG -------------------------- fileformats = oconfig.output_formats() # generate a 'master' parameters.txt file for archival purposes if not newdag: # if not writing new dag, dump parameters.txt files to /tmp pardir = gettempdir() parfile, jobfiles = oconfig.write_distributed( pardir, nchannels=args.max_channels_per_job) logger.debug("Created master parameters file\n%s" % parfile) if newdag: keepfiles.append(parfile) # create dag dag = pipeline.CondorDAG(str(logdir / "{}.log".format(DAG_TAG))) dag.set_dag_file(str(dagpath.with_suffix(""))) # set up condor commands for all jobs condorcmds = { 'accounting_group': args.condor_accounting_group, 'accounting_group_user': args.condor_accounting_group_user } for cmd_ in args.condor_command: key, value = cmd_.split('=', 1) condorcmds[key.rstrip().lower()] = value.strip() # create omicron job reqmem = condorcmds.pop('request_memory', 1000) ojob = condor.OmicronProcessJob(args.universe, args.executable, subdir=condir, logdir=logdir, **condorcmds) ojob.add_condor_cmd('request_memory', reqmem) ojob.add_condor_cmd('+OmicronProcess', '"%s"' % group) # create post-processing job ppjob = condor.OmicronProcessJob(args.universe, find_executable('bash'), subdir=condir, logdir=logdir, tag='post-processing', **condorcmds) ppjob.add_condor_cmd('+OmicronPostProcess', '"%s"' % group) ppjob.add_short_opt('e', '') ppnodes = [] rootmerge = find_executable('omicron-root-merge') hdf5merge = find_executable('omicron-hdf5-merge') ligolw_add = find_executable('ligolw_add') gzip = find_executable('gzip') # create node to remove files rmjob = condor.OmicronProcessJob(args.universe, str(condir / "post-process-rm.sh"), subdir=condir, logdir=logdir, tag='post-processing-rm', **condorcmds) rm = find_executable('rm') rmfiles = [] rmjob.add_condor_cmd('+OmicronPostProcess', '"%s"' % group) if args.archive: archivejob = condor.OmicronProcessJob(args.universe, str(condir / "archive.sh"), subdir=condir, logdir=logdir, tag='archive', **condorcmds) archivejob.add_condor_cmd('+OmicronPostProcess', '"%s"' % group) archivefiles = {} # loop over data segments for s, e in segs: # build trigger segment ts = s + padding te = e - padding td = te - ts # distribute segment across multiple nodes nodesegs = oconfig.distribute_segment(s, e, nperjob=args.max_chunks_per_job) omicronfiles = {} # build node for each parameter file for i, pf in enumerate(jobfiles): chanlist = jobfiles[pf] nodes = [] # loop 
over distributed segments for subseg in nodesegs: if not args.skip_omicron: # work out files for this job nodefiles = oconfig.output_files(*subseg) # build node node = pipeline.CondorDAGNode(ojob) node.set_category('omicron') node.set_retry(args.condor_retry) node.add_var_arg(str(subseg[0])) node.add_var_arg(str(subseg[1])) node.add_file_arg(pf) for chan in chanlist: for form, flist in nodefiles[chan].items(): # record file as output from this node for f in flist: node._CondorDAGNode__output_files.append(f) # record file as output for this channel try: omicronfiles[chan][form].extend(flist) except KeyError: try: omicronfiles[chan][form] = flist except KeyError: omicronfiles[chan] = {form: flist} dag.add_node(node) nodes.append(node) # post-process (one post-processing job per channel # per data segment) if not args.skip_postprocessing: script = condir / "post-process-{}-{}-{}.sh".format(i, s, e) ppnode = pipeline.CondorDAGNode(ppjob) ppnode.add_var_arg(str(script)) operations = [] # build post-processing nodes for each channel for c in chanlist: operations.append('\n# %s' % c) chandir = trigdir / c # work out filenames for coalesced files archpath = Path( io.get_archive_filename( c, ts, td, filetag=afiletag, ext='root', )) mergepath = chandir / archpath.name target = str(archpath.parent) # add ROOT operations if 'root' in fileformats: rootfiles = ' '.join(omicronfiles[c]['root']) for f in omicronfiles[c]['root']: ppnode._CondorDAGNode__input_files.append(f) if args.skip_root_merge or (len( omicronfiles[c]['root']) == 1): root = rootfiles else: root = str(mergepath) operations.append('%s %s %s --strict' % (rootmerge, rootfiles, root)) rmfiles.append(rootfiles) ppnode._CondorDAGNode__output_files.append(root) if args.archive: try: archivefiles[target].append(root) except KeyError: archivefiles[target] = [root] rmfiles.append(root) # add HDF5 operations if 'hdf5' in fileformats: hdf5files = ' '.join(omicronfiles[c]['hdf5']) for f in omicronfiles[c]['hdf5']: ppnode._CondorDAGNode__input_files.append(f) if args.skip_hdf5_merge or (len( omicronfiles[c]['hdf5']) == 1): hdf5 = hdf5files else: hdf5 = str(mergepath.with_suffix(".h5")) operations.append( '{cmd} {infiles} {outfile}'.format( cmd=hdf5merge, infiles=hdf5files, outfile=hdf5, ), ) rmfiles.append(hdf5files) ppnode._CondorDAGNode__output_files.append(hdf5) if args.archive: try: archivefiles[target].append(hdf5) except KeyError: archivefiles[target] = [hdf5] rmfiles.append(hdf5) # add LIGO_LW operations if 'xml' in fileformats: xmlfiles = ' '.join(omicronfiles[c]['xml']) for f in omicronfiles[c]['xml']: ppnode._CondorDAGNode__input_files.append(f) if (args.skip_ligolw_add or len(omicronfiles[c]['xml']) == 1): xml = xmlfiles else: xml = str(mergepath.with_suffix(".xml")) operations.append( '%s %s --ilwdchar-compat --output %s' % (ligolw_add, xmlfiles, xml), ) rmfiles.append(xmlfiles) ppnode._CondorDAGNode__output_files.append(xml) if not args.skip_gzip: operations.append( '%s --force --stdout %s > %s.gz' % (gzip, xml, xml)) rmfiles.append(xml) xml = str(mergepath.with_suffix(".xml.gz")) ppnode._CondorDAGNode__output_files.append(xml) if args.archive: try: archivefiles[target].append(xml) except KeyError: archivefiles[target] = [xml] rmfiles.append(xml) # add ASCII operations if 'txt' in fileformats: txtfiles = ' '.join(omicronfiles[c]['txt']) for f in omicronfiles[c]['txt']: ppnode._CondorDAGNode__input_files.append(f) if args.archive: try: archivefiles[target].append(txtfiles) except KeyError: archivefiles[target] = [txtfiles] 
rmfiles.append(txtfiles) ppnode.set_category('postprocessing') ppnode.set_retry(str(args.condor_retry)) if not args.skip_omicron: for node in nodes: ppnode.add_parent(node) dag.add_node(ppnode) ppnodes.append(ppnode) tempfiles.append(script) # write post-processing file if not args.rescue: with script.open("w") as f: # add header print('#!/bin/bash -e\n#', file=f) print("# omicron-process post-processing", file=f) print( '#\n# File created by\n# {}\n#'.format( ' '.join(sys.argv), ), file=f, ) print("# Group: %s" % group, file=f) print("# Segment: [%d, %d)" % (s, e), file=f) print("# Channels:\n#", file=f) for c in chanlist: print('# %s' % c, file=f) # add post-processing operations print('\n'.join(operations), file=f) if newdag: script.chmod(0o755) # set 'strict' option for Omicron # this is done after the nodes are written so that 'strict' is last in # the call ojob.add_arg('strict') # do all archiving last, once all post-processing has completed if args.archive: archivenode = pipeline.CondorDAGNode(archivejob) acache = {fmt: list() for fmt in fileformats} if newdag: # write shell script to seed archive with open(archivejob.get_executable(), 'w') as f: print('#!/bin/bash -e\n', file=f) for gpsdir, filelist in archivefiles.items(): for fn in filelist: archivenode._CondorDAGNode__input_files.append(fn) # write 'mv' op to script print("mkdir -p %s" % gpsdir, file=f) print("cp %s %s" % (' '.join(filelist), gpsdir), file=f) # record archived files in caches filenames = [ str(Path(gpsdir) / x.name) for x in map(Path, filelist) ] for fn in filenames: archivenode._CondorDAGNode__output_files.append(fn) for fmt, extensions in { 'xml': ('.xml.gz', '.xml'), 'root': '.root', 'hdf5': '.h5', 'txt': '.txt', }.items(): try: acache[fmt].extend( filter(lambda x: x.endswith(extensions), filenames)) except KeyError: # file format not used continue os.chmod(archivejob.get_executable(), 0o755) # write caches to disk for fmt, fcache in acache.items(): cachefile = cachedir / "omicron-{0}.lcf".format(fmt) data.write_cache(fcache, cachefile) logger.debug("{0} cache written to {1}".format(fmt, cachefile)) # add node to DAG for node in ppnodes: archivenode.add_parent(node) archivenode.set_retry(args.condor_retry) archivenode.set_category('archive') dag.add_node(archivenode) tempfiles.append(archivejob.get_executable()) # add rm job right at the end rmnode = pipeline.CondorDAGNode(rmjob) rmscript = rmjob.get_executable() with open(rmscript, 'w') as f: print('#!/bin/bash -e\n#', file=f) print("# omicron-process post-processing-rm", file=f) print('#\n# File created by\n# %s\n#' % ' '.join(sys.argv), file=f) print("# Group: %s" % group, file=f) print("# Segment: [%d, %d)" % (s, e), file=f) print("# Channels:\n#", file=f) for c in channels: print('# %s' % c, file=f) print('', file=f) for rmset in rmfiles: print('%s -f %s' % (rm, rmset), file=f) if newdag: os.chmod(rmscript, 0o755) tempfiles.append(rmscript) rmnode.set_category('postprocessing') if args.archive: # run this after archiving rmnode.add_parent(archivenode) else: # or just after post-processing if not archiving for node in ppnodes: rmnode.add_parent(node) dag.add_node(rmnode) # print DAG to file dagfile = Path(dag.get_dag_file()).resolve(strict=False) if args.rescue: logger.info( "In --rescue mode, this DAG has been reproduced in memory " "for safety, but will not be written to disk, the file is:", ) elif newdag: dag.write_sub_files() dag.write_dag() dag.write_script() with open(dagfile, 'a') as f: print("DOT", dagfile.with_suffix(".dot"), file=f) 
logger.info("Dag with %d nodes written to" % len(dag.get_nodes())) print(dagfile) if args.no_submit: if newdag: segments.write_segments(span, segfile) logger.info("Segments written to\n%s" % segfile) sys.exit(0) # -- submit the DAG and babysit ------------------------------------------- # submit DAG if args.rescue: logger.info("--- Submitting rescue DAG to condor ----") elif args.reattach: logger.info("--- Reattaching to existing DAG --------") else: logger.info("--- Submitting DAG to condor -----------") for i in range(args.submit_rescue_dag + 1): if args.reattach: # find ID of existing DAG dagid = int( condor.find_job(Owner=getuser(), OmicronDAGMan=group)['ClusterId']) logger.info("Found existing condor ID = %d" % dagid) else: # or submit DAG dagmanargs = set() if online: dagmanopts = {'-append': '+OmicronDAGMan=\"%s\"' % group} else: dagmanopts = {} for x in args.dagman_option: x = '-%s' % x try: key, val = x.split('=', 1) except ValueError: dagmanargs.add(x) else: dagmanopts[key] = val dagid = condor.submit_dag( str(dagfile), *list(dagmanargs), **dagmanopts, ) logger.info("Condor ID = %d" % dagid) # write segments now -- this means that online processing will # _always_ move on even if the workflow fails if i == 0: segments.write_segments(span, segfile) logger.info("Segments written to\n%s" % segfile) if 'force' in args.dagman_option: args.dagman_option.pop(args.dagman_option.index('force')) # monitor the dag logger.debug("----------------------------------------") logger.info("Monitoring DAG:") check_call([ "pycondor", "monitor", "--time", "5", "--length", "36", str(dagfile), ]) print() logger.debug("----------------------------------------") sleep(5) try: stat = condor.get_dag_status(dagid) except OSError as exc: # query failed logger.warning(str(exc)) stat = {} # log exitcode if "exitcode" not in stat: logger.warning("DAG has exited, status unknown") break if not stat["exitcode"]: logger.info("DAG has exited with status {}".format( stat.get("exitcode", "unknown"), )) break logger.critical( "DAG has exited with status {}".format(stat['exitcode']), ) # handle failure if i == args.submit_rescue_dag: raise RuntimeError("DAG has failed to complete %d times" % (args.submit_rescue_dag + 1)) else: rescue = condor.find_rescue_dag(str(dagfile)) logger.warning("Rescue DAG %s was generated" % rescue) # mark output and error files of condor nodes that passed to be deleted try: for node, files in condor.get_out_err_files(dagid, exitcode=0).items(): tempfiles.extend(files) except RuntimeError: pass # archive files stub = '%d-%d' % (start, end) for f in map(Path, ["{}.dagman.out".format(dagfile)] + keepfiles): archive = logdir / "{0[0]}.{1}.{0[1]}".format( f.name.split(".", 1), stub, ) if str(f) == str(segfile): shutil.copyfile(f, archive) else: f.rename(archive) logger.debug("Archived path\n{} --> {}".format(f, archive)) # clean up temporary files tempfiles.extend(trigdir.glob("ffconvert.*.ffl")) clean_tempfiles(tempfiles) # and exit logger.info("--- Processing complete ----------------")
if opts.do_coh_PTF:
    for program in ['coh_PTF_inspiral', 'coh_PTF_spin_checker']:
        if not opts.ipn:
            cp.set(program, 'right-ascension', str(ext_trigs[0].event_ra))
            cp.set(program, 'declination', str(ext_trigs[0].event_dec))
        cp.set(program, 'trigger-time', str(ext_trigs[0].start_time))
        cp.set(program, 'trigger-time-ns', str(ext_trigs[0].start_time_ns))

############################################################################
# set up the über dag for all intervals and all injections
tag = opts.config_file.rstrip(".ini")
if opts.user_tag:
    tag += '_' + opts.user_tag
uberdag = pipeline.CondorDAG("%s/%s_uberdag.log" % (opts.log_path, tag))
uberdag.set_dag_file("%s_uberdag" % tag)

##############################################################################
# loop over the GRBs and construct their required sub-DAGs
grb_caches = []
for grb in ext_trigs:

    # name and the directory
    idirectory = "GRB" + str(grb.event_number_grb)
    if opts.verbose:
        print("* Constructing workflow for", idirectory)
    mkdir(idirectory, opts.overwrite_dir)
    prior_cp.set('engine', 'zeroLogLike', '')
    prior_cp.set('engine', 'nlive', str(20 * opts.trials))
elif prior_cp.get('analysis', 'engine') == 'lalinferencemcmc':
    prior_cp.set('engine', 'Neff', str(opts.trials))
    prior_cp.set('engine', 'zeroLogLike', '')
elif prior_cp.get('analysis', 'engine') == 'lalinferencebambi':
    prior_cp.set('engine', 'zeroLogLike', '')
    prior_cp.set('engine', 'nlive', str(opts.trials))
elif prior_cp.get('analysis', 'engine') == 'lalinferencebambimpi':
    prior_cp.set('engine', 'zeroLogLike', '')
    prior_cp.set('engine', 'nlive', str(opts.trials))

# Create a DAG to contain the other scripts
outerdaglog = os.path.join(
    daglogdir, 'lalinference_injection_test_' + str(uuid.uuid1()) + '.log')
outerdag = pipeline.CondorDAG(outerdaglog)
outerdag.set_dag_file(os.path.join(rundir, 'priortest'))

# Run code with prior sampling
trig_time = 1085855789
fake_event = pipe_utils.Event(trig_time=trig_time)
tfpath = os.path.join(rundir, 'time.txt')
tfile = open(tfpath, 'w')
print('%i\n' % (trig_time), file=tfile)
tfile.close()
prior_cp.set('input', 'gps-time-file', tfpath)

priordag = pipe_utils.LALInferencePipelineDAG(prior_cp)
priordag.set_dag_file(os.path.join(priordir, 'lalinference_priorsample'))
priordagjob = pipeline.CondorDAGManJob(priordag.get_dag_file(), dir=priordir)
priordagnode = pipeline.CondorDAGManNode(priordagjob)
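
# Hedged sketch of the natural next step (assumption, not shown in this
# excerpt): the prior-sampling sub-DAG is attached to the outer DAG as a
# DAGMan node.
outerdag.add_node(priordagnode)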
script_fh.write(script_text)
script_fh.close()

if mode == "single":
    print "Run the following command:"
    print " ".join(
        [executable_name, interferometer, str(start_time), str(end_time)])
    sys.exit()

## Generate sub file and dag file
run_dir = run_dir + "/"

# Initialize dag
dag = pipeline.CondorDAG('s6publish.log', dax=False)
dag.set_dag_file(run_dir + 's6publish')

subFile = pipeline.CondorDAGJob(mode, executable_name)
subFile.set_stdout_file(run_dir + 's6publish-$(cluster)-$(process).out')
subFile.set_stderr_file(run_dir + 's6publish-$(cluster)-$(process).err')
subFile.set_sub_file(run_dir + 's6publish.sub')
#print "Subfile:"
#print subFile.get_sub_file()

#blah2=open('v1_run_commands.txt','r')
#cmds=blah2.readlines()
#cmds
#cmds=[i.strip() for i in cmds]

times = []
#print "Computing times:"
def main(args=None): """Run the command-line Omega scan tool in batch mode """ parser = create_parser() args = parser.parse_args(args=args) # initialize logger logger = cli.logger( name=PROG.split('python -m ').pop(), level='DEBUG' if args.verbose else 'INFO', ) # check time options N = sum([ args.day is not None, args.month is not None, args.gps_start_time is not None, args.gps_end_time is not None ]) if N > 1 and not (args.gps_start_time and args.gps_end_time): raise parser.error("Please give only one of --day, --month, or " "--gps-start-time and --gps-end-time.") for (i, cf) in enumerate(args.config_file): args.config_file[i] = ','.join(map(os.path.abspath, cf.split(','))) args.global_config = list( map( os.path.abspath, [fp for csv in args.global_config for fp in csv.split(',')], )) # -- build workflow directories ----------------- # move to output directory indir = os.getcwd() mkdir(args.output_dir) os.chdir(args.output_dir) outdir = os.curdir # set node log path, and condor log path logdir = os.path.join(outdir, 'logs') htclogdir = args.log_dir or logdir mkdir(logdir, htclogdir) # set config directory and copy config files etcdir = os.path.join(outdir, 'etc') mkdir(etcdir) for (i, fp) in enumerate(args.global_config): inicopy = os.path.join(etcdir, os.path.basename(fp)) if not os.path.isfile(inicopy) or not os.path.samefile(fp, inicopy): shutil.copyfile(fp, inicopy) args.global_config[i] = os.path.abspath(inicopy) for (i, csv) in enumerate(args.config_file): inicopy = [] for fp in csv.split(','): fp2 = os.path.join(etcdir, os.path.basename(fp)) if not os.path.isfile(fp2) or not os.path.samefile(fp, fp2): shutil.copyfile(fp, fp2) inicopy.append(os.path.abspath(fp2)) args.config_file[i] = ','.join(inicopy) logger.debug("Copied all INI configuration files to %s" % etcdir) # -- configure X509 and kerberos for condor ----- if args.universe != 'local': # copy X509 grid certificate into local location (x509cert, _) = find_credential() x509copy = os.path.join(etcdir, os.path.basename(x509cert)) shutil.copyfile(x509cert, x509copy) # rerun kerberos with new path krb5cc = os.path.abspath(os.path.join(etcdir, 'krb5cc.krb5')) gwkerberos.kinit(krb5ccname=krb5cc) logger.debug("Configured Condor and Kerberos " "for NFS-shared credentials") # -- build DAG ---------------------------------- dag = pipeline.CondorDAG(os.path.join(htclogdir, '%s.log' % args.file_tag)) dag.set_dag_file(os.path.join(outdir, args.file_tag)) universe = args.universe # -- parse condor commands ---------------------- # parse into a dict condorcmds = {} if args.condor_timeout: condorcmds['periodic_remove'] = ( 'CurrentTime-EnteredCurrentStatus > %d' % (3600 * args.condor_timeout)) for cmd_ in args.condor_command: (key, value) = cmd_.split('=', 1) condorcmds[key.rstrip().lower()] = value.strip() if args.universe != 'local': # add X509 to environment for (env_, val_) in zip(['X509_USER_PROXY', 'KRB5CCNAME'], [os.path.abspath(x509copy), krb5cc]): condorenv = '%s=%s' % (env_, val_) if ('environment' in condorcmds and env_ not in condorcmds['environment']): condorcmds['environment'] += ';%s' % condorenv elif 'environment' not in condorcmds: condorcmds['environment'] = condorenv # -- build individual gw_summary jobs ----------- globalconfig = ','.join(args.global_config) jobs = [] if not args.skip_html_wrapper: htmljob = GWSummaryJob('local', subdir=outdir, logdir=logdir, tag='%s_local' % args.file_tag, **condorcmds) jobs.append(htmljob) if not args.html_wrapper_only: datajob = GWSummaryJob(universe, subdir=outdir, logdir=logdir, 
tag=args.file_tag, **condorcmds) jobs.append(datajob) # add common command-line options for job in jobs: if args.day: job.set_command('day') job.add_arg(args.day) elif args.week: job.set_command('week') job.add_arg(args.week) elif args.month: job.set_command('month') job.add_arg(args.month) elif args.year: job.set_command('year') job.add_arg(args.year) elif args.gps_start_time or args.gps_end_time: job.set_command('gps') job.add_arg(str(args.gps_start_time)) job.add_arg(str(args.gps_end_time)) else: job.set_command('day') if args.nds is True: job.add_opt('nds') if args.single_process: job.add_opt('single-process') elif args.multi_process is not None: job.add_opt('multi-process', args.multi_process) if args.verbose: job.add_opt('verbose') if args.ifo: job.add_opt('ifo', args.ifo) job.add_opt('on-segdb-error', args.on_segdb_error) job.add_opt('on-datafind-error', args.on_datafind_error) job.add_opt('output-dir', outdir) for (opt, fplist) in zip( ['--data-cache', '--event-cache', '--segment-cache'], [args.data_cache, args.event_cache, args.segment_cache]): if fplist: job.add_arg('%s %s' % (opt, (' %s ' % opt).join(fplist))) if args.no_htaccess: job.add_opt('no-htaccess') # make surrounding HTML first if not args.skip_html_wrapper: htmljob.add_opt('html-only', '') htmljob.add_opt('config-file', ','.join([globalconfig] + args.config_file).strip(',')) htmlnode = GWSummaryDAGNode(htmljob) for configfile in args.config_file: htmlnode.add_input_file(args.config_file) htmlnode.set_category('gw_summary') dag.add_node(htmlnode) logger.debug(" -- Configured HTML htmlnode job") # create node for each config file if not args.html_wrapper_only: # add html opts datajob.add_opt('no-html', '') if args.archive: datajob.add_condor_cmd('+SummaryNodeType', '"$(macroarchive)"') # configure each data node for (i, configfile) in enumerate(args.config_file): node = GWSummaryDAGNode(datajob) node.add_var_arg('--config-file %s' % ','.join([globalconfig, configfile]).strip(',')) if args.archive: jobtag = os.path.splitext(os.path.basename(configfile))[0] archivetag = jobtag.upper().replace('-', '_') if args.ifo and archivetag.startswith( '%s_' % args.ifo.upper()): archivetag = archivetag[3:] node.add_var_opt('archive', archivetag) for cf in configfile.split(','): node.add_input_file(cf) node.set_category('gw_summary') try: node.set_priority(args.priority[i]) except IndexError: node.set_priority(0) node.set_retry(1) if not args.skip_html_wrapper: node.add_parent(htmlnode) dag.add_node(node) logger.debug(" -- Configured job for config %s" % configfile) if args.maxjobs: dag.add_maxjobs_category('gw_summary', args.maxjobs) # -- finish up ---------------------------------- dag.write_sub_files() dag.write_dag() dag.write_script() logger.debug("Setup complete, DAG written to: {}".format( os.path.abspath(dag.get_dag_file()))) # return to original directory os.chdir(indir)
print("Use --help for usage details.", file=sys.stderr) sys.exit(1) # create the config parser object and read in the ini file cp = ConfigParser.ConfigParser() cp.read(opts.config_file) # create a log file that the Condor jobs will write to tempfile.tempdir = opts.log_path tempfile.template = opts.basename + '.log' logfile = tempfile.mktemp() fh = open( logfile, "w" ) fh.close() # create the DAG writing the log to the specified directory dag = pipeline.CondorDAG(logfile,opts.write_dax) dag.set_dag_file(opts.basename) subsuffix = '.sub' # create the Condor jobs that will be used in the DAG mkdir_job = strain.MkdirJob('logs',cp) mkdir_node = strain.MkdirNode(mkdir_job,'cache') if opts.write_dax: dag.add_node(mkdir_node) # try and make a directory to store the cache files and job logs try: os.mkdir('logs') except: pass # #try: os.mkdir('cache') #except: pass
    node.set_output(destination)
    dag.add_node(node)
    return [node]


#
# =============================================================================
#
#                                DAG Construction
#
# =============================================================================
#


power.make_dag_directories(config_parser)

dag = pipeline.CondorDAG(condor_log)
dag.set_dag_file(options.dag_name)

datafinds = power.make_datafind_stage(dag, options.data_seglists,
                                      verbose=True)

nodes = power.make_single_instrument_stage(dag, datafinds,
                                           options.data_seglists,
                                           options.user_tag, timing_params,
                                           psds_per_job, verbose=True)

nodes = power.make_lladded_bucluster_fragment(dag, nodes, options.data_seg,
                                              options.user_tag)

make_publish_fragment(dag, nodes, options.data_seg, options.user_tag,
                trigger_bin = roq
                print 'Prior in Mchirp will be ['+str(mc_priors[roq][0]*roq_mass_freq_scale_factor)+','+str(mc_priors[roq][1]*roq_mass_freq_scale_factor)+'] to contain the trigger Mchirp '+str(trigger_mchirp)
                break
        roq_paths = [trigger_bin]
    else:
        for mc_prior in mc_priors:
            mc_priors[mc_prior] = array(mc_priors[mc_prior])*roq_mass_freq_scale_factor
else:
    roq_paths = [None]

fp.close()

outerdaglog = os.path.join(daglogdir,
                           'lalinference_multi_' + str(uuid.uuid1()) + '.log')
outerdag = pipeline.CondorDAG(outerdaglog, dax=opts.dax)
outerdag.set_dag_file(os.path.join(rundir_root, 'multidag'))

for sampler in samps:
    for app in apps:
        for roq in roq_paths:
            if not os.path.isdir(os.path.join(rundir_root, sampler, app)):
                os.makedirs(os.path.join(rundir_root, sampler, app))
            opts.run_path = os.path.abspath(os.path.join(rundir_root, sampler, app))
            inifile = args[0]
# the offsets to still be 0 for coincidence testing later.
for key, offset in tiling_phase.items():
    if key in background_seglistdict:
        background_seglistdict[key].shift(offset)
    if key in injection_seglistdict:
        injection_seglistdict[key].shift(offset)
background_seglistdict &= seglistdict
injection_seglistdict &= seglistdict


#
# Start DAG
#


power.make_dag_directories(config_parser)

dag = pipeline.CondorDAG(
    tempfile.mkstemp(".log", "power_", options.condor_log_dir)[1])
dag.set_dag_file(os.path.splitext(filenames[0])[0])


#
# Build datafind jobs.
#


datafinds = power.make_datafind_stage(
    dag, injection_seglistdict | background_seglistdict,
    verbose=options.verbose)


#
# Main analysis
#
for ifo in ifo_list:
    cache_files[ifo] = os.path.abspath(cache_files[ifo])
if injfile is not None:
    injfile = os.path.abspath(injfile)

#########################################################################
# -----------------------------------------------------------------------
# DAG Writing
# -----------------------------------------------------------------------
#
# Initialise DAG and Jobs
#

# ---- Create a dag to which we can add jobs.
dag = pipeline.CondorDAG(log=opts.workdir + '.log')
postdag = pipeline.CondorDAG(log=opts.workdir + '_post.log')
fpeakdag = pipeline.CondorDAG(log=opts.workdir + '_fpeak.log')

# ---- Set the name of the file that will contain the DAG.
dag.set_dag_file('bayeswave_{0}'.format(os.path.basename(opts.workdir)))
postdag.set_dag_file('bayeswave_post_{0}'.format(os.path.basename(opts.workdir)))
fpeakdag.set_dag_file('bayeswave_fpeak_{0}'.format(os.path.basename(opts.workdir)))

# ---- Create DAG jobs
#   bayeswave: main bayeswave analysis
#   bayeswave_post: normal post-processing
#   bayeswave_fpeak: Spectral analysis post-processing (typically for BNS)
#   megasky: skymap job
#   megaplot: remaining plots & webpage generation
#   submitToGraceDB: upload skymap & PE to graceDB (optional)