def __init__(self, name, namespace=None, os='linux', arch='x86_64',
             installed=True, version=None, container=None):
    self.logical_name = name + "_ID%s" % str(Executable.id)
    Executable.id += 1
    self.namespace = namespace
    self.version = version
    if container:
        self._dax_executable = dax.Executable(
            self.logical_name, namespace=self.namespace, version=version,
            os=os, arch=arch, installed=installed, container=container)
    else:
        self._dax_executable = dax.Executable(
            self.logical_name, namespace=self.namespace, version=version,
            os=os, arch=arch, installed=installed)
    self.in_workflow = False
    self.pfns = {}
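# A minimal, self-contained sketch of the class-level ID counter used by the
# Executable constructor above: each instance gets a unique logical name by
# reading and incrementing a shared class attribute. The class name here is
# illustrative only.
class _CounterDemo(object):
    id = 0

    def __init__(self, name):
        self.logical_name = name + "_ID%s" % str(_CounterDemo.id)
        _CounterDemo.id += 1

print(_CounterDemo("inspiral").logical_name)  # inspiral_ID0
print(_CounterDemo("inspiral").logical_name)  # inspiral_ID1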
def generateDax(name="mvm", inputData=None): """Generate a Pegasus DAX abstract workflow""" dax = peg.ADAG(name) taskname = "matchedVisitMetrics" arguments = " --doraise --config instrumentName='HSC' datasetName='HSC-PDR2' " \ "doApplyExternalPhotoCalib=True doApplyExternalSkyWcs=True externalPhotoCalibName=fgcm " with open(inputData, 'r') as f: for line in f: filt, tract, visits = line.strip().split(' ') outNonRepoPath = os.path.join(outPath, tract, filt) logger.debug("add job of dataId: %s %s %s to %s", filt, tract, visits, outNonRepoPath) task = peg.Job(name=taskname) task.addArguments( inputRepo, "--output", outNonRepoPath, arguments, "--id ccd=0..8^10..103 tract=%s visit=%s" % (tract, visits)) dax.addJob(task) logfile = peg.File("%s-%s-%s.log" % (taskname, tract, filt)) dax.addFile(logfile) task.setStdout(logfile) task.setStderr(logfile) task.uses(logfile, link=peg.Link.OUTPUT) return dax
def __init__(self, name='my_workflow'):
    self.name = name
    self._adag = dax.ADAG(name)
    self._inputs = []
    self._outputs = []
    self._executables = []
    self.in_workflow = False
    self.sub_workflows = []
    self._external_workflow_inputs = []
    self.filename = self.name + '.dax'
    self.as_job = dax.DAX(self.filename)
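# A minimal sketch of the sub-workflow pattern behind `as_job` above: in
# Pegasus.DAX3 a DAX job represents an inner workflow file that is planned
# and executed as part of an outer ADAG. The names here are illustrative only.
import Pegasus.DAX3 as dax

outer = dax.ADAG("outer_workflow")
inner = dax.DAX("inner_workflow.dax")  # planned at runtime from this DAX file
outer.addDAX(inner)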
def __init__(self, name='my_workflow'):
    self.name = name
    self._adag = dax.ADAG(name)
    self._inputs = []
    self._outputs = []
    self._executables = []
def add_profile(self, namespace, key, value):
    """ Add profile information to this executable
    """
    try:
        entry = dax.Profile(namespace, key, value)
        self._dax_executable.addProfile(entry)
    except dax.DuplicateError:
        pass
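# A standalone sketch of the duplicate-tolerant profile idiom above, using
# Pegasus.DAX3 directly; the executable name is illustrative. Adding the same
# profile a second time raises DuplicateError, which the idiom silently
# ignores so the existing entry is kept.
import Pegasus.DAX3 as dax

exe = dax.Executable("example_exe")
for _ in range(2):
    try:
        exe.addProfile(dax.Profile("condor", "request_memory", "2GB"))
    except dax.DuplicateError:
        pass  # profile already attached; keep the existing entry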
def add_profile(self, namespace, key, value):
    """ Add profile information to this node at the DAX level
    """
    try:
        entry = dax.Profile(namespace, key, value)
        self._dax_node.addProfile(entry)
    except dax.DuplicateError:
        pass
def add_node(self, node):
    """ Add a node to this workflow

    This function adds nodes to the workflow. It also determines
    parent/child relations from the DataStorage inputs to this job.

    Parameters
    ----------
    node : pycbc.workflow.pegasus_workflow.Node
        A node that should be executed as part of this workflow.
    """
    node._finalize()
    node.in_workflow = self

    # Record the executable that this node uses
    if not node.executable.in_workflow:
        for exe in self._executables:
            if node.executable.is_same_as(exe):
                node.executable.in_workflow = True
                node._dax_node.name = exe.logical_name
                node.executable.logical_name = exe.logical_name
                break
        else:
            node.executable.in_workflow = True
            self._executables += [node.executable]

    # Add the node itself
    self._adag.addJob(node._dax_node)

    # Determine the parent/child relationships based on the inputs that
    # this node requires.
    added_nodes = []
    for inp in node._inputs:
        if inp.node is not None and inp.node.in_workflow == self:
            if inp.node not in added_nodes:
                parent = inp.node._dax_node
                child = node._dax_node
                dep = dax.Dependency(parent=parent, child=child)
                self._adag.addDependency(dep)
                added_nodes.append(inp.node)
        elif inp.node is not None and not inp.node.in_workflow:
            raise ValueError('Parents of this node must be added to the '
                             'workflow first.')
        elif inp.node is None and not inp.workflow_input:
            self._inputs += [inp]
            inp.workflow_input = True
        elif (inp.node is not None and inp.node.in_workflow != self
                and inp not in self._inputs):
            self._inputs += [inp]
            self._external_workflow_inputs += [inp]

    # Record the outputs that this node generates
    self._outputs += node._outputs
    return self
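# A runnable toy of the dependency wiring that add_node performs through
# Pegasus.DAX3: each input produced by an in-workflow parent becomes an edge
# in the ADAG. The job names here are illustrative only.
import Pegasus.DAX3 as dax

adag = dax.ADAG("toy_workflow")
parent = dax.Job(name="produce_data")
child = dax.Job(name="consume_data")
adag.addJob(parent)
adag.addJob(child)
adag.addDependency(dax.Dependency(parent=parent, child=child))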
def __init__(self, executable):
    self.in_workflow = False
    self.executable = executable
    self._inputs = []
    self._outputs = []
    self._dax_node = dax.Job(name=executable.logical_name,
                             version=executable.version,
                             namespace=executable.namespace)
    self._args = []
    self._options = []
def add_profile(self, namespace, key, value, force=False):
    """ Add profile information to this node at the DAX level
    """
    try:
        entry = dax.Profile(namespace, key, value)
        self._dax_node.addProfile(entry)
    except dax.DuplicateError:
        if force:
            # Replace the existing profile with the new entry
            self._dax_node.removeProfile(entry)
            self._dax_node.addProfile(entry)
def make(self, task_name, dataId=None, options=None, repo=None):
    job = DAX3.Job(task_name)
    if repo is None:
        repo = self.repo
    args = [repo]
    args = self._add_dataId(args, dataId)
    args = self._add_options(args, options)
    configfile = os.path.join(self.config_dir, '%s-config.py' % task_name)
    args.extend(['--configfile', configfile])
    job.addArguments(*args)
    self.dax.addJob(job)
    if self.bin_dir is not None and self.tc is not None:
        self._update_tc_file(task_name)
    return job
def __init__(self, executable):
    self.in_workflow = False
    self.executable = executable
    self._inputs = []
    self._outputs = []
    self._dax_node = dax.Job(name=executable.logical_name,
                             version=executable.version,
                             namespace=executable.namespace)
    self._args = []
    # Each value in _options is added separated with whitespace
    # so ['--option','value'] --> "--option value"
    self._options = []
    # For _raw_options *NO* whitespace is added.
    # so ['--option','value'] --> "--optionvalue"
    # and ['--option',' ','value'] --> "--option value"
    self._raw_options = []
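# A tiny runnable illustration of the _options vs. _raw_options joining rules
# described in the comments above (plain string joins, independent of Pegasus).
options = ['--option', 'value']
raw_options = ['--option', 'value']
raw_options_spaced = ['--option', ' ', 'value']

print(' '.join(options))            # --option value
print(''.join(raw_options))         # --optionvalue
print(''.join(raw_options_spaced))  # --option value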
def make_results_web_page(workflow, results_dir, explicit_dependencies=None):
    template_path = 'templates/orange.html'

    out_dir = workflow.cp.get('results_page', 'output-path')
    makedir(out_dir)

    node = PlotExecutable(workflow.cp, 'results_page', ifos=workflow.ifos,
                          out_dir=out_dir).create_node()
    node.add_opt('--plots-dir', results_dir)
    node.add_opt('--template-file', template_path)
    workflow += node
    if explicit_dependencies is not None:
        import Pegasus.DAX3 as dax
        for dep in explicit_dependencies:
            dax_dep = dax.Dependency(parent=dep._dax_node,
                                     child=node._dax_node)
            workflow._adag.addDependency(dax_dep)
def _make_root_dependency(self, inp):
    def root_path(v):
        path = [v]
        while v.in_workflow:
            path += [v.in_workflow]
            v = v.in_workflow
        return path

    workflow_root = root_path(self)
    input_root = root_path(inp)
    for step in workflow_root:
        if step in input_root:
            common = step
            break
    dep = dax.Dependency(
        parent=input_root[input_root.index(common) - 1].as_job,
        child=workflow_root[workflow_root.index(common) - 1].as_job)
    common._adag.addDependency(dep)
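# A runnable toy of the common-ancestor walk in _make_root_dependency above:
# climb from each workflow to the root via in_workflow links, then connect the
# children of the shared ancestor. The stub class is illustrative only.
class _Stub(object):
    def __init__(self, name, in_workflow=False):
        self.name = name
        self.in_workflow = in_workflow

def root_path(v):
    path = [v]
    while v.in_workflow:
        path += [v.in_workflow]
        v = v.in_workflow
    return path

root = _Stub('root')
left = _Stub('left', in_workflow=root)
leaf = _Stub('leaf', in_workflow=left)
print([s.name for s in root_path(leaf)])  # ['leaf', 'left', 'root']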
def add_node(self, node):
    """ Add a node to this workflow

    This function adds nodes to the workflow. It also determines
    parent/child relations from the DataStorage inputs to this job.

    Parameters
    ----------
    node : Node
        A node that should be executed as part of this workflow.
    """
    node._finalize()
    node.in_workflow = True
    self._adag.addJob(node._dax_node)

    # Determine the parent/child relationships based on the inputs that
    # this node requires.
    for inp in node._inputs:
        if inp.node is not None and inp.node.in_workflow:
            parent = inp.node._dax_node
            child = node._dax_node
            dep = dax.Dependency(parent=parent, child=child)
            self._adag.addDependency(dep)
        elif inp.node is not None and not inp.node.in_workflow:
            raise ValueError('Parents of this node must be added to the '
                             'workflow first.')
        elif inp.node is None and inp.workflow_input is False:
            self._inputs += [inp]
            inp.workflow_input = True

    # Record the outputs that this node generates
    self._outputs += node._outputs

    # Record the executable that this node uses
    if not node.executable.in_workflow:
        node.executable.in_workflow = True
        self._executables += [node.executable]

    return self
def insert_into_dax(self, dax):
    dax.addExecutable(self._dax_executable)
def insert_into_dax(self, dax):
    dax.addFile(self)
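# A small sketch of the two insert_into_dax hooks above: File and Executable
# objects register themselves on an ADAG via addFile/addExecutable. The names
# here are illustrative only.
import Pegasus.DAX3 as dax

adag = dax.ADAG("catalog_demo")
adag.addFile(dax.File("input.txt"))
adag.addExecutable(dax.Executable("analyze"))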
import os
import sys
import glob
import pwd
import time

import Pegasus.DAX3 as DAX3
import desc.imsim_deep_pipeline as idp

USER = pwd.getpwuid(os.getuid())[0]

# Create an abstract DAG
dax = DAX3.ADAG("imsim_pipeline")

# Add some workflow-level metadata
dax.metadata("creator", "%s@%s" % (USER, os.uname()[1]))
dax.metadata("created", time.ctime())

dither_info_file = 'dither_info.pkl'
sensor_lists = idp.SensorLists(dither_info_file)
for visit, visit_info in sensor_lists.visits:
    band = visit_info.band
    for sensor_id in visit_info.sensor_ids:
        make_instcat = DAX3.Job('make_instcat')
        make_instcat.addArguments(visit, sensor_id)
        instcat = DAX3.File('instcat_%(visit)s_%(sensor_id)s.txt' % locals())
        make_instcat.uses(instcat, link=DAX3.Link.OUTPUT, transfer=True,
                          register=True)
        dax.addJob(make_instcat)
        run_imsim = DAX3.Job('run_imsim')
def generateDax(name="object", inputData=None): """Generate a Pegasus DAX abstract workflow""" dax = peg.ADAG(name) # These config-ish files are expected in the input/ folder schemaAbh = peg.File("schema.abh") dax.addFile(schemaAbh) sedScript = peg.File("fixCsv.sed") dax.addFile(sedScript) partCfg = peg.File("partition.json") dax.addFile(partCfg) catYaml = peg.File("hsc.yaml") dax.addFile(catYaml) # (Ab)using the shared filesystem....!!! chunkBaseFolder = os.path.join("/project", "hchiang2", "qserv", "qqpoc") if not os.path.isdir(chunkBaseFolder): logging.warning("Chunk file base folder %s invalid", chunkBaseFolder) # Create a new database and the Object table in Qserv task0a = peg.Job(name="replctl-register") task0a.addProfile( peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB")) task0a.addArguments("http://lsst-qserv-master03:25080", str(database), "--felis", catYaml, "-v") dax.addJob(task0a) logfile = peg.File("qingest-a.log") dax.addFile(logfile) task0a.setStdout(logfile) task0a.setStderr(logfile) task0a.uses(logfile, link=peg.Link.OUTPUT) task0a.uses(catYaml, link=peg.Link.INPUT) # Start a super-transaction # Need to get the super transaction id from the log file task0c = peg.Job(name="replctl-trans") task0c.addProfile( peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB")) task0c.addArguments("http://lsst-qserv-master03:25080", str(database), "--start") dax.addJob(task0c) transIdFile = peg.File("qingest-c.log") dax.addFile(transIdFile) task0c.setStdout(transIdFile) task0c.setStderr(transIdFile) task0c.uses(transIdFile, link=peg.Link.OUTPUT) dax.depends(parent=task0a, child=task0c) # Commit a super-transaction task0d = peg.Job(name="replctl-trans") task0d.addProfile( peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB")) task0d.addArguments("http://lsst-qserv-master03:25080", str(database), "-a") dax.addJob(task0d) logfile = peg.File("qingest-d.log") dax.addFile(logfile) task0d.setStdout(logfile) task0d.setStderr(logfile) task0d.uses(logfile, link=peg.Link.OUTPUT) i = 0 with open(inputData, 'r') as f: for line in f: inparq = line.strip() i += 1 logging.debug('Add file %d: %s', i, inparq) taskname = 'hackType' task1 = peg.Job(name=taskname) task1.addProfile( peg.Profile(peg.Namespace.CONDOR, "request_memory", "20GB")) outparq = peg.File("hack-%d.parq" % i) dax.addFile(outparq) task1.addArguments("-i", inparq, "-o", outparq) dax.addJob(task1) logfile = peg.File("%s-%s.log" % ( taskname, i, )) dax.addFile(logfile) task1.setStdout(logfile) task1.setStderr(logfile) task1.uses(logfile, link=peg.Link.OUTPUT) task1.uses(outparq, link=peg.Link.OUTPUT) taskname = 'pq2csv' task2 = peg.Job(name=taskname) task2.addProfile( peg.Profile(peg.Namespace.CONDOR, "request_memory", "20GB")) outcsv = peg.File("csv-%d.csv" % i) dax.addFile(outcsv) task2.addArguments("--schema", schemaAbh, "--verbose", outparq, outcsv) dax.addJob(task2) logfile = peg.File("%s-%s.log" % ( taskname, i, )) dax.addFile(logfile) task2.setStdout(logfile) task2.setStderr(logfile) task2.uses(logfile, link=peg.Link.OUTPUT) task2.uses(schemaAbh, link=peg.Link.INPUT) task2.uses(outparq, link=peg.Link.INPUT) task2.uses(outcsv, link=peg.Link.OUTPUT) dax.depends(parent=task1, child=task2) taskname = 'sed' task3 = peg.Job(name=taskname) task3.addProfile( peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB")) task3.addArguments("-f", sedScript, outcsv) dax.addJob(task3) logfile = peg.File("%s-%s.log" % ( taskname, i, )) newcsv = peg.File("new-%s.csv" % (i, )) dax.addFile(logfile) task3.setStdout(newcsv) 
task3.setStderr(logfile) task3.uses(logfile, link=peg.Link.OUTPUT) task3.uses(newcsv, link=peg.Link.OUTPUT) task3.uses(outcsv, link=peg.Link.INPUT) task3.uses(sedScript, link=peg.Link.INPUT) dax.depends(parent=task2, child=task3) # My input csv files are larger than 1GB each and I am not splitting them for now taskname = 'partition' task4 = peg.Job(name=taskname) task4.addProfile( peg.Profile(peg.Namespace.CONDOR, "request_memory", "15GB")) outdir = os.path.join(chunkBaseFolder, 'chunksSet' + str(i)) task4.addArguments("--verbose", "-c", partCfg, "--in.path", newcsv, "--out.dir", outdir) dax.addJob(task4) logfile = peg.File("%s-%s.log" % ( taskname, i, )) dax.addFile(logfile) task4.setStdout(logfile) task4.setStderr(logfile) task4.uses(logfile, link=peg.Link.OUTPUT) task4.uses(newcsv, link=peg.Link.INPUT) task4.uses(partCfg, link=peg.Link.INPUT) dax.depends(parent=task3, child=task4) # Look for chunk files in the output folder of this partitiong # Cannot handle smaller job units at dax creation as the folder is not yet populated; # if we want smaller units, consider using dynamic subworkflow taskname = 'allocateChunk' task5 = peg.Job(name=taskname) task5.addProfile( peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB")) task5.addArguments(outdir, "--idFile", transIdFile) dax.addJob(task5) logfile = peg.File("%s-%s.log" % ( taskname, i, )) dax.addFile(logfile) task5.setStdout(logfile) task5.setStderr(logfile) task5.uses(logfile, link=peg.Link.OUTPUT) task5.uses(transIdFile, link=peg.Link.INPUT) dax.depends(parent=task4, child=task5) dax.depends(parent=task0c, child=task5) taskname = 'loadData' task6 = peg.Job(name=taskname) task6.addProfile( peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB")) task6.addArguments(logfile) dax.addJob(task6) task6.uses(logfile, link=peg.Link.INPUT) logfile6 = peg.File("%s-%s.log" % ( taskname, i, )) dax.addFile(logfile6) task6.setStdout(logfile6) task6.setStderr(logfile6) task6.uses(logfile6, link=peg.Link.OUTPUT) dax.depends(parent=task5, child=task6) dax.depends(parent=task6, child=task0d) return dax
import os
import sys
import glob
import pwd
import time

import Pegasus.DAX3 as DAX3
from JobMaker import JobMaker
from repo_tools import *

USER = pwd.getpwuid(os.getuid())[0]

# Create an abstract DAG
dax = DAX3.ADAG("Level_2_Coadd_Pipeline")

# Add some workflow-level metadata
dax.metadata("creator", "%s@%s" % (USER, os.uname()[1]))
dax.metadata("created", time.ctime())

input_repo = '/global/cscratch1/sd/descdm/DC1/DC1-imsim-dithered'
output_repo = '.'
config_dir = './configs'
job_maker = JobMaker(dax, output_repo, config_dir, bin_dir='./bin',
                     tc='tc.txt')

# Loop over tracts
for tract in tract_list(output_repo):
    # Loop over patches.
def setup_injection_minifollowups(workflow, injection_file, inj_xml_file,
                                  single_triggers, tmpltbank_file,
                                  insp_segs, insp_seg_name, dax_output,
                                  out_dir, tags=None):
    """ Create plots that followup the closest missed injections

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    injection_file: pycbc.workflow.File
        The file containing found/missed injection information.
    single_triggers: list of pycbc.workflow.File
        A list containing the file objects associated with the merged
        single detector trigger files for each ifo.
    tmpltbank_file: pycbc.workflow.File
        The file object pointing to the HDF format template bank
    insp_segs: dict
        A dictionary, keyed by ifo name, of the data read by each
        inspiral job.
    insp_seg_name: str
        The name of the segmentlist to read from the inspiral segment file
    out_dir: path
        The directory to store minifollowups result plots and files
    tags: {None, optional}
        Tags to add to the minifollowups executables

    Returns
    -------
    layout: list
        A list of tuples which specify the displayed file layout for the
        minifollowups plots.
    """
    logging.info('Entering injection minifollowups module')

    if not workflow.cp.has_section('workflow-injection_minifollowups'):
        logging.info('There is no [workflow-injection_minifollowups] '
                     'section in configuration file')
        logging.info('Leaving minifollowups')
        return

    tags = [] if tags is None else tags
    makedir(dax_output)

    # turn the config file into a File class
    config_path = os.path.abspath(dax_output + '/' + '_'.join(tags)
                                  + 'injection_minifollowup.ini')
    workflow.cp.write(open(config_path, 'w'))
    config_file = wdax.File(os.path.basename(config_path))
    config_file.PFN(config_path, 'local')

    exe = Executable(workflow.cp, 'injection_minifollowup',
                     ifos=workflow.ifos, out_dir=dax_output)

    node = exe.create_node()
    node.add_input_opt('--config-files', config_file)
    node.add_input_opt('--bank-file', tmpltbank_file)
    node.add_input_opt('--injection-file', injection_file)
    node.add_input_opt('--injection-xml-file', inj_xml_file)
    node.add_multiifo_input_list_opt('--single-detector-triggers',
                                     single_triggers)
    node.add_multiifo_input_list_opt('--inspiral-segments',
                                     insp_segs.values())
    node.add_opt('--inspiral-segment-name', insp_seg_name)
    node.new_output_file_opt(workflow.analysis_time, '.dax',
                             '--output-file', tags=tags)
    node.new_output_file_opt(workflow.analysis_time, '.dax.map',
                             '--output-map', tags=tags)

    name = node.output_files[0].name
    map_loc = node.output_files[1].name
    node.add_opt('--workflow-name', name)
    node.add_opt('--output-dir', out_dir)

    workflow += node

    # execute this as a sub-workflow
    fil = node.output_files[0]
    job = dax.DAX(fil)
    job.addArguments('--basename %s'
                     % os.path.splitext(os.path.basename(name))[0])
    Workflow.set_job_properties(job, map_loc)
    workflow._adag.addJob(job)
    dep = dax.Dependency(parent=node._dax_node, child=job)
    workflow._adag.addDependency(dep)
    logging.info('Leaving injection minifollowups module')
def main():
    args = parse_args()
    setup_logger(args.debug)

    # TODO: handle exceptions for bad file paths
    workflow_file_path = args.cwl_workflow_file_path
    workflow_file_dir = os.path.dirname(workflow_file_path)

    log.info("Loading {}".format(workflow_file_path))
    workflow = cwl.load_document(workflow_file_path)

    adag = dax.ADAG("dag-generated-from-cwl", auto=True)
    rc = ReplicaCatalog()
    tc = TransformationCatalog(workflow_file_dir)

    # process initial input file(s)
    # TODO: need to account for the different fields for a file class
    # TODO: log warning for the fields that we are skipping
    workflow_input_strings = dict()
    workflow_files = dict()

    log.info("Collecting inputs in {}".format(args.input_file_spec_path))
    with open(args.input_file_spec_path, "r") as yaml_file:
        input_file_specs = load(yaml_file, Loader=Loader)

    for input in workflow.inputs:
        input_type = input.type
        if input_type == "File":
            workflow_files[get_basename(input.id)] = get_basename(input.id)
            # TODO: account for non-local sites
            rc.add_item(get_basename(input.id),
                        input_file_specs[get_basename(input.id)]["path"],
                        "local")
        elif input_type == "string":
            workflow_input_strings[get_basename(input.id)] = \
                input_file_specs[get_basename(input.id)]
        elif isinstance(input_type, cwl.InputArraySchema):
            if input_type.items == "File":
                # TODO: account for workflow inputs of type File[]
                pass
            elif input_type.items == "string":
                workflow_input_strings[get_basename(input.id)] = \
                    input_file_specs[get_basename(input.id)]

    log.info("Collecting output files")
    for step in workflow.steps:
        cwl_command_line_tool = cwl.load_document(step.run) \
            if isinstance(step.run, str) else step.run
        for output in cwl_command_line_tool.outputs:
            # TODO: account for outputs that are not files
            output_name = get_name(step.id, output.id)
            log.debug("Adding (key: {0}, value: {1}) to workflow_files".format(
                output_name, output.outputBinding.glob))
            # TODO: throw error when glob contains javascript expression
            # or pattern as we cannot support anything that is dynamic
            workflow_files[output_name] = output.outputBinding.glob

    log.info("Building workflow steps into dax jobs")
    for step in workflow.steps:
        # convert cwl:CommandLineTool -> pegasus:Executable
        cwl_command_line_tool = cwl.load_document(step.run) \
            if isinstance(step.run, str) else step.run

        executable_name = os.path.basename(cwl_command_line_tool.baseCommand) \
            if os.path.isabs(cwl_command_line_tool.baseCommand) \
            else cwl_command_line_tool.baseCommand
        dax_executable = dax.Executable(executable_name)

        # add executable to transformation catalog
        tc.add_item(executable_name, cwl_command_line_tool.baseCommand)

        # create job with executable
        dax_job = dax.Job(dax_executable)

        step_inputs = dict()
        for input in step.in_:
            input_id = get_basename(input.id)
            if isinstance(input.source, str):
                step_inputs[input_id] = get_basename(input.source)
            elif isinstance(input.source, list):
                step_inputs[input_id] = [get_basename(file)
                                         for file in input.source]

        # add input uses to job
        for input in cwl_command_line_tool.inputs:
            if input.type == "File":
                file_id = step_inputs[get_name(step.id, input.id)]
                file = dax.File(workflow_files[file_id])
                log.debug("Adding link ({0} -> {1})".format(
                    file_id, dax_job.name))
                dax_job.uses(file, link=dax.Link.INPUT)
            # TODO: better type checking for string[] and File[] ?
            elif isinstance(input.type, cwl.CommandInputArraySchema):
                if input.type.items == "File":
                    file_ids = step_inputs[get_name(step.id, input.id)]
                    for file_id in file_ids:
                        file = dax.File(workflow_files[file_id])
                        log.debug("Adding link ({0} -> {1})".format(
                            file_id, dax_job.name))
                        dax_job.uses(file, link=dax.Link.INPUT)

        # add output uses to job
        # TODO: ensure that these are of type File or File[]
        for output in step.out:
            file_id = get_basename(output)
            file = dax.File(workflow_files[file_id])
            log.debug("Adding link ({0} -> {1})".format(dax_job.name, file_id))
            dax_job.uses(file, link=dax.Link.OUTPUT, transfer=True,
                         register=True)

        # add arguments to job
        # TODO: place argument building up in a function
        dax_job_args = cwl_command_line_tool.arguments \
            if cwl_command_line_tool.arguments is not None else []

        # process cwl inputBindings if they exist and build up job
        # argument list
        cwl_command_line_tool_inputs = sorted(
            cwl_command_line_tool.inputs,
            key=lambda input: input.inputBinding.position
            if input.inputBinding.position is not None else 0)

        for input in cwl_command_line_tool_inputs:
            # process args
            if input.inputBinding is not None:
                # TODO: account for inputBinding separation
                if input.inputBinding.prefix is not None:
                    dax_job_args.append(input.inputBinding.prefix)

                if input.type == "File":
                    dax_job_args.append(dax.File(
                        workflow_files[step_inputs[get_name(step.id,
                                                            input.id)]]))

                if input.type == "string":
                    dax_job_args.append(
                        workflow_input_strings[step_inputs[get_name(step.id,
                                                                    input.id)]])

                # handle array type inputs
                if isinstance(input.type, cwl.CommandInputArraySchema):
                    if input.type.items == "File":
                        for file in step_inputs[get_name(step.id, input.id)]:
                            dax_job_args.append(dax.File(workflow_files[file]))
                    elif input.type.items == "string":
                        input_string_arr_id = step_inputs[get_name(step.id,
                                                                   input.id)]
                        separator = " " if input.inputBinding.itemSeparator \
                            is None else input.inputBinding.itemSeparator
                        dax_job_args.append(
                            # TODO: currently only accounting for input
                            # strings that are inputs to the entire workflow
                            separator.join(
                                workflow_input_strings[input_string_arr_id]))

        log.debug("Adding job: {0}, with args: {1}".format(dax_job.name,
                                                           dax_job_args))
        dax_job.addArguments(*dax_job_args)

        # add job to DAG
        adag.addJob(dax_job)

    rc.write_catalog("rc.txt")
    tc.write_catalog("tc.txt")

    with open(args.output_file_path, "w") as f:
        log.info("Writing DAX to {}".format(args.output_file_path))
        adag.writeXML(f)
def setup_postproc_coh_PTF_workflow(workflow, trig_files, trig_cache,
                                    inj_trig_files, inj_files,
                                    inj_trig_caches, inj_caches,
                                    config_file, output_dir, html_dir,
                                    segment_dir, ifos, inj_tags=[], tags=[]):
    """
    This module sets up the post-processing stage in the workflow, using a
    coh_PTF style set up. This consists of running trig_combiner to find
    coherent triggers, and injfinder to look for injections. It then runs
    a horizon_dist job, trig_cluster to cluster triggers, and injcombiner
    to calculate injection statistics. Finally, efficiency and sbv_plotter
    jobs calculate efficiency and signal based veto statistics and make
    plots.

    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the jobs will be added to.
    trig_files : pycbc.workflow.core.FileList
        A FileList containing the combined databases.

    Returns
    -------
    pp_outs : pycbc.workflow.core.FileList
        A FileList of the output files from the post-processing jobs.
    """
    cp = workflow.cp
    full_segment = trig_files[0].segment
    trig_name = cp.get("workflow", "trigger-name")
    grb_string = "GRB" + trig_name
    num_trials = int(cp.get("trig_combiner", "num-trials"))

    pp_outs = FileList([])
    pp_nodes = []

    # Set up needed exe classes
    trig_combiner_exe = os.path.basename(cp.get("executables",
                                                "trig_combiner"))
    trig_combiner_class = select_generic_executable(workflow,
                                                    "trig_combiner")
    trig_cluster_exe = os.path.basename(cp.get("executables",
                                               "trig_cluster"))
    trig_cluster_class = select_generic_executable(workflow, "trig_cluster")
    sbv_plotter_exe = os.path.basename(cp.get("executables", "sbv_plotter"))
    sbv_plotter_class = select_generic_executable(workflow, "sbv_plotter")
    efficiency_exe = os.path.basename(cp.get("executables", "efficiency"))
    efficiency_class = select_generic_executable(workflow, "efficiency")
    """
    horizon_dist_exe = os.path.basename(cp.get("executables",
                                               "horizon_dist"))
    horizon_dist_class = select_generic_executable(workflow,
                                                   "horizon_dist")
    """
    html_summary_exe = os.path.basename(cp.get("executables",
                                               "html_summary"))
    html_summary_class = select_generic_executable(workflow, "html_summary")

    # Set up trig_combiner job
    trig_combiner_out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]
    trig_combiner_jobs = trig_combiner_class(cp, "trig_combiner", ifo=ifos,
                                             out_dir=output_dir, tags=tags)
    trig_combiner_node, trig_combiner_outs = trig_combiner_jobs.create_node(
        trig_files, segment_dir, out_tags=trig_combiner_out_tags, tags=tags)
    pp_nodes.append(trig_combiner_node)
    workflow.add_node(trig_combiner_node)
    pp_outs.extend(trig_combiner_outs)

    # Initialise trig_cluster class
    trig_cluster_outs = FileList([])
    trig_cluster_jobs = trig_cluster_class(cp, "trig_cluster", ifo=ifos,
                                           out_dir=output_dir, tags=tags)

    # Set up injfinder jobs
    if cp.has_section("workflow-injections"):
        injfinder_nodes = []
        injcombiner_parent_nodes = []
        inj_sbv_plotter_parent_nodes = []

        injfinder_exe = os.path.basename(cp.get("executables", "injfinder"))
        injfinder_class = select_generic_executable(workflow, "injfinder")
        injfinder_jobs = injfinder_class(cp, "injfinder", ifo=ifos,
                                         out_dir=output_dir, tags=tags)

        injcombiner_exe = os.path.basename(cp.get("executables",
                                                  "injcombiner"))
        injcombiner_class = select_generic_executable(workflow,
                                                      "injcombiner")
        injcombiner_jobs = injcombiner_class(cp, "injcombiner", ifo=ifos,
                                             out_dir=output_dir, tags=tags)

        injfinder_outs = FileList([])
        for inj_tag in inj_tags:
            triggers = FileList([file for file in inj_trig_files
                                 if inj_tag in file.tag_str])
            injections = FileList([file for file in inj_files
                                   if inj_tag in file.tag_str])
            trig_cache = [file for file in inj_trig_caches
                          if inj_tag in file.tag_str][0]
            inj_cache = [file for file in inj_caches
                         if inj_tag in file.tag_str][0]
            injfinder_node, curr_outs = injfinder_jobs.create_node(
                triggers, injections, segment_dir, tags=[inj_tag])
            injfinder_nodes.append(injfinder_node)
            pp_nodes.append(injfinder_node)
            workflow.add_node(injfinder_node)
            injfinder_outs.extend(curr_outs)
            if "DETECTION" not in curr_outs[0].tagged_description:
                injcombiner_parent_nodes.append(injfinder_node)
            else:
                inj_sbv_plotter_parent_nodes.append(injfinder_node)

        pp_outs.extend(injfinder_outs)

        # Make injfinder output cache
        fm_cache = File(ifos, "foundmissed", full_segment,
                        extension="lcf", directory=output_dir)
        fm_cache.PFN(fm_cache.cache_entry.path, site="local")
        injfinder_outs.convert_to_lal_cache().tofile(
            open(fm_cache.storage_path, "w"))
        pp_outs.extend(FileList([fm_cache]))

        # Set up injcombiner jobs
        injcombiner_outs = FileList([file for file in injfinder_outs
                                     if "DETECTION" in file.tag_str])
        injcombiner_tags = [inj_tag for inj_tag in inj_tags
                            if "DETECTION" not in inj_tag]
        injcombiner_out_tags = [injcombiner_outs[0].tag_str.rsplit('_', 1)[0]]
        injcombiner_nodes = []

        for injcombiner_tag in injcombiner_tags:
            max_inc = cp.get_opt_tags("injections", "max-inc",
                                      [injcombiner_tag])
            inj_str = injcombiner_tag[:4]
            inputs = FileList([file for file in injfinder_outs
                               if injcombiner_tag in file.tagged_description])
            #                  if any(tag in file.tagged_description
            #                         for tag in injcombiner_tags)])
            injcombiner_node, curr_outs = injcombiner_jobs.create_node(
                fm_cache, inputs, inj_str, max_inc, workflow.analysis_time)
            injcombiner_nodes.append(injcombiner_node)
            injcombiner_out_tags.append("%s_FILTERED_%s" % (inj_str, max_inc))
            injcombiner_outs.extend(curr_outs)
            pp_outs.extend(curr_outs)
            pp_nodes.append(injcombiner_node)
            workflow.add_node(injcombiner_node)
            for parent_node in injcombiner_parent_nodes:
                dep = dax.Dependency(parent=parent_node._dax_node,
                                     child=injcombiner_node._dax_node)
                workflow._adag.addDependency(dep)

        # Initialise injection_efficiency class
        inj_efficiency_jobs = efficiency_class(cp, "inj_efficiency",
                                               ifo=ifos, out_dir=output_dir,
                                               tags=tags)

    # Initialise sbv_plotter class
    sbv_plotter_outs = FileList([])
    sbv_plotter_jobs = sbv_plotter_class(cp, "sbv_plotter", ifo=ifos,
                                         out_dir=output_dir, tags=tags)

    # Initialise efficiency class
    efficiency_outs = FileList([])
    efficiency_jobs = efficiency_class(cp, "efficiency", ifo=ifos,
                                       out_dir=output_dir, tags=tags)

    # Add trig_cluster jobs and their corresponding plotting jobs
    for out_tag in trig_combiner_out_tags:
        unclust_file = [file for file in trig_combiner_outs
                        if out_tag in file.tag_str][0]
        trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(
            unclust_file)
        trig_cluster_outs.extend(curr_outs)
        clust_file = curr_outs[0]
        if out_tag != "ONSOURCE":
            # Add memory requirement for jobs with potentially large files
            trig_cluster_node.set_memory(1300)
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add sbv_plotter job
            sbv_out_tags = [out_tag, "_clustered"]
            sbv_plotter_node = sbv_plotter_jobs.create_node(
                clust_file, segment_dir, tags=sbv_out_tags)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add injection sbv_plotter nodes if appropriate
            if out_tag == "OFFSOURCE" and \
                    cp.has_section("workflow-injections"):
                offsource_clustered = clust_file
                off_node = sbv_plotter_node

                found_inj_files = FileList([file for file in injcombiner_outs
                                            if "FOUND" in file.tag_str])
                for curr_injs in found_inj_files:
                    curr_tags = [tag for tag in injcombiner_out_tags
                                 if tag in curr_injs.name]
                    curr_tags.append("_clustered")
                    sbv_plotter_node = sbv_plotter_jobs.create_node(
                        clust_file, segment_dir, inj_file=curr_injs,
                        tags=curr_tags)
                    pp_nodes.append(sbv_plotter_node)
                    workflow.add_node(sbv_plotter_node)
                    dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                         child=sbv_plotter_node._dax_node)
                    workflow._adag.addDependency(dep)
                    if "DETECTION" in curr_injs.tagged_description:
                        for parent_node in inj_sbv_plotter_parent_nodes:
                            dep = dax.Dependency(
                                parent=parent_node._dax_node,
                                child=sbv_plotter_node._dax_node)
                            workflow._adag.addDependency(dep)
                    else:
                        for parent_node in injcombiner_nodes:
                            dep = dax.Dependency(
                                parent=parent_node._dax_node,
                                child=sbv_plotter_node._dax_node)
                            workflow._adag.addDependency(dep)

            # Also add sbv_plotter job for unclustered triggers
            sbv_plotter_node = sbv_plotter_jobs.create_node(
                unclust_file, segment_dir, tags=[out_tag, "_unclustered"])
            sbv_plotter_node.set_memory(1300)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)
        else:
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add efficiency job for on/off
            efficiency_node = efficiency_jobs.create_node(
                clust_file, offsource_clustered, segment_dir, tags=[out_tag])
            pp_nodes.append(efficiency_node)
            workflow.add_node(efficiency_node)
            dep = dax.Dependency(parent=off_node._dax_node,
                                 child=efficiency_node._dax_node)
            workflow._adag.addDependency(dep)

            if cp.has_section("workflow-injections"):
                for tag in injcombiner_out_tags:
                    if "_FILTERED_" in tag:
                        inj_set_tag = [t for t in inj_tags if
                                       str(tag).replace("_FILTERED_", "")
                                       in t][0]
                    else:
                        inj_set_tag = str(tag)

                    found_file = [file for file in injcombiner_outs
                                  if tag + "_FOUND" in file.tag_str][0]
                    missed_file = [file for file in injcombiner_outs
                                   if tag + "_MISSED" in file.tag_str][0]
                    inj_efficiency_node = inj_efficiency_jobs.create_node(
                        clust_file, offsource_clustered, segment_dir,
                        found_file, missed_file,
                        tags=[out_tag, tag, inj_set_tag])
                    pp_nodes.append(inj_efficiency_node)
                    workflow.add_node(inj_efficiency_node)
                    dep = dax.Dependency(
                        parent=off_node._dax_node,
                        child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                    for injcombiner_node in injcombiner_nodes:
                        dep = dax.Dependency(
                            parent=injcombiner_node._dax_node,
                            child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)
                    for injfinder_node in injfinder_nodes:
                        dep = dax.Dependency(
                            parent=injfinder_node._dax_node,
                            child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)

    # Add further trig_cluster jobs for trials
    trial = 1
    while trial <= num_trials:
        trial_tag = "OFFTRIAL_%d" % trial
        unclust_file = [file for file in trig_combiner_outs
                        if trial_tag in file.tag_str][0]
        trig_cluster_node, clust_outs = trig_cluster_jobs.create_node(
            unclust_file)
        clust_file = clust_outs[0]
        trig_cluster_outs.extend(clust_outs)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)

        # Add efficiency job
        efficiency_node = efficiency_jobs.create_node(
            clust_file, offsource_clustered, segment_dir, tags=[trial_tag])
        pp_nodes.append(efficiency_node)
        workflow.add_node(efficiency_node)
        dep = dax.Dependency(parent=off_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)
        dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)

        # Adding inj_efficiency job
        if cp.has_section("workflow-injections"):
            for tag in injcombiner_out_tags:
                if "_FILTERED_" in tag:
                    inj_set_tag = [t for t in inj_tags if
                                   str(tag).replace("_FILTERED_", "")
                                   in t][0]
                else:
                    inj_set_tag = str(tag)

                found_file = [file for file in injcombiner_outs
                              if tag + "_FOUND" in file.tag_str][0]
                missed_file = [file for file in injcombiner_outs
                               if tag + "_MISSED" in file.tag_str][0]
                inj_efficiency_node = inj_efficiency_jobs.create_node(
                    clust_file, offsource_clustered, segment_dir,
                    found_file, missed_file,
                    tags=[trial_tag, tag, inj_set_tag])
                pp_nodes.append(inj_efficiency_node)
                workflow.add_node(inj_efficiency_node)
                dep = dax.Dependency(parent=off_node._dax_node,
                                     child=inj_efficiency_node._dax_node)
                workflow._adag.addDependency(dep)
                for injcombiner_node in injcombiner_nodes:
                    dep = dax.Dependency(
                        parent=injcombiner_node._dax_node,
                        child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                for injfinder_node in injfinder_nodes:
                    dep = dax.Dependency(
                        parent=injfinder_node._dax_node,
                        child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)

        trial += 1

    # Initialise html_summary class and set up job
    # FIXME: We may want this job to run even if some jobs fail
    html_summary_jobs = html_summary_class(cp, "html_summary", ifo=ifos,
                                           out_dir=output_dir, tags=tags)
    if cp.has_section("workflow-injections"):
        tuning_tags = [inj_tag for inj_tag in injcombiner_out_tags
                       if "DETECTION" in inj_tag]
        exclusion_tags = [inj_tag for inj_tag in injcombiner_out_tags
                          if "DETECTION" not in inj_tag]
        html_summary_node = html_summary_jobs.create_node(
            c_file=config_file, tuning_tags=tuning_tags,
            exclusion_tags=exclusion_tags, html_dir=html_dir)
    else:
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                                                          html_dir=html_dir)
    workflow.add_node(html_summary_node)
    for pp_node in pp_nodes:
        dep = dax.Dependency(parent=pp_node._dax_node,
                             child=html_summary_node._dax_node)
        workflow._adag.addDependency(dep)

    # Make the open box shell script
    open_box_cmd = html_summary_node.executable.get_pfn() + " "
    open_box_cmd += ' '.join(html_summary_node._args +
                             html_summary_node._options)
    open_box_cmd += " --open-box"
    open_box_path = "%s/open_the_box.sh" % output_dir
    f = open(open_box_path, "w")
    f.write("#!/bin/sh\n%s" % open_box_cmd)
    f.close()
    os.chmod(open_box_path, 0o500)

    pp_outs.extend(trig_cluster_outs)

    return pp_outs
def setup_single_det_minifollowups(workflow, single_trig_file, tmpltbank_file,
                                   insp_segs, insp_data_name, insp_anal_name,
                                   dax_output, out_dir, veto_file=None,
                                   veto_segment_name=None, statfiles=None,
                                   tags=None):
    """ Create plots that followup the Nth loudest clustered single detector
    triggers from a merged single detector trigger HDF file.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    single_trig_file: pycbc.workflow.File
        The File class holding the single detector triggers.
    tmpltbank_file: pycbc.workflow.File
        The file object pointing to the HDF format template bank
    insp_segs: SegFile
        The segment file containing the data read by each inspiral job.
    insp_data_name: str
        The name of the segmentlist storing data read.
    insp_anal_name: str
        The name of the segmentlist storing data analyzed.
    out_dir: path
        The directory to store minifollowups result plots and files
    statfiles: FileList (optional, default=None)
        Supplementary files necessary for computing the single-detector
        statistic.
    tags: {None, optional}
        Tags to add to the minifollowups executables

    Returns
    -------
    layout: list
        A list of tuples which specify the displayed file layout for the
        minifollowups plots.
    """
    logging.info('Entering minifollowups module')

    if not workflow.cp.has_section('workflow-sngl_minifollowups'):
        msg = 'There is no [workflow-sngl_minifollowups] section in '
        msg += 'configuration file'
        logging.info(msg)
        logging.info('Leaving minifollowups')
        return

    tags = [] if tags is None else tags
    makedir(dax_output)

    # turn the config file into a File class
    curr_ifo = single_trig_file.ifo
    config_path = os.path.abspath(dax_output + '/' + curr_ifo +
                                  '_'.join(tags) + 'singles_minifollowup.ini')
    workflow.cp.write(open(config_path, 'w'))
    config_file = wdax.File(os.path.basename(config_path))
    config_file.PFN(urljoin('file:', pathname2url(config_path)), site='local')

    exe = Executable(workflow.cp, 'singles_minifollowup', ifos=curr_ifo,
                     out_dir=dax_output, tags=tags)

    wikifile = curr_ifo + '_'.join(tags) + 'loudest_table.txt'

    node = exe.create_node()
    node.add_input_opt('--config-files', config_file)
    node.add_input_opt('--bank-file', tmpltbank_file)
    node.add_input_opt('--single-detector-file', single_trig_file)
    node.add_input_opt('--inspiral-segments', insp_segs)
    node.add_opt('--inspiral-data-read-name', insp_data_name)
    node.add_opt('--inspiral-data-analyzed-name', insp_anal_name)
    node.add_opt('--instrument', curr_ifo)
    node.add_opt('--wiki-file', wikifile)
    if veto_file is not None:
        assert veto_segment_name is not None
        node.add_input_opt('--veto-file', veto_file)
        node.add_opt('--veto-segment-name', veto_segment_name)
    if statfiles:
        statfiles = statfiles.find_output_with_ifo(curr_ifo)
        node.add_input_list_opt('--statistic-files', statfiles)
    node.new_output_file_opt(workflow.analysis_time, '.dax', '--output-file')
    node.new_output_file_opt(workflow.analysis_time, '.dax.map',
                             '--output-map')
    node.new_output_file_opt(workflow.analysis_time, '.tc.txt',
                             '--transformation-catalog')
    name = node.output_files[0].name
    map_file = node.output_files[1]
    tc_file = node.output_files[2]
    node.add_opt('--workflow-name', name)
    node.add_opt('--output-dir', out_dir)

    workflow += node

    # execute this in a sub-workflow
    fil = node.output_files[0]

    # determine if a staging site has been specified
    try:
        staging_site = workflow.cp.get('workflow-sngl_minifollowups',
                                       'staging-site')
    except:
        staging_site = None

    job = dax.DAX(fil)
    job.addArguments('--basename %s'
                     % os.path.splitext(os.path.basename(name))[0])
    Workflow.set_job_properties(job, map_file, tc_file,
                                staging_site=staging_site)
    workflow._adag.addJob(job)
    dep = dax.Dependency(parent=node._dax_node, child=job)
    workflow._adag.addDependency(dep)
    logging.info('Leaving minifollowups module')
def add_profile(self, namespace, key, value):
    """ Add profile information to this node at the DAX level
    """
    entry = dax.Profile(namespace, key, value)
    self._dax_node.addProfile(entry)
def setup_foreground_inference(workflow, coinc_file, single_triggers,
                               tmpltbank_file, insp_segs, insp_data_name,
                               insp_anal_name, dax_output, out_dir,
                               tags=None):
    """ Creates workflow node that will run the inference workflow.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    coinc_file: pycbc.workflow.File
        The file associated with coincident triggers.
    single_triggers: list of pycbc.workflow.File
        A list containing the file objects associated with the merged
        single detector trigger files for each ifo.
    tmpltbank_file: pycbc.workflow.File
        The file object pointing to the HDF format template bank
    insp_segs: SegFile
        The segment file containing the data read and analyzed by each
        inspiral job.
    insp_data_name: str
        The name of the segmentlist storing data read.
    insp_anal_name: str
        The name of the segmentlist storing data analyzed.
    dax_output : str
        The name of the output DAX file.
    out_dir: path
        The directory to store inference result plots and files
    tags: {None, optional}
        Tags to add to the inference executables
    """
    logging.info("Entering inference module")

    # check if configuration file has inference section
    if not workflow.cp.has_section("workflow-inference"):
        logging.info("There is no [workflow-inference] section in "
                     "configuration file")
        logging.info("Leaving inference module")
        return

    # default tags is a list
    tags = [] if tags is None else tags

    # make the directory that will contain the dax file
    makedir(dax_output)

    # turn the config file into a File class
    config_path = os.path.abspath(dax_output + "/" + "_".join(tags)
                                  + "foreground_inference.ini")
    workflow.cp.write(open(config_path, "w"))
    config_file = wdax.File(os.path.basename(config_path))
    config_file.PFN(config_path, "local")

    # create an Executable for the inference workflow generator
    exe = Executable(workflow.cp, "foreground_inference",
                     ifos=workflow.ifos, out_dir=dax_output)

    # create the node that will run in the workflow
    node = exe.create_node()
    node.add_input_opt("--config-files", config_file)
    node.add_input_opt("--bank-file", tmpltbank_file)
    node.add_input_opt("--statmap-file", coinc_file)
    node.add_multiifo_input_list_opt("--single-detector-triggers",
                                     single_triggers)
    node.new_output_file_opt(workflow.analysis_time, ".dax",
                             "--output-file", tags=tags)
    node.new_output_file_opt(workflow.analysis_time, ".dax.map",
                             "--output-map", tags=tags)
    node.new_output_file_opt(workflow.analysis_time, ".tc.txt",
                             "--transformation-catalog", tags=tags)

    # get dax name and use it for the workflow name
    name = node.output_files[0].name
    node.add_opt("--workflow-name", name)

    # get output map name and use it for the output dir name
    map_file = node.output_files[1]
    node.add_opt("--output-dir", out_dir)

    # get the transformation catalog name
    tc_file = node.output_files[2]

    # add this node to the workflow
    workflow += node

    # create job for dax that will run a sub-workflow
    # and add it to the workflow
    fil = node.output_files[0]
    job = dax.DAX(fil)
    job.addArguments("--basename %s"
                     % os.path.splitext(os.path.basename(name))[0])
    Workflow.set_job_properties(job, map_file, tc_file)
    workflow._adag.addJob(job)

    # make dax a child of the inference workflow generator node
    dep = dax.Dependency(parent=node._dax_node, child=job)
    workflow._adag.addDependency(dep)

    logging.info("Leaving inference module")
import os
import pwd
import time

import Pegasus.DAX3 as DAX3

USER = pwd.getpwuid(os.getuid())[0]

# Create an abstract DAG
dax = DAX3.ADAG("Strong Lensing Pipeline")

# Add some workflow-level metadata
dax.metadata("creator", "%s@%s" % (USER, os.uname()[1]))
dax.metadata("created", time.ctime())

dm_level1_catalog = DAX3.File('dm_level1_catalog')
dm_images = DAX3.File('dm_image_data')
SL_candidates = DAX3.File('SL_candidates')

SLFinder = DAX3.Job('SLFinder')
SLFinder.uses(dm_level1_catalog, link=DAX3.Link.INPUT)
SLFinder.uses(dm_images, link=DAX3.Link.INPUT)
SLFinder.uses(SL_candidates, link=DAX3.Link.OUTPUT, register=True,
              transfer=True)
dax.addJob(SLFinder)

DESC_Lenses = DAX3.File('DESC_Lenses')
SpaceWarps = DAX3.Job('SpaceWarps')
SpaceWarps.uses(SL_candidates, link=DAX3.Link.INPUT)
SpaceWarps.uses(DESC_Lenses, link=DAX3.Link.OUTPUT,
def has_pfn(self, url, site=None):
    """ Wrapper of the Pegasus hasPFN function, that allows it to be
    called outside of specific Pegasus functions.
    """
    curr_pfn = dax.PFN(url, site)
    return self.hasPFN(curr_pfn)
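# A short sketch of physical file names (PFNs) in Pegasus.DAX3, as exercised
# by has_pfn above; the file name, URL, and site label are illustrative only.
import Pegasus.DAX3 as dax

f = dax.File("results.hdf")
f.addPFN(dax.PFN("file:///data/results.hdf", "local"))
print(f.hasPFN(dax.PFN("file:///data/results.hdf", "local")))  # True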
def setup_single_det_minifollowups(workflow, single_trig_file, tmpltbank_file,
                                   insp_segs, insp_seg_name, dax_output,
                                   out_dir, veto_file=None,
                                   veto_segment_name=None, tags=None):
    """ Create plots that followup the Nth loudest clustered single detector
    triggers from a merged single detector trigger HDF file.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    single_trig_file: pycbc.workflow.File
        The File class holding the single detector triggers.
    tmpltbank_file: pycbc.workflow.File
        The file object pointing to the HDF format template bank
    insp_segs: dict
        A dictionary, keyed by ifo name, of the data read by each
        inspiral job.
    insp_seg_name: str
        The name of the segmentlist to read from the inspiral segment file
    out_dir: path
        The directory to store minifollowups result plots and files
    tags: {None, optional}
        Tags to add to the minifollowups executables

    Returns
    -------
    layout: list
        A list of tuples which specify the displayed file layout for the
        minifollowups plots.
    """
    logging.info('Entering minifollowups module')

    if not workflow.cp.has_section('workflow-minifollowups'):
        msg = 'There is no [workflow-minifollowups] section in '
        msg += 'configuration file'
        logging.info(msg)
        logging.info('Leaving minifollowups')
        return

    tags = [] if tags is None else tags
    makedir(dax_output)

    # turn the config file into a File class
    curr_ifo = single_trig_file.ifo
    config_path = os.path.abspath(dax_output + '/' + curr_ifo +
                                  '_'.join(tags) + 'singles_minifollowup.ini')
    workflow.cp.write(open(config_path, 'w'))
    config_file = wdax.File(os.path.basename(config_path))
    config_file.PFN(config_path, 'local')

    exe = Executable(workflow.cp, 'singles_minifollowup', ifos=curr_ifo,
                     out_dir=dax_output)

    node = exe.create_node()
    node.add_input_opt('--config-files', config_file)
    node.add_input_opt('--bank-file', tmpltbank_file)
    node.add_input_opt('--single-detector-file', single_trig_file)
    node.add_input_opt('--inspiral-segments', insp_segs[curr_ifo])
    node.add_opt('--inspiral-segment-name', insp_seg_name)
    node.add_opt('--instrument', curr_ifo)
    if veto_file is not None:
        assert veto_segment_name is not None
        node.add_input_opt('--veto-file', veto_file)
        node.add_opt('--veto-segment-name', veto_segment_name)
    node.new_output_file_opt(workflow.analysis_time, '.dax',
                             '--output-file', tags=tags)
    node.new_output_file_opt(workflow.analysis_time, '.dax.map',
                             '--output-map', tags=tags)
    name = node.output_files[0].name
    map_loc = node.output_files[1].name
    node.add_opt('--workflow-name', name)
    node.add_opt('--output-dir', out_dir)

    workflow += node

    # execute this as a sub-workflow
    fil = node.output_files[0]
    job = dax.DAX(fil)
    job.addArguments('--basename %s'
                     % os.path.splitext(os.path.basename(name))[0])
    Workflow.set_job_properties(job, map_loc)
    workflow._adag.addJob(job)
    dep = dax.Dependency(parent=node._dax_node, child=job)
    workflow._adag.addDependency(dep)
    logging.info('Leaving minifollowups module')
def setup_foreground_minifollowups(workflow, coinc_file, single_triggers,
                                   tmpltbank_file, insp_segs, insp_data_name,
                                   insp_anal_name, dax_output, out_dir,
                                   tags=None):
    """ Create plots that followup the Nth loudest coincident injection
    from a statmap produced HDF file.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    coinc_file: pycbc.workflow.File
        The file associated with coincident triggers.
    single_triggers: list of pycbc.workflow.File
        A list containing the file objects associated with the merged
        single detector trigger files for each ifo.
    tmpltbank_file: pycbc.workflow.File
        The file object pointing to the HDF format template bank
    insp_segs: SegFile
        The segment file containing the data read and analyzed by each
        inspiral job.
    insp_data_name: str
        The name of the segmentlist storing data read.
    insp_anal_name: str
        The name of the segmentlist storing data analyzed.
    out_dir: path
        The directory to store minifollowups result plots and files
    tags: {None, optional}
        Tags to add to the minifollowups executables

    Returns
    -------
    layout: list
        A list of tuples which specify the displayed file layout for the
        minifollowups plots.
    """
    logging.info('Entering minifollowups module')

    if not workflow.cp.has_section('workflow-minifollowups'):
        logging.info('There is no [workflow-minifollowups] section in '
                     'configuration file')
        logging.info('Leaving minifollowups')
        return

    tags = [] if tags is None else tags
    makedir(dax_output)

    # turn the config file into a File class
    config_path = os.path.abspath(dax_output + '/' + '_'.join(tags)
                                  + 'foreground_minifollowup.ini')
    workflow.cp.write(open(config_path, 'w'))
    config_file = wdax.File(os.path.basename(config_path))
    config_file.PFN(urljoin('file:', pathname2url(config_path)), site='local')

    exe = Executable(workflow.cp, 'foreground_minifollowup',
                     ifos=workflow.ifos, out_dir=dax_output, tags=tags)

    node = exe.create_node()
    node.add_input_opt('--config-files', config_file)
    node.add_input_opt('--bank-file', tmpltbank_file)
    node.add_input_opt('--statmap-file', coinc_file)
    node.add_multiifo_input_list_opt('--single-detector-triggers',
                                     single_triggers)
    node.add_input_opt('--inspiral-segments', insp_segs)
    node.add_opt('--inspiral-data-read-name', insp_data_name)
    node.add_opt('--inspiral-data-analyzed-name', insp_anal_name)
    if tags:
        node.add_list_opt('--tags', tags)
    node.new_output_file_opt(workflow.analysis_time, '.dax', '--output-file')
    node.new_output_file_opt(workflow.analysis_time, '.dax.map',
                             '--output-map')
    node.new_output_file_opt(workflow.analysis_time, '.tc.txt',
                             '--transformation-catalog')

    name = node.output_files[0].name
    map_file = node.output_files[1]
    tc_file = node.output_files[2]
    node.add_opt('--workflow-name', name)
    node.add_opt('--output-dir', out_dir)

    workflow += node

    # execute this in a sub-workflow
    fil = node.output_files[0]

    # determine if a staging site has been specified
    try:
        staging_site = workflow.cp.get('workflow-foreground_minifollowups',
                                       'staging-site')
    except:
        staging_site = None

    job = dax.DAX(fil)
    job.addArguments('--basename %s'
                     % os.path.splitext(os.path.basename(name))[0])
    Workflow.set_job_properties(job, map_file, tc_file,
                                staging_site=staging_site)
    workflow._adag.addJob(job)
    dep = dax.Dependency(parent=node._dax_node, child=job)
    workflow._adag.addDependency(dep)
    logging.info('Leaving minifollowups module')
def add_profile(self, namespace, key, value):
    """ Add profile information to this executable
    """
    entry = dax.Profile(namespace, key, value)
    self._dax_executable.addProfile(entry)