Example #1
 def __init__(self,
              name,
              namespace=None,
              os='linux',
              arch='x86_64',
              installed=True,
              version=None,
              container=None):
     self.logical_name = name + "_ID%s" % str(Executable.id)
     Executable.id += 1
     self.namespace = namespace
     self.version = version
     if container:
         self._dax_executable = dax.Executable(self.logical_name,
                                               namespace=self.namespace,
                                               version=version,
                                               os=os,
                                               arch=arch,
                                               installed=installed,
                                               container=container)
     else:
         self._dax_executable = dax.Executable(self.logical_name,
                                               namespace=self.namespace,
                                               version=version,
                                               os=os,
                                               arch=arch,
                                               installed=installed)
     self.in_workflow = False
     self.pfns = {}
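For reference, the wrapper above delegates to Pegasus.DAX3.Executable, branching only so that the container keyword is omitted when no container is given. A minimal standalone sketch of creating and registering such an executable with an ADAG (the module alias, names, and PFN are illustrative assumptions, not taken from the example):

import Pegasus.DAX3 as dax

adag = dax.ADAG("example_workflow")
exe = dax.Executable("lalapps_inspiral_ID0", namespace="demo",
                     os="linux", arch="x86_64", installed=True)
# Physical location of the executable; URL and site name are hypothetical.
exe.addPFN(dax.PFN("file:///usr/bin/env", "local"))
adag.addExecutable(exe)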
Example #2
def generateDax(name="mvm", inputData=None):
    """Generate a Pegasus DAX abstract workflow"""
    dax = peg.ADAG(name)
    taskname = "matchedVisitMetrics"
    arguments = " --doraise --config instrumentName='HSC' datasetName='HSC-PDR2' " \
                "doApplyExternalPhotoCalib=True doApplyExternalSkyWcs=True externalPhotoCalibName=fgcm "

    with open(inputData, 'r') as f:
        for line in f:
            filt, tract, visits = line.strip().split(' ')
            outNonRepoPath = os.path.join(outPath, tract, filt)

            logger.debug("add job of dataId: %s %s %s to %s", filt, tract,
                         visits, outNonRepoPath)
            task = peg.Job(name=taskname)
            task.addArguments(
                inputRepo, "--output", outNonRepoPath, arguments,
                "--id ccd=0..8^10..103 tract=%s visit=%s" % (tract, visits))
            dax.addJob(task)

            logfile = peg.File("%s-%s-%s.log" % (taskname, tract, filt))
            dax.addFile(logfile)
            task.setStdout(logfile)
            task.setStderr(logfile)
            task.uses(logfile, link=peg.Link.OUTPUT)

    return dax
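generateDax only builds and returns the ADAG; the caller presumably serializes it. A minimal sketch of that final step, with the ADAG built inline here for brevity (file names are placeholders):

import Pegasus.DAX3 as peg

adag = peg.ADAG("mvm")
# ... jobs and files would be added here, as generateDax() does above ...
with open("mvm.dax", "w") as f:
    adag.writeXML(f)  # serialize the abstract workflow to DAX XML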
Example #3
    def __init__(self, name='my_workflow'):
        self.name = name
        self._adag = dax.ADAG(name)

        self._inputs = []
        self._outputs = []
        self._executables = []
        self.in_workflow = False
        self.sub_workflows = []
        self._external_workflow_inputs = []
        self.filename = self.name + '.dax'

        self.as_job = dax.DAX(self.filename)
Example #4
    def __init__(self, name='my_workflow'):
        self.name = name
        self._adag = dax.ADAG(name)

        self._inputs = []
        self._outputs = []
        self._executables = []
Example #5
 def add_profile(self, namespace, key, value):
     """ Add profile information to this executable
     """
     try:
         entry = dax.Profile(namespace, key, value)
         self._dax_executable.addProfile(entry)
     except dax.DuplicateError:
         pass
Example #6
 def add_profile(self, namespace, key, value):
     """ Add profile information to this node at the DAX level
     """
     try:
         entry = dax.Profile(namespace, key, value)
         self._dax_node.addProfile(entry)
     except dax.DuplicateError:
         pass
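Examples #5 and #6 swallow dax.DuplicateError so that adding an already-present profile is a no-op rather than an error. A small standalone sketch of the Pegasus.DAX3 behaviour these wrappers lean on (job name and values are illustrative):

import Pegasus.DAX3 as dax

job = dax.Job(name="inspiral")
job.addProfile(dax.Profile(dax.Namespace.CONDOR, "request_memory", "2048"))
try:
    # Adding the same profile again raises DuplicateError ...
    job.addProfile(dax.Profile(dax.Namespace.CONDOR, "request_memory", "2048"))
except dax.DuplicateError:
    pass  # ... which the add_profile wrappers above simply ignore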
Example #7
    def add_node(self, node):
        """ Add a node to this workflow

        This function adds nodes to the workflow. It also determines
        parent/child relations from the DataStorage inputs to this job.

        Parameters
        ----------
        node : pycbc.workflow.pegasus_workflow.Node
            A node that should be executed as part of this workflow.
        """
        node._finalize()
        node.in_workflow = self

        # Record the executable that this node uses
        if not node.executable.in_workflow:
            for exe in self._executables:
                if node.executable.is_same_as(exe):
                    node.executable.in_workflow = True
                    node._dax_node.name = exe.logical_name
                    node.executable.logical_name = exe.logical_name
                    break
            else:
                node.executable.in_workflow = True
                self._executables += [node.executable]

        # Add the node itself
        self._adag.addJob(node._dax_node)

        # Determine the parent child relationships based on the inputs that
        # this node requires.
        added_nodes = []
        for inp in node._inputs:
            if inp.node is not None and inp.node.in_workflow == self:
                if inp.node not in added_nodes:
                    parent = inp.node._dax_node
                    child = node._dax_node
                    dep = dax.Dependency(parent=parent, child=child)
                    self._adag.addDependency(dep)
                    added_nodes.append(inp.node)

            elif inp.node is not None and not inp.node.in_workflow:
                raise ValueError('Parents of this node must be added to the '
                                 'workflow first.')

            elif inp.node is None and not inp.workflow_input:
                self._inputs += [inp]
                inp.workflow_input = True

            elif inp.node is not None and inp.node.in_workflow != self and inp not in self._inputs:
                self._inputs += [inp]
                self._external_workflow_inputs += [inp]

        # Record the outputs that this node generates
        self._outputs += node._outputs

        return self
Example #8
 def __init__(self, executable):
     self.in_workflow = False
     self.executable = executable
     self._inputs = []
     self._outputs = []
     self._dax_node = dax.Job(name=executable.logical_name,
                              version=executable.version,
                              namespace=executable.namespace)
     self._args = []
     self._options = []
Example #9
 def add_profile(self, namespace, key, value, force=False):
     """ Add profile information to this node at the DAX level
     """
     try:
         entry = dax.Profile(namespace, key, value)
         self._dax_node.addProfile(entry)
     except dax.DuplicateError:
         if force:
             # Replace with the new key
             self._dax_node.removeProfile(entry)
             self._dax_node.addProfile(entry)
Example #10
 def make(self, task_name, dataId=None, options=None, repo=None):
     job = DAX3.Job(task_name)
     if repo is None:
         repo = self.repo
     args = [repo]
     args = self._add_dataId(args, dataId)
     args = self._add_options(args, options)
     configfile = os.path.join(self.config_dir, '%s-config.py' % task_name)
     args.extend(['--configfile', configfile])
     job.addArguments(*args)
     self.dax.addJob(job)
     if self.bin_dir is not None and self.tc is not None:
         self._update_tc_file(task_name)
     return job
Example #11
 def __init__(self, executable):
     self.in_workflow = False
     self.executable = executable
     self._inputs = []
     self._outputs = []
     self._dax_node = dax.Job(name=executable.logical_name,
                              version=executable.version,
                              namespace=executable.namespace)
     self._args = []
     # Each value in _options is added separated with whitespace
     # so ['--option','value'] --> "--option value"
     self._options = []
     # For _raw_options *NO* whitespace is added.
     # so ['--option','value'] --> "--optionvalue"
     # and ['--option',' ','value'] --> "--option value"
     self._raw_options = []
Example #12
def make_results_web_page(workflow, results_dir, explicit_dependencies=None):
    template_path = 'templates/orange.html'

    out_dir = workflow.cp.get('results_page', 'output-path')
    makedir(out_dir)
    node = PlotExecutable(workflow.cp, 'results_page', ifos=workflow.ifos,
                out_dir=out_dir).create_node()
    node.add_opt('--plots-dir', results_dir)
    node.add_opt('--template-file', template_path)
    workflow += node
    if explicit_dependencies is not None:
        import Pegasus.DAX3 as dax
        for dep in explicit_dependencies:
            dax_dep = dax.Dependency(parent=dep._dax_node,
                                     child=node._dax_node)
            workflow._adag.addDependency(dax_dep)
Example #13
    def _make_root_dependency(self, inp):
        def root_path(v):
            path = [v]
            while v.in_workflow:
                path += [v.in_workflow]
                v = v.in_workflow
            return path

        workflow_root = root_path(self)
        input_root = root_path(inp)
        for step in workflow_root:
            if step in input_root:
                common = step
                break
        dep = dax.Dependency(
            parent=input_root[input_root.index(common) - 1].as_job,
            child=workflow_root[workflow_root.index(common) - 1].as_job)
        common._adag.addDependency(dep)
Example #14
    def add_node(self, node):
        """ Add a node to this workflow
        
        This function adds nodes to the workflow. It also determines
        parent/child relations from the DataStorage inputs to this job. 
        
        Parameters
        ----------
        node : Node
            A node that should be executed as part of this workflow.
        """
        node._finalize()
        node.in_workflow = True
        self._adag.addJob(node._dax_node)

        # Determine the parent child relationships based on the inputs that
        # this node requires.
        for inp in node._inputs:
            if inp.node is not None and inp.node.in_workflow:
                parent = inp.node._dax_node
                child = node._dax_node
                dep = dax.Dependency(parent=parent, child=child)
                self._adag.addDependency(dep)

            elif inp.node is not None and not inp.node.in_workflow:
                raise ValueError('Parents of this node must be added to the '
                                 'workflow first.')

            elif inp.node is None and inp.workflow_input is False:
                self._inputs += [inp]
                inp.workflow_input = True

        # Record the outputs that this node generates
        self._outputs += node._outputs

        # Record the executable that this node uses
        if not node.executable.in_workflow:
            node.executable.in_workflow = True
            self._executables += [node.executable]

        return self
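As the docstring notes, parent/child relations are inferred from the node's inputs, and each edge is then added explicitly with dax.Dependency. A self-contained sketch of that produce/consume pattern at the plain Pegasus.DAX3 level (job and file names are made up):

import Pegasus.DAX3 as dax

adag = dax.ADAG("pair")
shared = dax.File("stage1_output.hdf")

producer = dax.Job(name="stage1")
producer.uses(shared, link=dax.Link.OUTPUT)
adag.addJob(producer)

consumer = dax.Job(name="stage2")
consumer.uses(shared, link=dax.Link.INPUT)
adag.addJob(consumer)

# The ordering edge is declared explicitly, mirroring what add_node does above.
adag.addDependency(dax.Dependency(parent=producer, child=consumer))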
Example #15
 def insert_into_dax(self, dax):
     dax.addExecutable(self._dax_executable)
Example #16
 def insert_into_dax(self, dax):
     dax.addFile(self)
Example #17
 def insert_into_dax(self, dax):
     dax.addFile(self)
Example #18
import os
import sys
import glob
import pwd
import time
import Pegasus.DAX3 as DAX3
import desc.imsim_deep_pipeline as idp

USER = pwd.getpwuid(os.getuid())[0]

# Create an abstract dag
dax = DAX3.ADAG("imsim_pipeline")

# Add some workflow-level metadata
dax.metadata("creator", "%s@%s" % (USER, os.uname()[1]))
dax.metadata("created", time.ctime())

dither_info_file = 'dither_info.pkl'
sensor_lists = idp.SensorLists(dither_info_file)

for visit, visit_info in sensor_lists.visits:
    band = visit_info.band
    for sensor_id in visit_info.sensor_ids:
        make_instcat = DAX3.Job('make_instcat')
        make_instcat.addArguments(visit, sensor_id)
        instcat = DAX3.File('instcat_%(visit)s_%(sensor_id)s.txt' % locals())
        make_instcat.uses(instcat, link=DAX3.Link.OUTPUT, transfer=True,
                          register=True)
        dax.addJob(make_instcat)

        run_imsim = DAX3.Job('run_imsim')
Example #19
def generateDax(name="object", inputData=None):
    """Generate a Pegasus DAX abstract workflow"""
    dax = peg.ADAG(name)

    # These config-ish files are expected in the input/ folder
    schemaAbh = peg.File("schema.abh")
    dax.addFile(schemaAbh)
    sedScript = peg.File("fixCsv.sed")
    dax.addFile(sedScript)
    partCfg = peg.File("partition.json")
    dax.addFile(partCfg)
    catYaml = peg.File("hsc.yaml")
    dax.addFile(catYaml)

    # (Ab)using the shared filesystem....!!!
    chunkBaseFolder = os.path.join("/project", "hchiang2", "qserv", "qqpoc")
    if not os.path.isdir(chunkBaseFolder):
        logging.warning("Chunk file base folder %s invalid", chunkBaseFolder)

    # Create a new database and the Object table in Qserv
    task0a = peg.Job(name="replctl-register")
    task0a.addProfile(
        peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB"))
    task0a.addArguments("http://lsst-qserv-master03:25080", str(database),
                        "--felis", catYaml, "-v")
    dax.addJob(task0a)
    logfile = peg.File("qingest-a.log")
    dax.addFile(logfile)
    task0a.setStdout(logfile)
    task0a.setStderr(logfile)
    task0a.uses(logfile, link=peg.Link.OUTPUT)
    task0a.uses(catYaml, link=peg.Link.INPUT)

    # Start a super-transaction
    # Need to get the super transaction id from the log file
    task0c = peg.Job(name="replctl-trans")
    task0c.addProfile(
        peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB"))
    task0c.addArguments("http://lsst-qserv-master03:25080", str(database),
                        "--start")
    dax.addJob(task0c)
    transIdFile = peg.File("qingest-c.log")
    dax.addFile(transIdFile)
    task0c.setStdout(transIdFile)
    task0c.setStderr(transIdFile)
    task0c.uses(transIdFile, link=peg.Link.OUTPUT)
    dax.depends(parent=task0a, child=task0c)

    # Commit a super-transaction
    task0d = peg.Job(name="replctl-trans")
    task0d.addProfile(
        peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB"))
    task0d.addArguments("http://lsst-qserv-master03:25080", str(database),
                        "-a")
    dax.addJob(task0d)
    logfile = peg.File("qingest-d.log")
    dax.addFile(logfile)
    task0d.setStdout(logfile)
    task0d.setStderr(logfile)
    task0d.uses(logfile, link=peg.Link.OUTPUT)

    i = 0
    with open(inputData, 'r') as f:
        for line in f:
            inparq = line.strip()
            i += 1
            logging.debug('Add file %d: %s', i, inparq)

            taskname = 'hackType'
            task1 = peg.Job(name=taskname)
            task1.addProfile(
                peg.Profile(peg.Namespace.CONDOR, "request_memory", "20GB"))
            outparq = peg.File("hack-%d.parq" % i)
            dax.addFile(outparq)
            task1.addArguments("-i", inparq, "-o", outparq)
            dax.addJob(task1)
            logfile = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            dax.addFile(logfile)
            task1.setStdout(logfile)
            task1.setStderr(logfile)
            task1.uses(logfile, link=peg.Link.OUTPUT)
            task1.uses(outparq, link=peg.Link.OUTPUT)

            taskname = 'pq2csv'
            task2 = peg.Job(name=taskname)
            task2.addProfile(
                peg.Profile(peg.Namespace.CONDOR, "request_memory", "20GB"))
            outcsv = peg.File("csv-%d.csv" % i)
            dax.addFile(outcsv)
            task2.addArguments("--schema", schemaAbh, "--verbose", outparq,
                               outcsv)
            dax.addJob(task2)
            logfile = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            dax.addFile(logfile)
            task2.setStdout(logfile)
            task2.setStderr(logfile)
            task2.uses(logfile, link=peg.Link.OUTPUT)
            task2.uses(schemaAbh, link=peg.Link.INPUT)
            task2.uses(outparq, link=peg.Link.INPUT)
            task2.uses(outcsv, link=peg.Link.OUTPUT)
            dax.depends(parent=task1, child=task2)

            taskname = 'sed'
            task3 = peg.Job(name=taskname)
            task3.addProfile(
                peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB"))
            task3.addArguments("-f", sedScript, outcsv)
            dax.addJob(task3)
            logfile = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            newcsv = peg.File("new-%s.csv" % (i, ))
            dax.addFile(logfile)
            task3.setStdout(newcsv)
            task3.setStderr(logfile)
            task3.uses(logfile, link=peg.Link.OUTPUT)
            task3.uses(newcsv, link=peg.Link.OUTPUT)
            task3.uses(outcsv, link=peg.Link.INPUT)
            task3.uses(sedScript, link=peg.Link.INPUT)
            dax.depends(parent=task2, child=task3)

            # My input csv files are larger than 1GB each and I am not splitting them for now
            taskname = 'partition'
            task4 = peg.Job(name=taskname)
            task4.addProfile(
                peg.Profile(peg.Namespace.CONDOR, "request_memory", "15GB"))
            outdir = os.path.join(chunkBaseFolder, 'chunksSet' + str(i))
            task4.addArguments("--verbose", "-c", partCfg, "--in.path", newcsv,
                               "--out.dir", outdir)
            dax.addJob(task4)
            logfile = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            dax.addFile(logfile)
            task4.setStdout(logfile)
            task4.setStderr(logfile)
            task4.uses(logfile, link=peg.Link.OUTPUT)
            task4.uses(newcsv, link=peg.Link.INPUT)
            task4.uses(partCfg, link=peg.Link.INPUT)
            dax.depends(parent=task3, child=task4)

            # Look for chunk files in the output folder of this partitioning
            # Cannot handle smaller job units at dax creation as the folder is not yet populated;
            # if we want smaller units, consider using dynamic subworkflow
            taskname = 'allocateChunk'
            task5 = peg.Job(name=taskname)
            task5.addProfile(
                peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB"))
            task5.addArguments(outdir, "--idFile", transIdFile)
            dax.addJob(task5)
            logfile = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            dax.addFile(logfile)
            task5.setStdout(logfile)
            task5.setStderr(logfile)
            task5.uses(logfile, link=peg.Link.OUTPUT)
            task5.uses(transIdFile, link=peg.Link.INPUT)
            dax.depends(parent=task4, child=task5)
            dax.depends(parent=task0c, child=task5)

            taskname = 'loadData'
            task6 = peg.Job(name=taskname)
            task6.addProfile(
                peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB"))
            task6.addArguments(logfile)
            dax.addJob(task6)
            task6.uses(logfile, link=peg.Link.INPUT)
            logfile6 = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            dax.addFile(logfile6)
            task6.setStdout(logfile6)
            task6.setStderr(logfile6)
            task6.uses(logfile6, link=peg.Link.OUTPUT)
            dax.depends(parent=task5, child=task6)
            dax.depends(parent=task6, child=task0d)

    return dax
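Example #19 adds edges with the ADAG convenience method depends(parent=..., child=...), whereas most other examples construct dax.Dependency objects and call addDependency; both add the same kind of parent/child edge. A tiny illustrative sketch (job names are arbitrary):

import Pegasus.DAX3 as peg

adag = peg.ADAG("edge_demo")
a = peg.Job(name="taskA")
b = peg.Job(name="taskB")
adag.addJob(a)
adag.addJob(b)
# Convenience form used in example #19:
adag.depends(parent=a, child=b)
# Equivalent explicit form used in the pycbc examples:
# adag.addDependency(peg.Dependency(parent=a, child=b))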
Example #20
import os
import sys
import glob
import pwd
import time
import Pegasus.DAX3 as DAX3
from JobMaker import JobMaker
from repo_tools import *

USER = pwd.getpwuid(os.getuid())[0]

# Create an abstract dag
dax = DAX3.ADAG("Level_2_Coadd_Pipeline")

# Add some workflow-level metadata
dax.metadata("creator", "%s@%s" % (USER, os.uname()[1]))
dax.metadata("created", time.ctime())

input_repo = '/global/cscratch1/sd/descdm/DC1/DC1-imsim-dithered'
output_repo = '.'
config_dir = './configs'

job_maker = JobMaker(dax,
                     output_repo,
                     config_dir,
                     bin_dir='./bin',
                     tc='tc.txt')

# Loop over tracts
for tract in tract_list(output_repo):
    # Loop over patches.
Example #21
def setup_injection_minifollowups(workflow,
                                  injection_file,
                                  inj_xml_file,
                                  single_triggers,
                                  tmpltbank_file,
                                  insp_segs,
                                  insp_seg_name,
                                  dax_output,
                                  out_dir,
                                  tags=None):
    """ Create plots that followup the closest missed injections
    
    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    injection_file:
    single_triggers: list of pycbc.workflow.File
        A list containing the file objects associated with the merged
        single detector trigger files for each ifo.
    tmpltbank_file: pycbc.workflow.File
        The file object pointing to the HDF format template bank
    insp_segs: dict
        A dictionary, keyed by ifo name, of the data read by each inspiral job.
    insp_seg_name: str
        The name of the segmentlist to read from the inspiral segment file
    out_dir: path
        The directory to store minifollowups result plots and files
    tags: {None, optional}
        Tags to add to the minifollowups executables
    
    Returns
    -------
    layout: list
        A list of tuples which specify the displayed file layout for the 
        minifollowups plots.
    """
    logging.info('Entering injection minifollowups module')

    if not workflow.cp.has_section('workflow-injection_minifollowups'):
        logging.info(
            'There is no [workflow-injection_minifollowups] section in configuration file'
        )
        logging.info('Leaving minifollowups')
        return

    tags = [] if tags is None else tags
    makedir(dax_output)

    # turn the config file into a File class
    config_path = os.path.abspath(dax_output + '/' + '_'.join(tags) +
                                  'injection_minifollowup.ini')
    workflow.cp.write(open(config_path, 'w'))

    config_file = wdax.File(os.path.basename(config_path))
    config_file.PFN(config_path, 'local')

    exe = Executable(workflow.cp,
                     'injection_minifollowup',
                     ifos=workflow.ifos,
                     out_dir=dax_output)

    node = exe.create_node()
    node.add_input_opt('--config-files', config_file)
    node.add_input_opt('--bank-file', tmpltbank_file)
    node.add_input_opt('--injection-file', injection_file)
    node.add_input_opt('--injection-xml-file', inj_xml_file)
    node.add_multiifo_input_list_opt('--single-detector-triggers',
                                     single_triggers)
    node.add_multiifo_input_list_opt('--inspiral-segments', insp_segs.values())
    node.add_opt('--inspiral-segment-name', insp_seg_name)
    node.new_output_file_opt(workflow.analysis_time,
                             '.dax',
                             '--output-file',
                             tags=tags)
    node.new_output_file_opt(workflow.analysis_time,
                             '.dax.map',
                             '--output-map',
                             tags=tags)

    name = node.output_files[0].name
    map_loc = node.output_files[1].name

    node.add_opt('--workflow-name', name)
    node.add_opt('--output-dir', out_dir)

    workflow += node

    # execute this as a sub-workflow
    fil = node.output_files[0]

    job = dax.DAX(fil)
    job.addArguments('--basename %s' %
                     os.path.splitext(os.path.basename(name))[0])
    Workflow.set_job_properties(job, map_loc)
    workflow._adag.addJob(job)
    dep = dax.Dependency(parent=node._dax_node, child=job)
    workflow._adag.addDependency(dep)
    logging.info('Leaving injection minifollowups module')
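The tail of this example (and of example #25 below) re-embeds the generated .dax file into the parent workflow as a sub-workflow job via dax.DAX. A stripped-down sketch of that mechanism without the pycbc wrappers (file name and argument are placeholders):

import Pegasus.DAX3 as dax

outer = dax.ADAG("outer")
subdax_file = dax.File("injection_minifollowup.dax")  # produced by another job
subdax_job = dax.DAX(subdax_file)  # tells Pegasus to plan and run the sub-DAX
subdax_job.addArguments("--basename injection_minifollowup")
outer.addJob(subdax_job)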
Example #22
def main():
    args = parse_args()
    setup_logger(args.debug)

    # TODO: handle exceptions for bad file paths
    workflow_file_path = args.cwl_workflow_file_path
    workflow_file_dir = os.path.dirname(workflow_file_path)

    log.info("Loading {}".format(workflow_file_path))
    workflow = cwl.load_document(workflow_file_path)

    adag = dax.ADAG("dag-generated-from-cwl", auto=True)
    rc = ReplicaCatalog()
    tc = TransformationCatalog(workflow_file_dir)

    # process initial input file(s)
    # TODO: need to account for the different fields for a file class
    # TODO: log warning for the fields that we are skipping
    workflow_input_strings = dict()
    workflow_files = dict()

    log.info("Collecting inputs in {}".format(args.input_file_spec_path))
    with open(args.input_file_spec_path, "r") as yaml_file:
        input_file_specs = load(yaml_file, Loader=Loader)

        for input in workflow.inputs:
            input_type = input.type

            if input_type == "File":
                workflow_files[get_basename(input.id)] = get_basename(input.id)
                # TODO: account for non-local sites
                rc.add_item(get_basename(input.id),
                            input_file_specs[get_basename(input.id)]["path"],
                            "local")
            elif input_type == "string":
                workflow_input_strings[get_basename(input.id)] = \
                                        input_file_specs[get_basename(input.id)]
            elif isinstance(input_type, cwl.InputArraySchema):
                if input_type.items == "File":
                    # TODO: account for workflow inputs of type File[]
                    pass
                elif input_type.items == "string":
                    workflow_input_strings[get_basename(input.id)] = \
                                        input_file_specs[get_basename(input.id)]

    log.info("Collecting output files")
    for step in workflow.steps:
        cwl_command_line_tool = cwl.load_document(step.run) if isinstance(step.run, str) \
                                                                    else step.run

        for output in cwl_command_line_tool.outputs:
            # TODO: account for outputs that are not files
            output_name = get_name(step.id, output.id)

            log.debug("Adding (key: {0}, value: {1}) to workflow_files".format(
                output_name, output.outputBinding.glob))

            # TODO: throw error when glob contains javascript expression
            #       or pattern as we cannot support anything that is dynamic
            workflow_files[output_name] = output.outputBinding.glob

    log.info("Building workflow steps into dax jobs")
    for step in workflow.steps:
        # convert cwl:CommandLineTool -> pegasus:Executable
        cwl_command_line_tool = cwl.load_document(step.run) if isinstance(step.run, str) \
                                                                    else step.run

        executable_name = os.path.basename(cwl_command_line_tool.baseCommand) if \
            os.path.isabs(cwl_command_line_tool.baseCommand) else cwl_command_line_tool.baseCommand

        dax_executable = dax.Executable(executable_name)

        # add executable to transformation catalog
        tc.add_item(executable_name, cwl_command_line_tool.baseCommand)

        # create job with executable
        dax_job = dax.Job(dax_executable)

        step_inputs = dict()
        for input in step.in_:
            input_id = get_basename(input.id)
            if isinstance(input.source, str):
                step_inputs[input_id] = get_basename(input.source)
            elif isinstance(input.source, list):
                step_inputs[input_id] = [
                    get_basename(file) for file in input.source
                ]

        # add input uses to job
        for input in cwl_command_line_tool.inputs:
            if input.type == "File":
                file_id = step_inputs[get_name(step.id, input.id)]
                file = dax.File(workflow_files[file_id])
                log.debug("Adding link ({0} -> {1})".format(
                    file_id, dax_job.name))

                dax_job.uses(file, link=dax.Link.INPUT)

            # TODO: better type checking for string[] and File[] ?
            elif isinstance(input.type, cwl.CommandInputArraySchema):
                if input.type.items == "File":
                    file_ids = step_inputs[get_name(step.id, input.id)]
                    for file_id in file_ids:
                        file = dax.File(workflow_files[file_id])
                        log.debug("Adding link ({0} -> {1})".format(
                            file_id, dax_job.name))

                        dax_job.uses(file, link=dax.Link.INPUT)

        # add output uses to job
        # TODO: ensure that these are of type File or File[]
        for output in step.out:
            file_id = get_basename(output)
            file = dax.File(workflow_files[file_id])
            log.debug("Adding link ({0} -> {1})".format(dax_job.name, file_id))

            dax_job.uses(file,
                         link=dax.Link.OUTPUT,
                         transfer=True,
                         register=True)

        # add arguments to job
        # TODO: place argument building up in a function
        dax_job_args = cwl_command_line_tool.arguments if \
            cwl_command_line_tool.arguments is not None else []

        # process cwl inputBindings if they exist and build up job argument list
        cwl_command_line_tool_inputs = sorted(cwl_command_line_tool.inputs,
            key=lambda input : input.inputBinding.position if input.inputBinding.position \
                is not None else 0 )

        for input in cwl_command_line_tool_inputs:
            # process args
            if input.inputBinding is not None:
                # TODO: account for inputBinding separation
                if input.inputBinding.prefix is not None:
                    dax_job_args.append(input.inputBinding.prefix)

                if input.type == "File":
                    dax_job_args.append(
                        dax.File(workflow_files[step_inputs[get_name(
                            step.id, input.id)]]))

                if input.type == "string":
                    dax_job_args.append(
                        workflow_input_strings[step_inputs[get_name(
                            step.id, input.id)]])

                # handle array type inputs
                if isinstance(input.type, cwl.CommandInputArraySchema):
                    if input.type.items == "File":
                        for file in step_inputs[get_name(step.id, input.id)]:
                            dax_job_args.append(dax.File(workflow_files[file]))
                    elif input.type.items == "string":
                        input_string_arr_id = step_inputs[get_name(
                            step.id, input.id)]

                        separator = " " if input.inputBinding.itemSeparator is None \
                                        else input.inputBinding.itemSeparator

                        dax_job_args.append(
                            # TODO: currently only accounting for input strings that
                            #       are inputs to the entire workflow
                            separator.join(
                                workflow_input_strings[input_string_arr_id]))

        log.debug("Adding job: {0}, with args: {1}".format(
            dax_job.name, dax_job_args))
        dax_job.addArguments(*dax_job_args)

        # add job to DAG
        adag.addJob(dax_job)

    rc.write_catalog("rc.txt")
    tc.write_catalog("tc.txt")

    with open(args.output_file_path, "w") as f:
        log.info("Writing DAX to {}".format(args.output_file_path))
        adag.writeXML(f)
Example #23
 def insert_into_dax(self, dax):
     dax.addExecutable(self._dax_executable)
Example #24
def setup_postproc_coh_PTF_workflow(workflow,
                                    trig_files,
                                    trig_cache,
                                    inj_trig_files,
                                    inj_files,
                                    inj_trig_caches,
                                    inj_caches,
                                    config_file,
                                    output_dir,
                                    html_dir,
                                    segment_dir,
                                    ifos,
                                    inj_tags=[],
                                    tags=[]):
    """
    This module sets up the post-processing stage in the workflow, using a
    coh_PTF style set up. This consists of running trig_combiner to find
    coherent triggers, and injfinder to look for injections. It then runs
    a horizon_dist job, trig_cluster to cluster triggers, and injcombiner to
    calculate injection statistics. Finally, efficiency and sbv_plotter jobs
    calculate efficiency and signal based veto statistics and make plots.
    
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the jobs will be added to.
    trig_files : pycbc.workflow.core.FileList
        A FileList containing the combined databases.
   
    Returns
    --------
    
    """
    cp = workflow.cp
    full_segment = trig_files[0].segment
    trig_name = cp.get("workflow", "trigger-name")
    grb_string = "GRB" + trig_name
    num_trials = int(cp.get("trig_combiner", "num-trials"))

    pp_outs = FileList([])
    pp_nodes = []

    # Set up needed exe classes
    trig_combiner_exe = os.path.basename(cp.get("executables",
                                                "trig_combiner"))
    trig_combiner_class = select_generic_executable(workflow, "trig_combiner")

    trig_cluster_exe = os.path.basename(cp.get("executables", "trig_cluster"))
    trig_cluster_class = select_generic_executable(workflow, "trig_cluster")

    sbv_plotter_exe = os.path.basename(cp.get("executables", "sbv_plotter"))
    sbv_plotter_class = select_generic_executable(workflow, "sbv_plotter")

    efficiency_exe = os.path.basename(cp.get("executables", "efficiency"))
    efficiency_class = select_generic_executable(workflow, "efficiency")
    """
    horizon_dist_exe = os.path.basename(cp.get("executables",
                                               "horizon_dist"))
    horizon_dist_class = select_generic_executable(workflow,
                                                   "horizon_dist")
    """
    html_summary_exe = os.path.basename(cp.get("executables", "html_summary"))
    html_summary_class = select_generic_executable(workflow, "html_summary")

    # Set up trig_combiner job
    trig_combiner_out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]
    trig_combiner_jobs = trig_combiner_class(cp,
                                             "trig_combiner",
                                             ifo=ifos,
                                             out_dir=output_dir,
                                             tags=tags)
    trig_combiner_node, trig_combiner_outs = trig_combiner_jobs.create_node(\
            trig_files, segment_dir, out_tags=trig_combiner_out_tags,
            tags=tags)
    pp_nodes.append(trig_combiner_node)
    workflow.add_node(trig_combiner_node)
    pp_outs.extend(trig_combiner_outs)

    # Initialise trig_cluster class
    trig_cluster_outs = FileList([])
    trig_cluster_jobs = trig_cluster_class(cp,
                                           "trig_cluster",
                                           ifo=ifos,
                                           out_dir=output_dir,
                                           tags=tags)

    # Set up injfinder jobs
    if cp.has_section("workflow-injections"):
        injfinder_nodes = []
        injcombiner_parent_nodes = []
        inj_sbv_plotter_parent_nodes = []

        injfinder_exe = os.path.basename(cp.get("executables", "injfinder"))
        injfinder_class = select_generic_executable(workflow, "injfinder")
        injfinder_jobs = injfinder_class(cp,
                                         "injfinder",
                                         ifo=ifos,
                                         out_dir=output_dir,
                                         tags=tags)

        injcombiner_exe = os.path.basename(cp.get("executables",
                                                  "injcombiner"))
        injcombiner_class = select_generic_executable(workflow, "injcombiner")
        injcombiner_jobs = injcombiner_class(cp,
                                             "injcombiner",
                                             ifo=ifos,
                                             out_dir=output_dir,
                                             tags=tags)

        injfinder_outs = FileList([])
        for inj_tag in inj_tags:
            triggers = FileList([file for file in inj_trig_files \
                                 if inj_tag in file.tag_str])
            injections = FileList([file for file in inj_files \
                                   if inj_tag in file.tag_str])
            trig_cache = [file for file in inj_trig_caches \
                          if inj_tag in file.tag_str][0]
            inj_cache = [file for file in inj_caches \
                         if inj_tag in file.tag_str][0]
            injfinder_node, curr_outs = injfinder_jobs.create_node(\
                    triggers, injections, segment_dir, tags=[inj_tag])
            injfinder_nodes.append(injfinder_node)
            pp_nodes.append(injfinder_node)
            workflow.add_node(injfinder_node)
            injfinder_outs.extend(curr_outs)
            if "DETECTION" not in curr_outs[0].tagged_description:
                injcombiner_parent_nodes.append(injfinder_node)
            else:
                inj_sbv_plotter_parent_nodes.append(injfinder_node)

        pp_outs.extend(injfinder_outs)

        # Make injfinder output cache
        fm_cache = File(ifos,
                        "foundmissed",
                        full_segment,
                        extension="lcf",
                        directory=output_dir)
        fm_cache.PFN(fm_cache.cache_entry.path, site="local")
        injfinder_outs.convert_to_lal_cache().tofile(\
                open(fm_cache.storage_path, "w"))
        pp_outs.extend(FileList([fm_cache]))

        # Set up injcombiner jobs
        injcombiner_outs = FileList([file for file in injfinder_outs \
                                     if "DETECTION" in file.tag_str])
        injcombiner_tags = [inj_tag for inj_tag in inj_tags \
                            if "DETECTION" not in inj_tag]
        injcombiner_out_tags = [injcombiner_outs[0].tag_str.rsplit('_', 1)[0]]
        injcombiner_nodes = []

        for injcombiner_tag in injcombiner_tags:
            max_inc = cp.get_opt_tags("injections", "max-inc",
                                      [injcombiner_tag])
            inj_str = injcombiner_tag[:4]
            inputs = FileList([file for file in injfinder_outs \
                               if injcombiner_tag in file.tagged_description])
            #                   if any(tag in file.tagged_description \
            #                          for tag in injcombiner_tags)])
            injcombiner_node, curr_outs = injcombiner_jobs.create_node(\
                    fm_cache, inputs, inj_str, max_inc, workflow.analysis_time)
            injcombiner_nodes.append(injcombiner_node)
            injcombiner_out_tags.append("%s_FILTERED_%s" % (inj_str, max_inc))
            injcombiner_outs.extend(curr_outs)
            pp_outs.extend(curr_outs)
            pp_nodes.append(injcombiner_node)
            workflow.add_node(injcombiner_node)
            for parent_node in injcombiner_parent_nodes:
                dep = dax.Dependency(parent=parent_node._dax_node,
                                     child=injcombiner_node._dax_node)
                workflow._adag.addDependency(dep)

        # Initialise injection_efficiency class
        inj_efficiency_jobs = efficiency_class(cp,
                                               "inj_efficiency",
                                               ifo=ifos,
                                               out_dir=output_dir,
                                               tags=tags)

    # Initialise sbv_plotter class
    sbv_plotter_outs = FileList([])
    sbv_plotter_jobs = sbv_plotter_class(cp,
                                         "sbv_plotter",
                                         ifo=ifos,
                                         out_dir=output_dir,
                                         tags=tags)

    # Initialise efficiency class
    efficiency_outs = FileList([])
    efficiency_jobs = efficiency_class(cp,
                                       "efficiency",
                                       ifo=ifos,
                                       out_dir=output_dir,
                                       tags=tags)

    # Add trig_cluster jobs and their corresponding plotting jobs
    for out_tag in trig_combiner_out_tags:
        unclust_file = [file for file in trig_combiner_outs \
                        if out_tag in file.tag_str][0]
        trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        trig_cluster_outs.extend(curr_outs)
        clust_file = curr_outs[0]
        if out_tag != "ONSOURCE":
            # Add memory requirement for jobs with potentially large files
            trig_cluster_node.set_memory(1300)
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add sbv_plotter job
            sbv_out_tags = [out_tag, "_clustered"]
            sbv_plotter_node = sbv_plotter_jobs.create_node(clust_file,
                                                            segment_dir,
                                                            tags=sbv_out_tags)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add injection sbv_plotter nodes if appropriate
            if out_tag == "OFFSOURCE" and \
                    cp.has_section("workflow-injections"):
                offsource_clustered = clust_file
                off_node = sbv_plotter_node

                found_inj_files = FileList([file for file in injcombiner_outs \
                                            if "FOUND" in file.tag_str])
                for curr_injs in found_inj_files:
                    curr_tags = [tag for tag in injcombiner_out_tags \
                                 if tag in curr_injs.name]
                    curr_tags.append("_clustered")
                    sbv_plotter_node = sbv_plotter_jobs.create_node(
                        clust_file,
                        segment_dir,
                        inj_file=curr_injs,
                        tags=curr_tags)
                    pp_nodes.append(sbv_plotter_node)
                    workflow.add_node(sbv_plotter_node)
                    dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                         child=sbv_plotter_node._dax_node)
                    workflow._adag.addDependency(dep)
                    if "DETECTION" in curr_injs.tagged_description:
                        for parent_node in inj_sbv_plotter_parent_nodes:
                            dep = dax.Dependency(
                                parent=parent_node._dax_node,
                                child=sbv_plotter_node._dax_node)
                            workflow._adag.addDependency(dep)
                    else:
                        for parent_node in injcombiner_nodes:
                            dep = dax.Dependency(
                                parent=parent_node._dax_node,
                                child=sbv_plotter_node._dax_node)
                            workflow._adag.addDependency(dep)

            # Also add sbv_plotter job for unclustered triggers
            sbv_plotter_node = sbv_plotter_jobs.create_node(
                unclust_file, segment_dir, tags=[out_tag, "_unclustered"])
            sbv_plotter_node.set_memory(1300)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)
        else:
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add efficiency job for on/off
            efficiency_node = efficiency_jobs.create_node(clust_file,
                                                          offsource_clustered,
                                                          segment_dir,
                                                          tags=[out_tag])
            pp_nodes.append(efficiency_node)
            workflow.add_node(efficiency_node)
            dep = dax.Dependency(parent=off_node._dax_node,
                                 child=efficiency_node._dax_node)
            workflow._adag.addDependency(dep)

            if cp.has_section("workflow-injections"):
                for tag in injcombiner_out_tags:
                    if "_FILTERED_" in tag:
                        inj_set_tag = [t for t in inj_tags if \
                                       str(tag).replace("_FILTERED_", "") \
                                       in t][0]
                    else:
                        inj_set_tag = str(tag)

                    found_file = [file for file in injcombiner_outs \
                                  if tag + "_FOUND" in file.tag_str][0]
                    missed_file = [file for file in injcombiner_outs \
                                   if tag + "_MISSED" in file.tag_str][0]
                    inj_efficiency_node = inj_efficiency_jobs.create_node(\
                            clust_file, offsource_clustered, segment_dir,
                            found_file, missed_file, tags=[out_tag, tag,
                                                           inj_set_tag])
                    pp_nodes.append(inj_efficiency_node)
                    workflow.add_node(inj_efficiency_node)
                    dep = dax.Dependency(parent=off_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                    for injcombiner_node in injcombiner_nodes:
                        dep = dax.Dependency(
                            parent=injcombiner_node._dax_node,
                            child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)
                    for injfinder_node in injfinder_nodes:
                        dep = dax.Dependency(
                            parent=injfinder_node._dax_node,
                            child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)

    # Add further trig_cluster jobs for trials
    trial = 1

    while trial <= num_trials:
        trial_tag = "OFFTRIAL_%d" % trial
        unclust_file = [file for file in trig_combiner_outs \
                        if trial_tag in file.tag_str][0]
        trig_cluster_node, clust_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        clust_file = clust_outs[0]
        trig_cluster_outs.extend(clust_outs)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)

        # Add efficiency job
        efficiency_node = efficiency_jobs.create_node(clust_file,
                                                      offsource_clustered,
                                                      segment_dir,
                                                      tags=[trial_tag])
        pp_nodes.append(efficiency_node)
        workflow.add_node(efficiency_node)
        dep = dax.Dependency(parent=off_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)
        dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)

        # Adding inj_efficiency job
        if cp.has_section("workflow-injections"):
            for tag in injcombiner_out_tags:
                if "_FILTERED_" in tag:
                    inj_set_tag = [t for t in inj_tags if \
                                   str(tag).replace("_FILTERED_", "") in t][0]
                else:
                    inj_set_tag = str(tag)

                found_file = [file for file in injcombiner_outs \
                              if tag + "_FOUND" in file.tag_str][0]
                missed_file = [file for file in injcombiner_outs \
                               if tag + "_MISSED" in file.tag_str][0]
                inj_efficiency_node = inj_efficiency_jobs.create_node(\
                        clust_file, offsource_clustered, segment_dir,
                        found_file, missed_file, tags=[trial_tag, tag,
                                                       inj_set_tag])
                pp_nodes.append(inj_efficiency_node)
                workflow.add_node(inj_efficiency_node)
                dep = dax.Dependency(parent=off_node._dax_node,
                                     child=inj_efficiency_node._dax_node)
                workflow._adag.addDependency(dep)
                for injcombiner_node in injcombiner_nodes:
                    dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                for injfinder_node in injfinder_nodes:
                    dep = dax.Dependency(parent=injfinder_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)

        trial += 1

    # Initialise html_summary class and set up job
    #FIXME: We may want this job to run even if some jobs fail
    html_summary_jobs = html_summary_class(cp,
                                           "html_summary",
                                           ifo=ifos,
                                           out_dir=output_dir,
                                           tags=tags)
    if cp.has_section("workflow-injections"):
        tuning_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                       if "DETECTION" in inj_tag]
        exclusion_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                          if "DETECTION" not in inj_tag]
        html_summary_node = html_summary_jobs.create_node(
            c_file=config_file,
            tuning_tags=tuning_tags,
            exclusion_tags=exclusion_tags,
            html_dir=html_dir)
    else:
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                                                          html_dir=html_dir)
    workflow.add_node(html_summary_node)
    for pp_node in pp_nodes:
        dep = dax.Dependency(parent=pp_node._dax_node,
                             child=html_summary_node._dax_node)
        workflow._adag.addDependency(dep)

    # Make the open box shell script
    open_box_cmd = html_summary_node.executable.get_pfn() + " "
    open_box_cmd += ' '.join(html_summary_node._args + \
                             html_summary_node._options)
    open_box_cmd += " --open-box"
    open_box_path = "%s/open_the_box.sh" % output_dir
    f = open(open_box_path, "w")
    f.write("#!/bin/sh\n%s" % open_box_cmd)
    f.close()
    os.chmod(open_box_path, 0o500)

    pp_outs.extend(trig_cluster_outs)

    return pp_outs
Example #25
def setup_single_det_minifollowups(workflow, single_trig_file, tmpltbank_file,
                                   insp_segs, insp_data_name, insp_anal_name,
                                   dax_output, out_dir, veto_file=None,
                                   veto_segment_name=None, statfiles=None,
                                   tags=None):
    """ Create plots that followup the Nth loudest clustered single detector
    triggers from a merged single detector trigger HDF file.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    single_trig_file: pycbc.workflow.File
        The File class holding the single detector triggers.
    tmpltbank_file: pycbc.workflow.File
        The file object pointing to the HDF format template bank
    insp_segs: SegFile
       The segment file containing the data read by each inspiral job.
    insp_data_name: str
        The name of the segmentlist storing data read.
    insp_anal_name: str
        The name of the segmentlist storing data analyzed.
    out_dir: path
        The directory to store minifollowups result plots and files
    statfiles: FileList (optional, default=None)
        Supplementary files necessary for computing the single-detector
        statistic.
    tags: {None, optional}
        Tags to add to the minifollowups executables
    Returns
    -------
    layout: list
        A list of tuples which specify the displayed file layout for the
        minifollowups plots.
    """
    logging.info('Entering minifollowups module')

    if not workflow.cp.has_section('workflow-sngl_minifollowups'):
        msg = 'There is no [workflow-sngl_minifollowups] section in '
        msg += 'configuration file'
        logging.info(msg)
        logging.info('Leaving minifollowups')
        return

    tags = [] if tags is None else tags
    makedir(dax_output)

    # turn the config file into a File class
    curr_ifo = single_trig_file.ifo
    config_path = os.path.abspath(dax_output + '/' + curr_ifo + \
                                   '_'.join(tags) + 'singles_minifollowup.ini')
    workflow.cp.write(open(config_path, 'w'))

    config_file = wdax.File(os.path.basename(config_path))
    config_file.PFN(urljoin('file:', pathname2url(config_path)), site='local')

    exe = Executable(workflow.cp, 'singles_minifollowup',
                     ifos=curr_ifo, out_dir=dax_output, tags=tags)

    wikifile = curr_ifo + '_'.join(tags) + 'loudest_table.txt'

    node = exe.create_node()
    node.add_input_opt('--config-files', config_file)
    node.add_input_opt('--bank-file', tmpltbank_file)
    node.add_input_opt('--single-detector-file', single_trig_file)
    node.add_input_opt('--inspiral-segments', insp_segs)
    node.add_opt('--inspiral-data-read-name', insp_data_name)
    node.add_opt('--inspiral-data-analyzed-name', insp_anal_name)
    node.add_opt('--instrument', curr_ifo)
    node.add_opt('--wiki-file', wikifile)
    if veto_file is not None:
        assert(veto_segment_name is not None)
        node.add_input_opt('--veto-file', veto_file)
        node.add_opt('--veto-segment-name', veto_segment_name)
    if statfiles:
        statfiles = statfiles.find_output_with_ifo(curr_ifo)
        node.add_input_list_opt('--statistic-files', statfiles)
    node.new_output_file_opt(workflow.analysis_time, '.dax', '--output-file')
    node.new_output_file_opt(workflow.analysis_time, '.dax.map',
                             '--output-map')
    node.new_output_file_opt(workflow.analysis_time, '.tc.txt',
                             '--transformation-catalog')

    name = node.output_files[0].name
    map_file = node.output_files[1]
    tc_file = node.output_files[2]

    node.add_opt('--workflow-name', name)
    node.add_opt('--output-dir', out_dir)

    workflow += node

    # execute this in a sub-workflow
    fil = node.output_files[0]

    # determine if a staging site has been specified
    try:
        staging_site = workflow.cp.get('workflow-sngl_minifollowups',
                                       'staging-site')
    except:
        staging_site = None

    job = dax.DAX(fil)
    job.addArguments('--basename %s' \
                     % os.path.splitext(os.path.basename(name))[0])
    Workflow.set_job_properties(job, map_file, tc_file,
                                staging_site=staging_site)
    workflow._adag.addJob(job)
    dep = dax.Dependency(parent=node._dax_node, child=job)
    workflow._adag.addDependency(dep)
    logging.info('Leaving minifollowups module')
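The setup function above is gated on the configuration carried by the workflow: without the relevant section it logs a message and returns without adding any jobs, and the optional staging-site key steers where the sub-workflow is planned. Below is a small sketch of those two knobs, using a plain ConfigParser as a stand-in for workflow.cp; the staging-site value is only a placeholder.

# Stand-in sketch: a plain ConfigParser mimics the two options the function
# above reads from workflow.cp. The section name and the staging-site key
# come from the code; the value 'local' is a placeholder.
from configparser import ConfigParser

cp = ConfigParser()
cp.add_section('workflow-sngl_minifollowups')
cp.set('workflow-sngl_minifollowups', 'staging-site', 'local')

assert cp.has_section('workflow-sngl_minifollowups')  # gate that enables the jobs
print(cp.get('workflow-sngl_minifollowups', 'staging-site'))  # 'local'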
Example #26
 def add_profile(self, namespace, key, value):
     """ Add profile information to this node at the DAX level
     """
     entry = dax.Profile(namespace, key, value)
     self._dax_node.addProfile(entry)
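A one-line usage sketch for the method above; the node object and the namespace, key, and value are illustrative only, and which keys are meaningful is defined by the Pegasus profile namespaces (condor, pegasus, env, and so on).

# Illustrative only: 'node' stands in for an instance of the class defining
# add_profile above; this particular profile asks Condor for 4 GB of memory.
node.add_profile('condor', 'request_memory', '4096')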
Example #27
def setup_foreground_inference(workflow,
                               coinc_file,
                               single_triggers,
                               tmpltbank_file,
                               insp_segs,
                               insp_data_name,
                               insp_anal_name,
                               dax_output,
                               out_dir,
                               tags=None):
    """ Creates workflow node that will run the inference workflow.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    coinc_file: pycbc.workflow.File
        The file associated with coincident triggers.
    single_triggers: list of pycbc.workflow.File
        A list containing the file objects associated with the merged
        single detector trigger files for each ifo.
    tmpltbank_file: pycbc.workflow.File
        The file object pointing to the HDF format template bank
    insp_segs: SegFile
       The segment file containing the data read and analyzed by each inspiral
       job.
    insp_data_name: str
        The name of the segmentlist storing data read.
    insp_anal_name: str
        The name of the segmentlist storing data analyzed.
    dax_output: str
        The directory that will contain the generated DAX file.
    out_dir: path
        The directory to store inference result plots and files
    tags: {None, optional}
        Tags to add to the inference executables
    """

    logging.info("Entering inference module")

    # check if configuration file has inference section
    if not workflow.cp.has_section("workflow-inference"):
        logging.info(
            "There is no [workflow-inference] section in configuration file")
        logging.info("Leaving inference module")
        return

    # default tags is a list
    tags = [] if tags is None else tags

    # make the directory that will contain the dax file
    makedir(dax_output)

    # turn the config file into a File class
    config_path = os.path.abspath(dax_output + "/" + "_".join(tags) \
                                        + "foreground_inference.ini")
    workflow.cp.write(open(config_path, "w"))
    config_file = wdax.File(os.path.basename(config_path))
    config_file.PFN(config_path, "local")

    # create an Executable for the inference workflow generator
    exe = Executable(workflow.cp,
                     "foreground_inference",
                     ifos=workflow.ifos,
                     out_dir=dax_output)

    # create the node that will run in the workflow
    node = exe.create_node()
    node.add_input_opt("--config-files", config_file)
    node.add_input_opt("--bank-file", tmpltbank_file)
    node.add_input_opt("--statmap-file", coinc_file)
    node.add_multiifo_input_list_opt("--single-detector-triggers",
                                     single_triggers)
    node.new_output_file_opt(workflow.analysis_time,
                             ".dax",
                             "--output-file",
                             tags=tags)
    node.new_output_file_opt(workflow.analysis_time,
                             ".dax.map",
                             "--output-map",
                             tags=tags)
    node.new_output_file_opt(workflow.analysis_time,
                             ".tc.txt",
                             "--transformation-catalog",
                             tags=tags)

    # get dax name and use it for the workflow name
    name = node.output_files[0].name
    node.add_opt("--workflow-name", name)

    # get output map name and use it for the output dir name
    map_file = node.output_files[1]
    node.add_opt("--output-dir", out_dir)

    # get the transformation catalog name
    tc_file = node.output_files[2]

    # add this node to the workflow
    workflow += node

    # create job for dax that will run a sub-workflow
    # and add it to the workflow
    fil = node.output_files[0]
    job = dax.DAX(fil)
    job.addArguments("--basename %s" %
                     os.path.splitext(os.path.basename(name))[0])
    Workflow.set_job_properties(job, map_file, tc_file)
    workflow._adag.addJob(job)

    # make dax a child of the inference workflow generator node
    dep = dax.Dependency(parent=node._dax_node, child=job)
    workflow._adag.addDependency(dep)

    logging.info("Leaving inference module")
Example #28
import os
import pwd
import time
import Pegasus.DAX3 as DAX3

USER = pwd.getpwuid(os.getuid())[0]

# Create an abstract DAG
dax = DAX3.ADAG("Strong Lensing Pipeline")

# Add some workflow-level metadata
dax.metadata("creator", "%s@%s" % (USER, os.uname()[1]))
dax.metadata("created", time.ctime())

dm_level1_catalog = DAX3.File('dm_level1_catalog')
dm_images = DAX3.File('dm_image_data')
SL_candidates = DAX3.File('SL_candidates')
SLFinder = DAX3.Job('SLFinder')
SLFinder.uses(dm_level1_catalog, link=DAX3.Link.INPUT)
SLFinder.uses(dm_images, link=DAX3.Link.INPUT)
SLFinder.uses(SL_candidates,
              link=DAX3.Link.OUTPUT,
              register=True,
              transfer=True)
dax.addJob(SLFinder)

DESC_Lenses = DAX3.File('DESC_Lenses')
SpaceWarps = DAX3.Job('SpaceWarps')
SpaceWarps.uses(SL_candidates, link=DAX3.Link.INPUT)
SpaceWarps.uses(DESC_Lenses,
                link=DAX3.Link.OUTPUT,
                register=True,
                transfer=True)
dax.addJob(SpaceWarps)
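With both jobs added, the abstract workflow still needs its ordering declared and has to be written out before pegasus-plan can use it. The lines below are a sketch of those typical closing steps rather than part of the original example; the output filename is arbitrary.

# Typical closing steps (a sketch, not part of the original example): declare
# the job ordering explicitly and write the abstract workflow to a DAX file
# that pegasus-plan can consume.
dax.depends(parent=SLFinder, child=SpaceWarps)

with open('strong_lensing_pipeline.dax', 'w') as f:
    dax.writeXML(f)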
Example #29
 def has_pfn(self, url, site=None):
     """ Wrapper of the pegasus hasPFN function, that allows it to be called
     outside of specific pegasus functions.
     """
     curr_pfn = dax.PFN(url, site)
     return self.hasPFN(curr_pfn)
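A small usage sketch for the wrapper above; it assumes the method is defined on a class deriving from the Pegasus DAX3 File class (which supplies addPFN and hasPFN), and the file object, URL, and site name are placeholders.

# Illustrative only: 'fil' stands in for an instance of the File class that
# defines has_pfn above (ultimately a Pegasus DAX3 File); the URL and site
# are placeholders.
fil.addPFN(dax.PFN('file:///tmp/example.ini', 'local'))
print(fil.has_pfn('file:///tmp/example.ini', site='local'))  # expected: True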
Example #30
def setup_single_det_minifollowups(workflow,
                                   single_trig_file,
                                   tmpltbank_file,
                                   insp_segs,
                                   insp_seg_name,
                                   dax_output,
                                   out_dir,
                                   veto_file=None,
                                   veto_segment_name=None,
                                   tags=None):
    """ Create plots that followup the Nth loudest clustered single detector
    triggers from a merged single detector trigger HDF file.
    
    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    single_trig_file: pycbc.workflow.File
        The File class holding the single detector triggers.
    tmpltbank_file: pycbc.workflow.File
        The file object pointing to the HDF format template bank
    insp_segs: dict
        A dictionary, keyed by ifo name, of the data read by each inspiral job.
    insp_seg_name: str
        The name of the segmentlist to read from the inspiral segment file
    out_dir: path
        The directory to store minifollowups result plots and files
    tags: {None, optional}
        Tags to add to the minifollowups executables    
    Returns
    -------
    layout: list
        A list of tuples which specify the displayed file layout for the
        minifollowups plots.
    """
    logging.info('Entering minifollowups module')

    if not workflow.cp.has_section('workflow-minifollowups'):
        msg = 'There is no [workflow-minifollowups] section in '
        msg += 'configuration file'
        logging.info(msg)
        logging.info('Leaving minifollowups')
        return

    tags = [] if tags is None else tags
    makedir(dax_output)

    # turn the config file into a File class
    curr_ifo = single_trig_file.ifo
    config_path = os.path.abspath(dax_output + '/' + curr_ifo + \
                                   '_'.join(tags) + 'singles_minifollowup.ini')
    workflow.cp.write(open(config_path, 'w'))

    config_file = wdax.File(os.path.basename(config_path))
    config_file.PFN(config_path, 'local')

    exe = Executable(workflow.cp,
                     'singles_minifollowup',
                     ifos=curr_ifo,
                     out_dir=dax_output)

    node = exe.create_node()
    node.add_input_opt('--config-files', config_file)
    node.add_input_opt('--bank-file', tmpltbank_file)
    node.add_input_opt('--single-detector-file', single_trig_file)
    node.add_input_opt('--inspiral-segments', insp_segs[curr_ifo])
    node.add_opt('--inspiral-segment-name', insp_seg_name)
    node.add_opt('--instrument', curr_ifo)
    if veto_file is not None:
        assert (veto_segment_name is not None)
        node.add_input_opt('--veto-file', veto_file)
        node.add_opt('--veto-segment-name', veto_segment_name)
    node.new_output_file_opt(workflow.analysis_time,
                             '.dax',
                             '--output-file',
                             tags=tags)
    node.new_output_file_opt(workflow.analysis_time,
                             '.dax.map',
                             '--output-map',
                             tags=tags)

    name = node.output_files[0].name
    map_loc = node.output_files[1].name

    node.add_opt('--workflow-name', name)
    node.add_opt('--output-dir', out_dir)

    workflow += node

    # execute this in a sub-workflow
    fil = node.output_files[0]

    job = dax.DAX(fil)
    job.addArguments('--basename %s' \
                     % os.path.splitext(os.path.basename(name))[0])
    Workflow.set_job_properties(job, map_loc)
    workflow._adag.addJob(job)
    dep = dax.Dependency(parent=node._dax_node, child=job)
    workflow._adag.addDependency(dep)
    logging.info('Leaving minifollowups module')
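A hypothetical call-site sketch for the function above; the workflow, trigger file, template bank, and per-ifo segment dictionary are placeholders for objects created by earlier workflow stages, and the segment-list name is arbitrary.

# Hypothetical call site: trig_file, bank_file and insp_segs are placeholders;
# insp_segs is a dictionary of segment files keyed by ifo name.
setup_single_det_minifollowups(workflow,
                               single_trig_file=trig_file,
                               tmpltbank_file=bank_file,
                               insp_segs=insp_segs,
                               insp_seg_name='DATA_ANALYZED',
                               dax_output='minifollowup_dax',
                               out_dir='results/minifollowups',
                               tags=['FULL_DATA'])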
Example #31
def setup_foreground_minifollowups(workflow, coinc_file, single_triggers,
                       tmpltbank_file, insp_segs, insp_data_name,
                       insp_anal_name, dax_output, out_dir, tags=None):
    """ Create plots that followup the Nth loudest coincident injection
    from a statmap produced HDF file.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    coinc_file: pycbc.workflow.File
        The file containing the coincident (statmap) triggers.
    single_triggers: list of pycbc.workflow.File
        A list containing the file objects associated with the merged
        single detector trigger files for each ifo.
    tmpltbank_file: pycbc.workflow.File
        The file object pointing to the HDF format template bank
    insp_segs: SegFile
       The segment file containing the data read and analyzed by each inspiral
       job.
    insp_data_name: str
        The name of the segmentlist storing data read.
    insp_anal_name: str
        The name of the segmentlist storing data analyzed.
    out_dir: path
        The directory to store minifollowups result plots and files
    tags: {None, optional}
        Tags to add to the minifollowups executables

    Returns
    -------
    layout: list
        A list of tuples which specify the displayed file layout for the
        minifollowups plots.
    """
    logging.info('Entering minifollowups module')

    if not workflow.cp.has_section('workflow-minifollowups'):
        logging.info('There is no [workflow-minifollowups] section in configuration file')
        logging.info('Leaving minifollowups')
        return

    tags = [] if tags is None else tags
    makedir(dax_output)

    # turn the config file into a File class
    config_path = os.path.abspath(dax_output + '/' + '_'.join(tags) + 'foreground_minifollowup.ini')
    workflow.cp.write(open(config_path, 'w'))

    config_file = wdax.File(os.path.basename(config_path))
    config_file.PFN(urljoin('file:', pathname2url(config_path)), site='local')

    exe = Executable(workflow.cp, 'foreground_minifollowup',
                     ifos=workflow.ifos, out_dir=dax_output, tags=tags)

    node = exe.create_node()
    node.add_input_opt('--config-files', config_file)
    node.add_input_opt('--bank-file', tmpltbank_file)
    node.add_input_opt('--statmap-file', coinc_file)
    node.add_multiifo_input_list_opt('--single-detector-triggers',
                                     single_triggers)
    node.add_input_opt('--inspiral-segments', insp_segs)
    node.add_opt('--inspiral-data-read-name', insp_data_name)
    node.add_opt('--inspiral-data-analyzed-name', insp_anal_name)
    if tags:
        node.add_list_opt('--tags', tags)
    node.new_output_file_opt(workflow.analysis_time, '.dax', '--output-file')
    node.new_output_file_opt(workflow.analysis_time, '.dax.map', '--output-map')
    node.new_output_file_opt(workflow.analysis_time, '.tc.txt',
                             '--transformation-catalog')

    name = node.output_files[0].name
    map_file = node.output_files[1]
    tc_file = node.output_files[2]

    node.add_opt('--workflow-name', name)
    node.add_opt('--output-dir', out_dir)

    workflow += node

    # execute this in a sub-workflow
    fil = node.output_files[0]

    # determine if a staging site has been specified
    try:
        staging_site = workflow.cp.get('workflow-foreground_minifollowups',
                                       'staging-site')
    except:
        staging_site = None

    job = dax.DAX(fil)
    job.addArguments('--basename %s' % os.path.splitext(os.path.basename(name))[0])
    Workflow.set_job_properties(job, map_file, tc_file, staging_site=staging_site)
    workflow._adag.addJob(job)
    dep = dax.Dependency(parent=node._dax_node, child=job)
    workflow._adag.addDependency(dep)
    logging.info('Leaving minifollowups module')
Example #32
 def add_profile(self, namespace, key, value):
     """ Add profile information to this executable
     """
     entry = dax.Profile(namespace, key, value)
     self._dax_executable.addProfile(entry)
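A one-line usage sketch for the executable-level variant above; the exe object and the profile shown are illustrative, and valid namespace and key combinations are defined by the Pegasus profile documentation.

# Illustrative only: 'exe' stands in for an instance of the class defining
# add_profile above; this profile asks Condor for a single CPU per job.
exe.add_profile('condor', 'request_cpus', '1')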