Example #1
# Imports assumed by these snippets (module locations follow the usual PyCBC
# workflow layout; verify against your PyCBC version):
from pycbc.workflow.core import FileList
from pycbc.workflow.jobsetup import select_generic_executable
# get_random_label is a small helper defined alongside these functions in the
# pycbc.workflow package.

def setup_postprocprep_pipedown_workflow(workflow, coincFiles, output_dir,
                                      tags=[], do_repop=False, 
                                      injectionFiles=None,
                                      vetoFiles=None, injLessTag=None,
                                      injectionTags=[], veto_cats=[]):
    """
    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    coincFiles : pycbc.workflow.core.FileList
        A FileList of the coincident trigger files that are used as
        input at this stage.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.
    do_repop : Boolean (optional, default = False)
        If False, use the 'coinc_inspiral.snr' column from the coincident
        trigger files as the clustering and ranking statistic; if True, run
        a repop_coinc job before clustering to calculate a different ranking
        statistic and store it in the coinc_inspiral table for later use.
    injectionFiles : pycbc.workflow.core.FileList (optional, default=None)
        The injection files to be used in this stage. An empty list (or any
        other input that evaluates as false) is valid and will imply that no
        injections are being done.
    vetoFiles : pycbc.workflow.core.FileList (required)
        The data quality files to be used in this stage. This is required and
        will be used to determine the analysed times when doing post-processing.
    injLessTag : string (required)
        The tag that identifies files that do not have simulations in them.
        I.e. the primary search results.
    injectionTags : list of strings (optional, default = [])
        Each injection file has a unique tag. If used in the method, this
        tells the post-processing preparation code which injection tags it
        should include when creating the combined output.
    veto_cats : list of integers (default = [], non-empty list required)
        Decide which set of veto files should be used in the post-processing
        preparation. For example, tell the workflow to only generate results
        at cumulative categories 2, 3 and 4 by supplying [2,3,4] here.

    Returns
    -------
    finalFiles : pycbc.workflow.core.FileList
        A list of the single SQL databases (one per veto category) storing
        the clustered, injection-found triggers for all injection, time-slid
        and zero-lag analyses.
    initialSqlFiles : pycbc.workflow.core.FileList
        The SQL files before clustering is applied and injection finding is
        performed.
    clusteredSqlFiles : pycbc.workflow.core.FileList
        The clustered SQL files before injection finding is performed.
    combinedSqlFiles : pycbc.workflow.core.FileList
        A combined file containing all triggers after clustering, including
        the injection and veto tables, but before injection finding is
        performed. There is probably no need to ever keep this file and it
        will be a temporary file in most cases.
    """
    if not veto_cats:
        raise ValueError("A non-empty list of veto categories is required.")

    # Setup needed exe classes
    sqliteCombine1ExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-combiner1-exe", tags)
    sqliteCombine1Exe = select_generic_executable(workflow, 
                                                  sqliteCombine1ExeTag)
    sqliteCombine2ExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-combiner2-exe", tags)
    sqliteCombine2Exe = select_generic_executable(workflow, 
                                                  sqliteCombine2ExeTag)
    clusterCoincsExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-cluster-exe", tags)
    clusterCoincsExe = select_generic_executable(workflow, clusterCoincsExeTag)
    injFindExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-injfind-exe", tags)
    injFindExe = select_generic_executable(workflow, injFindExeTag)
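    # The four option names above are read from the [workflow-postprocprep]
    # section of the workflow configuration file. A hypothetical sketch of
    # that section (the executable names are illustrative assumptions, not
    # prescribed by this code):
    #
    #   [workflow-postprocprep]
    #   postprocprep-combiner1-exe = ligolw_sqlite
    #   postprocprep-combiner2-exe = ligolw_sqlite
    #   postprocprep-cluster-exe = ligolw_cbc_cluster_coincs
    #   postprocprep-injfind-exe = ligolw_dbinjfind
    #   postprocprep-repop-exe = ligolw_cbc_repop_coinc  ; only if do_repop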

    sqliteCombine1Outs = FileList([])
    clusterCoincsOuts = FileList([])
    injFindOuts = FileList([])
    sqliteCombine2Outs = FileList([])

    if do_repop:
        repopCoincExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                                "postprocprep-repop-exe", tags)
        repopCoincExe = select_generic_executable(workflow, repopCoincExeTag)
        repopCoincOuts = FileList([])

    for cat in veto_cats:
        # FIXME: Some hacking is still needed while we support pipedown
        # FIXME: There are currently 3 names to say cumulative cat_3
        vetoTag = 'CUMULATIVE_CAT_%d' %(cat)
        dqSegFile = vetoFiles.find_output_with_tag(vetoTag)
        if not len(dqSegFile) == 1:
            errMsg = "Did not find exactly 1 data quality file."
            raise ValueError(errMsg)
        # NOTE: dqVetoName is the tag used *inside* the file; it is not used
        # directly here.
        dqVetoName = 'VETO_CAT%d_CUMULATIVE' %(cat)
        # FIXME: Here we set the dqVetoName to be compatible with pipedown
        pipedownDQVetoName = 'CAT_%d_VETO' %(cat)

        sqliteCombine2Inputs = FileList([])
        # Do injection-less jobs first.

        # Choose a label for clustering the jobs
        job_label = get_random_label()

        # Combine trig files first
        currTags = tags + [injLessTag, vetoTag]
        trigVetoInpFiles = coincFiles.find_output_with_tag(pipedownDQVetoName)
        trigInpFiles = trigVetoInpFiles.find_output_with_tag(injLessTag)
        if len(trigInpFiles) == 0:
            err_msg = "No input files found. Workflow would fail."
            raise ValueError(err_msg)
        trigInpFiles.append(dqSegFile[0])
        sqliteCombine1Job = sqliteCombine1Exe(workflow.cp,
                                              sqliteCombine1ExeTag,
                                              ifo=workflow.ifo_string,
                                              out_dir=output_dir,
                                              tags=currTags)
        sqliteCombine1Node = sqliteCombine1Job.create_node(
                                          workflow.analysis_time, trigInpFiles, 
                                          workflow=workflow)
        sqliteCombine1Node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(sqliteCombine1Node)
        # Node has only one output file
        sqliteCombine1Out = sqliteCombine1Node.output_files[0]
        sqliteCombine1Outs.append(sqliteCombine1Out)

        if do_repop:
            repopCoincJob = repopCoincExe(workflow.cp,
                                          repopCoincExeTag,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=currTags)
            repopCoincNode = repopCoincJob.create_node(workflow.analysis_time,
                                                       sqliteCombine1Out)
            repopCoincNode.add_profile('pegasus', 'label', job_label)
            workflow.add_node(repopCoincNode)
            # Node has only one output file
            repopCoincOut = repopCoincNode.output_files[0]
            repopCoincOuts.append(repopCoincOut)

        # Input file plumbing allowing for possible repop_coinc job
        clusterCoincsIn = repopCoincOut if do_repop else sqliteCombine1Out
        # Cluster coincidences
        clusterCoincsJob = clusterCoincsExe(workflow.cp,
                                            clusterCoincsExeTag,
                                            ifo=workflow.ifo_string, 
                                            out_dir=output_dir, 
                                            tags=currTags)
        clusterCoincsNode = clusterCoincsJob.create_node(
                                       workflow.analysis_time, clusterCoincsIn)
        clusterCoincsNode.add_profile('pegasus', 'label', job_label)
        workflow.add_node(clusterCoincsNode)
        # Node has only one output file
        clusterCoincsOut = clusterCoincsNode.output_files[0]
        clusterCoincsOuts.append(clusterCoincsOut)
        sqliteCombine2Inputs.append(clusterCoincsOut)

        # Do injection jobs
        for injTag in injectionTags:
            # Choose a label for clustering the jobs
            job_label = get_random_label()
            # Combine trig files first
            currTags = tags + [injTag, vetoTag]
            trigInpFiles = trigVetoInpFiles.find_output_with_tag(injTag)
            trigInpFiles.append(dqSegFile[0])
            injFile = injectionFiles.find_output_with_tag(injTag)
            assert len(injFile) == 1, \
                "Expected exactly one injection file for tag %s." % injTag
            sqliteCombine1Job = sqliteCombine1Exe(workflow.cp,
                                                  sqliteCombine1ExeTag,
                                                  ifo=workflow.ifo_string,
                                                  out_dir=output_dir,
                                                  tags=currTags)
            sqliteCombine1Node = sqliteCombine1Job.create_node(
                                          workflow.analysis_time, trigInpFiles,
                                          injFile=injFile[0], injString=injTag,
                                          workflow=workflow)
            sqliteCombine1Node.add_profile('pegasus', 'label', job_label)
            workflow.add_node(sqliteCombine1Node)
            # Node has only one output file
            sqliteCombine1Out = sqliteCombine1Node.output_files[0]
            sqliteCombine1Outs.append(sqliteCombine1Out)

            if do_repop:
                repopCoincJob = repopCoincExe(workflow.cp,
                                          repopCoincExeTag,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=currTags)
                repopCoincNode = repopCoincJob.create_node(
                                     workflow.analysis_time, sqliteCombine1Out)
                repopCoincNode.add_profile('pegasus', 'label', job_label)
                workflow.add_node(repopCoincNode)
                # Node has only one output file
                repopCoincOut = repopCoincNode.output_files[0]
                repopCoincOuts.append(repopCoincOut)

            # Input file plumbing allowing for possible repop_coinc job
            clusterCoincsIn = repopCoincOut if do_repop else sqliteCombine1Out
            # Cluster coincidences
            clusterCoincsJob = clusterCoincsExe(workflow.cp,
                                                clusterCoincsExeTag,
                                                ifo=workflow.ifo_string,
                                                out_dir=output_dir,
                                                tags=currTags)
            clusterCoincsNode = clusterCoincsJob.create_node(
                                       workflow.analysis_time, clusterCoincsIn)
            clusterCoincsNode.add_profile('pegasus', 'label', job_label)
            workflow.add_node(clusterCoincsNode)
            # Node has only one output file
            clusterCoincsOut = clusterCoincsNode.output_files[0]
            clusterCoincsOuts.append(clusterCoincsOut)
            sqliteCombine2Inputs.append(clusterCoincsOut)

        # Choose a new label for pegasus-clustering the jobs
        job_label = get_random_label()

        # Combine everything together and add veto file
        currTags = tags + [vetoTag]
        sqliteCombine2Job = sqliteCombine2Exe(workflow.cp, 
                                              sqliteCombine2ExeTag,
                                              ifo=workflow.ifo_string, 
                                              out_dir=output_dir,
                                              tags=currTags)
        sqliteCombine2Node = sqliteCombine2Job.create_node(
                                  workflow.analysis_time, sqliteCombine2Inputs)
        sqliteCombine2Node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(sqliteCombine2Node)
        sqliteCombine2Out = sqliteCombine2Node.output_files[0]
        sqliteCombine2Outs.append(sqliteCombine2Out)

        # Inj finding
        injFindJob = injFindExe(workflow.cp, injFindExeTag,
                                ifo=workflow.ifo_string,
                                out_dir=output_dir, tags=currTags)
        injFindNode = injFindJob.create_node(workflow.analysis_time,
                                             sqliteCombine2Out)
        injFindNode.add_profile('pegasus', 'label', job_label)
        workflow.add_node(injFindNode)
        injFindOut = injFindNode.output_files[0]
        injFindOuts.append(injFindOut)


    return injFindOuts, sqliteCombine1Outs, clusterCoincsOuts,\
           sqliteCombine2Outs
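
A minimal sketch of how the function above might be invoked from a
workflow-generation script. The Workflow object, the input FileLists and the
literal values below (the output directory, 'FULL_DATA', the injection tags)
are hypothetical placeholders, not values required by the code:

# Hedged usage sketch: assumes `workflow`, `coinc_files`, `inj_files` and
# `veto_files` were built by earlier stages of the same workflow.
final_files, initial_sql, clustered_sql, combined_sql = \
    setup_postprocprep_pipedown_workflow(
        workflow, coinc_files, 'postprocprep',
        tags=['POSTPROC1'],                  # used in all output file names
        injectionFiles=inj_files,
        vetoFiles=veto_files,
        injLessTag='FULL_DATA',              # tag of the injection-free results
        injectionTags=['BNSINJ', 'BBHINJ'],  # one tag per injection set
        veto_cats=[2, 3, 4])                 # cumulative veto categories
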
Example #2
def setup_postproc_pipedown_workflow(workflow, trigger_files, summary_xml_files,
                                  output_dir, tags=[], veto_cats=[]):
    """
    This function sets up the post-processing stage of the workflow, using a
    pipedown-style setup. This consists of running compute_durations to
    determine and store the analysis time (foreground and background). It
    then runs cfar jobs to determine the false alarm rate for all triggers
    (simulations or otherwise) in the input database.
    Pipedown expects to take as input (at this stage) a single database
    containing all triggers. This sub-module follows that same idea, so
    len(trigger_files) must equal 1 (for every DQ category that we will run).

    Parameters
    ----------  
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    trigger_files : pycbc.workflow.core.FileList
        A FileList containing the combined databases at CAT_1,2,3... that
        will be used to calculate FARs.
    summary_xml_files : pycbc.workflow.core.FileList (required)
        A FileList of the output of the analysislogging_utils module.
        For pipedown-style post-processing this should be one file containing
        a segment table holding the single detector analysed times.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.
    veto_cats : list of integers (default = [], non-empty list required)
        Decide which veto category levels should be used in post-processing.
        For example tell the workflow to only generate results at cumulative
        categories 2, 3 and 4 by supplying [2,3,4] here.

    Returns
    --------
    final_files : pycbc.workflow.core.FileList
        A list of the final SQL databases containing computed FARs.
    """
    if not veto_cats:
        raise ValueError("A non-empty list of veto categories is required.")
    if not len(summary_xml_files) == 1:
        errMsg = ("Expected exactly one summary XML file, got %d."
                  % len(summary_xml_files))
        raise ValueError(errMsg)

    # Setup needed exe classes
    compute_durations_exe_tag = workflow.cp.get_opt_tags("workflow-postproc",
                                   "postproc-computedurations-exe", tags)
    compute_durations_exe = select_generic_executable(workflow,
                                                     compute_durations_exe_tag)
    cfar_exe_tag = workflow.cp.get_opt_tags("workflow-postproc",
                                            "postproc-cfar-exe", tags)
    cfar_exe = select_generic_executable(workflow, cfar_exe_tag)
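    # The two option names above are read from the [workflow-postproc]
    # section of the workflow configuration file. A hypothetical sketch
    # (executable names are illustrative assumptions):
    #
    #   [workflow-postproc]
    #   postproc-computedurations-exe = ligolw_cbc_compute_durations
    #   postproc-cfar-exe = ligolw_cbc_cfar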

    comp_durations_outs = FileList([])
    cfar_outs = FileList([])

    for cat in veto_cats:

        veto_tag = 'CUMULATIVE_CAT_%d' %(cat)
        trig_input_files = trigger_files.find_output_with_tag(veto_tag)
        if not len(trig_input_files) == 1:
            err_msg = "Did not find exactly 1 database input file."
            raise ValueError(err_msg)

        curr_tags = tags + [veto_tag]

        # Choose a label for clustering the jobs
        job_label = get_random_label()

        # Start with compute durations
        compute_durations_job = compute_durations_exe(
                                                  workflow.cp,
                                                  compute_durations_exe_tag,
                                                  ifo=workflow.ifo_string,
                                                  out_dir=output_dir,
                                                  tags=curr_tags)
        compute_durations_node = compute_durations_job.create_node(
                                    workflow.analysis_time, trig_input_files[0],
                                    summary_xml_files[0])
        compute_durations_node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(compute_durations_node)

        # Node has only one output file
        compute_durations_out = compute_durations_node.output_files[0]
        comp_durations_outs.append(compute_durations_out)

        # Add the calculate FAR (cfar) job
        cfar_job = cfar_exe(workflow.cp, cfar_exe_tag,
                            ifo=workflow.ifo_string,
                            out_dir=output_dir,
                            tags=curr_tags)
        cfar_node = cfar_job.create_node(workflow.analysis_time,
                                         compute_durations_out)
        cfar_node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(cfar_node)

        # Node has only one output file
        cfar_out = cfar_node.output_files[0]
        cfar_outs.append(cfar_out)

    return cfar_outs
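
Taken together with Example #1, the two stages chain naturally: the
injection-found databases returned by the preparation stage are the
trigger_files consumed here, one per cumulative veto category. A hedged
sketch of the chaining (all variable names are hypothetical placeholders):

# Hedged sketch: the prep stage returns one combined, injection-found
# database per veto category; those databases feed the FAR calculation.
final_files, _, _, _ = setup_postprocprep_pipedown_workflow(
    workflow, coinc_files, 'postprocprep',
    vetoFiles=veto_files, injLessTag='FULL_DATA', veto_cats=[2, 3, 4])
far_files = setup_postproc_pipedown_workflow(
    workflow, final_files, summary_xml_files, 'postproc',
    veto_cats=[2, 3, 4])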