Example 1
def make_gating_node(workflow, datafind_files, outdir=None, tags=None):
    '''
    Generate jobs for autogating the data for PyGRB runs.

    Parameters
    ----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that manages the constructed workflow.
    datafind_files : pycbc.workflow.core.FileList
        A FileList containing the frame files to be gated.
    outdir : string
        Path of the output directory.
    tags : list of strings
        If given, these tags are used to uniquely name and identify output
        files that would be produced in multiple calls to this function.

    Returns
    --------
    condition_strain_nodes : list
        List containing the pycbc.workflow.core.Node objects representing the
        autogating jobs.
    condition_strain_outs : pycbc.workflow.core.FileList
        FileList containing the pycbc.workflow.core.File objects representing
        the gated frame files.
    '''

    cp = workflow.cp
    if tags is None:
        tags = []

    condition_strain_class = select_generic_executable(workflow,
                                                       "condition_strain")
    condition_strain_nodes = []
    condition_strain_outs = FileList([])
    for ifo in workflow.ifos:
        input_files = FileList([datafind_file for datafind_file in \
                                datafind_files if datafind_file.ifo == ifo])
        condition_strain_jobs = condition_strain_class(cp,
                                                       "condition_strain",
                                                       ifo=ifo,
                                                       out_dir=outdir,
                                                       tags=tags)
        condition_strain_node, condition_strain_out = \
                condition_strain_jobs.create_node(input_files, tags=tags)
        condition_strain_nodes.append(condition_strain_node)
        condition_strain_outs.extend(FileList([condition_strain_out]))

    return condition_strain_nodes, condition_strain_outs
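
A minimal usage sketch for the function above. It assumes that workflow is an
already-constructed pycbc.workflow.core.Workflow and that datafind_files is the
FileList produced by an earlier datafind stage; the output directory and tag
are illustrative only. The function returns the nodes without adding them to
the workflow, so the caller does that:

# Hypothetical inputs: workflow and datafind_files come from earlier stages.
gating_nodes, gated_frames = make_gating_node(workflow, datafind_files,
                                              outdir='gating', tags=['PYGRB'])
for node in gating_nodes:
    workflow.add_node(node)  # nodes are returned, not added, by the function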
Example 2
def make_gating_node(workflow, datafind_files, outdir=None, tags=None):
    '''
    Generate jobs for autogating the data for PyGRB runs.

    Parameters
    ----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that manages the constructed workflow.
    datafind_files : pycbc.workflow.core.FileList
        A FileList containing the frame files to be gated.
    outdir : string
        Path of the output directory.
    tags : list of strings
        If given, these tags are used to uniquely name and identify output
        files that would be produced in multiple calls to this function.

    Returns
    --------
    condition_strain_nodes : list
        List containing the pycbc.workflow.core.Node objects representing the
        autogating jobs.
    condition_strain_outs : pycbc.workflow.core.FileList
        FileList containing the pycbc.workflow.core.File objects representing
        the gated frame files.
    '''

    cp = workflow.cp
    if tags is None:
        tags = []
    
    condition_strain_class = select_generic_executable(workflow,
                                                       "condition_strain")
    condition_strain_nodes = []
    condition_strain_outs = FileList([])
    for ifo in workflow.ifos:
        input_files = FileList([datafind_file for datafind_file in \
                                datafind_files if datafind_file.ifo == ifo])
        condition_strain_jobs = condition_strain_class(cp, "condition_strain",
                ifo=ifo, out_dir=outdir, tags=tags)
        condition_strain_node, condition_strain_out = \
                condition_strain_jobs.create_node(input_files, tags=tags)
        condition_strain_nodes.append(condition_strain_node)
        condition_strain_outs.extend(FileList([condition_strain_out]))

    return condition_strain_nodes, condition_strain_outs
Example 3
def setup_postprocprep_gstlal_workflow(workflow, coinc_files, output_dir,
                                       tags=[], injection_files=None,
                                       veto_files=None, inj_less_tag=None,
                                       injection_tags=[], veto_cat=None,
                                       summary_xml_files=None,
                                       likelihood_files=[]):
    """
    Parameters
    -----------
    workflow : workflow.Workflow
        The workflow instance that the coincidence jobs will be added to.
    coinc_files : workflow.FileList
        A FileList of the coincident trigger files that are used as
        input at this stage.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.
    injection_files : workflow.FileList (optional, default=None)
        The injection files to be used in this stage. An empty list (or any
        other input that evaluates as false) is valid and will imply that no
        injections are being done.
    veto_files : workflow.FileList (required)
        The data quality files to be used in this stage. This is required and
        will be used to determine the analysed times when doing post-processing.
    inj_less_tag : string (required)
        The tag that identifies files that do not have simulations in them.
        I.e. the primary search results.
    injection_tags : list of strings (optional, default = [])
        Each injection file has a unique tag. If used in the method, this
        tells the post-processing preparation code which injection tags it
        should include when creating the combined output.
    veto_cat : int (optional, default = None)
        FIXME: How does gstlal deal with veto categories?
        Hardcode to CAT1 for now.
    summary_xml_files : workflow.FileList
        A FileList of the output of the analysislogging_utils module.
        Here, this will be one file that includes the segments analysed by the
        workflow.

    Returns
    -------
    final_outputs : workflow.FileList
        A FileList of the final SQL databases produced by this stage: a
        clustered zero-lag database plus, for each injection run, a clustered
        database on which injection finding has been performed.
    """
    # Sanity checks
    if not len(summary_xml_files) == 1:
        errMsg = "I need exactly one summaryXML file, got %d." \
                                                     %(len(summary_xml_files),)
        raise ValueError(errMsg)

    # Setup needed exe classes
    run_sqlite_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-runsqlite-exe", tags)
    ligolw_sqlite_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-ligolwsqlite-exe", tags) 
    inspinjfind_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-inspinjfind-exe", tags)
    sql_to_xml_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-sqltoxml-exe", tags)
    pycbc_picklehor_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-picklehor-exe", tags)
    pycbc_combllhood_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-combllhood-exe", tags)
    pycbc_genranking_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-genranking-exe", tags)
    pycbc_compllhood_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-compllhood-exe", tags)
    marg_likelihood_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-marglikelihood-exe", tags)
    far_gstlal_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-fargstlal-exe", tags)
    plot_summary_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-plotsummary-exe", tags)
    plot_sensitivity_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-plotsensitivity-exe", tags)
    plot_background_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-plotbackground-exe", tags)
    summary_page_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-summarypage-exe", tags)


    run_sqlite_exe = select_generic_executable(workflow, run_sqlite_exe_name)
    ligolw_sqlite_exe = select_generic_executable(workflow,
                                                        ligolw_sqlite_exe_name)
    inspinjfind_exe = select_generic_executable(workflow, inspinjfind_exe_name)
    sql_to_xml_exe = select_generic_executable(workflow, sql_to_xml_exe_name)
    pycbc_picklehor_exe = select_generic_executable(workflow,
                                                      pycbc_picklehor_exe_name)
    pycbc_combllhood_exe = select_generic_executable(workflow,
                                                     pycbc_combllhood_exe_name)
    pycbc_genranking_exe = select_generic_executable(workflow,
                                                     pycbc_genranking_exe_name)
    pycbc_compllhood_exe = select_generic_executable(workflow,
                                                     pycbc_compllhood_exe_name)
    marg_likelihood_exe = select_generic_executable(workflow,
                                                      marg_likelihood_exe_name)
    far_gstlal_exe = select_generic_executable(workflow, far_gstlal_exe_name)
    plot_summary_exe = select_generic_executable(workflow,
                                                         plot_summary_exe_name)
    plot_sensitivity_exe = select_generic_executable(workflow,
                                                     plot_sensitivity_exe_name)
    plot_background_exe = select_generic_executable(workflow,
                                                      plot_background_exe_name)
    summary_page_exe = select_generic_executable(workflow,
                                                         summary_page_exe_name)


    # SETUP
    # FIXME: Some hacking is still needed while we support pipedown
    # FIXME: How does gstlal deal with veto categories?
    #         Hardcode to CAT1 for now.
    veto_tag = 'CUMULATIVE_CAT_%d' %(veto_cat,)
    dq_seg_file = veto_files.find_output_with_tag(veto_tag)
    assert len(dq_seg_file) == 1
    dq_seg_file = dq_seg_file[0]
    #if not len(dqSegFile) == 1:
    #    errMsg = "Did not find exactly 1 data quality file."
    #    raise ValueError(errMsg)
    # FIXME: Here we set the dqVetoName to be compatible with pipedown
    pipedown_dq_veto_name = 'CAT_%d_VETO' %(veto_cat,)

    # First we need to convert to SQL; this is STAGE0
    # Do for all injection runs and zero lag
    stage0_outputs = {}
    for inj_tag in [inj_less_tag] + injection_tags:
        curr_tags = tags + [inj_tag, veto_tag]
        trig_veto_inp_files = \
                  coinc_files.find_output_with_tag(pipedown_dq_veto_name)
        trig_inp_files = trig_veto_inp_files.find_output_with_tag(inj_tag)
        stage0_job = ligolw_sqlite_exe(workflow.cp, ligolw_sqlite_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE0'] + curr_tags)
        stage0_outputs[inj_tag] = FileList([])
        assert len(trig_inp_files) > 0
        for file in trig_inp_files:
            stage0_node = stage0_job.create_node(file.segment, [file])
            workflow.add_node(stage0_node)
            # Node has only one output file
            stage0_out = stage0_node.output_files[0]
            stage0_outputs[inj_tag].append(stage0_out)

    curr_tags = tags + [veto_tag]

    # NOW WE DO LIKELIHOOD SETUP
    pycbc_picklehor_job = pycbc_picklehor_exe(workflow.cp,
                                  pycbc_picklehor_exe_name,
                                  ifo=workflow.ifo_string,
                                  out_dir=output_dir,
                                  tags=curr_tags)
    pycbc_combllhood_job = pycbc_combllhood_exe(workflow.cp,
                                  pycbc_combllhood_exe_name,
                                  ifo=workflow.ifo_string,
                                  out_dir=output_dir,
                                  tags=curr_tags)
    pycbc_genranking_job = pycbc_genranking_exe(workflow.cp, 
                                  pycbc_genranking_exe_name,
                                  ifo=workflow.ifo_string,
                                  out_dir=output_dir,
                                  tags=curr_tags)
    marg_likelihood_job_1 = marg_likelihood_exe(workflow.cp,
                                  marg_likelihood_exe_name,
                                  ifo=workflow.ifo_string,
                                  out_dir=output_dir,
                                  tags=['MARG1']+curr_tags)
    marg_likelihood_job_2 = marg_likelihood_exe(workflow.cp,
                                  marg_likelihood_exe_name,
                                  ifo=workflow.ifo_string,
                                  out_dir=output_dir,
                                  tags=['MARG2']+curr_tags)


    # Begin with finding the horizon distances
    picklehor_inputs = stage0_outputs[inj_less_tag]
    node = pycbc_picklehor_job.create_node(workflow.analysis_time,
                                                              picklehor_inputs)
    workflow.add_node(node)
    horizon_dist_file = node.output_files[0]
    # Then combine all likelihood files
    combllhood_inputs = likelihood_files.find_output_with_tag(\
                                                         pipedown_dq_veto_name) 
    combllhood_inputs = combllhood_inputs.find_output_with_tag(inj_less_tag)
    assert len(combllhood_inputs) > 0
    node = pycbc_combllhood_job.create_node(workflow.analysis_time,
                                          combllhood_inputs, horizon_dist_file)
    workflow.add_node(node)
    likelihood_file = node.output_files[0]
    # Also compute the ranking file
    node = pycbc_genranking_job.create_node(workflow.analysis_time,
                                            likelihood_file, horizon_dist_file)
    workflow.add_node(node)
    ranking_likelihood_file = node.output_files[0]
    # And marginalize (twice for some reason!)
    node = marg_likelihood_job_1.create_node(workflow.analysis_time,
                                                       ranking_likelihood_file)
    workflow.add_node(node)
    marg_likelihood_file_1 = node.output_files[0]
    node = marg_likelihood_job_2.create_node(workflow.analysis_time,
                                                        marg_likelihood_file_1)
    workflow.add_node(node)
    marg_likelihood_file_2 = node.output_files[0]

    # Now do the sqlite conditioning. This has a few stages.
                                                  
    # STAGE 1: Populate likelihood in all input files
    # STAGE 2: Run run_sqlite on all outputs of stage 1
    # STAGE 3: Combine all files into one sqlite file
    # STAGE 4: Run run_sqlite on outputs of stage 3
    # STAGE 5: Add segments.xml and inj.xml
    # STAGE 6: Run run_sqlite (cluster and simplify) on outputs of stage 5
    # STAGE 7: Dump SQL database to xml
    # STAGE 8: Run injfind on the xml document
    # STAGE 9: Convert back to SQL

    stage1_outputs = {}
    stage2_outputs = {}
    stage3_outputs = {}
    stage4_outputs = {}
    stage5_outputs = {}
    stage6_outputs = {}
    stage7_outputs = {}
    stage8_outputs = {}
    stage9_outputs = {}
    final_outputs = FileList([])
    # Do for all injection runs and zero lag
    for inj_tag in [inj_less_tag] + injection_tags:
        curr_tags = tags + [inj_tag, veto_tag]
        trig_inp_files = stage0_outputs[inj_tag]
        stage1_job = pycbc_compllhood_exe(workflow.cp,
                                      pycbc_compllhood_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE1']+curr_tags)
        stage2_job = run_sqlite_exe(workflow.cp, run_sqlite_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE2'] + curr_tags)
        stage3_job = ligolw_sqlite_exe(workflow.cp, ligolw_sqlite_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE3'] + curr_tags)
        stage4_job = run_sqlite_exe(workflow.cp, run_sqlite_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE4'] + curr_tags)
        stage5_job = ligolw_sqlite_exe(workflow.cp, ligolw_sqlite_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE5'] + curr_tags)
        if inj_tag == inj_less_tag:
            # For zero-lag we stop here, so use the FINAL tag to indicate this
            stage6_zl_job = run_sqlite_exe(workflow.cp, run_sqlite_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=['FINAL'] + curr_tags)
        else:
            stage6_job = run_sqlite_exe(workflow.cp, run_sqlite_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=['STAGE6'] + curr_tags)
            stage7_job = sql_to_xml_exe(workflow.cp, sql_to_xml_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=['STAGE7'] + curr_tags)
            stage8_job = inspinjfind_exe(workflow.cp, inspinjfind_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=['STAGE8'] + curr_tags)
            stage9_job = ligolw_sqlite_exe(workflow.cp, ligolw_sqlite_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=['FINAL'] + curr_tags)

        stage1_outputs[inj_tag] = FileList([])
        stage2_outputs[inj_tag] = FileList([])
        assert len(trig_inp_files) > 0
        for file in trig_inp_files:
            stage1_node = stage1_job.create_node(file.segment, file,
                                            likelihood_file, horizon_dist_file)
            workflow.add_node(stage1_node)
            # Node has only one output file
            stage1_out = stage1_node.output_files[0]
            stage1_outputs[inj_tag].append(stage1_out)
            stage2_node = stage2_job.create_node(stage1_out.segment,
                                                                    stage1_out)
            workflow.add_node(stage2_node)
            # Node has only one output file
            stage2_out = stage2_node.output_files[0]
            stage2_outputs[inj_tag].append(stage2_out)

        stage3_node = stage3_job.create_node(workflow.analysis_time,
                                    stage2_outputs[inj_tag], workflow=workflow)
        workflow.add_node(stage3_node)
        # Node has only one output file
        stage3_out = stage3_node.output_files[0]
        stage3_outputs[inj_tag] = stage3_out
        stage4_node = stage4_job.create_node(workflow.analysis_time,
                                                                    stage3_out)
        workflow.add_node(stage4_node)
        # Node has only one output file
        stage4_out = stage4_node.output_files[0]
        stage4_outputs[inj_tag] = stage4_out

        stage5_inputs = [stage4_out]
        stage5_inputs.append(summary_xml_files[0])
        stage5_inputs.append(dq_seg_file)
        if inj_tag != inj_less_tag:
            inj_file = injection_files.find_output_with_tag(inj_tag)
            assert (len(inj_file) == 1)
            stage5_inputs.append(inj_file[0])
        stage5_node = stage5_job.create_node(workflow.analysis_time,
                                                                 stage5_inputs)
        workflow.add_node(stage5_node)
        # Node has only one output file
        stage5_out = stage5_node.output_files[0]
        stage5_outputs[inj_tag] = stage5_out
  
        if inj_tag == inj_less_tag:
            stage6_node = stage6_zl_job.create_node(workflow.analysis_time,
                                                                    stage5_out)
            workflow.add_node(stage6_node)
            stage6_out = stage6_node.output_files[0]
            stage6_outputs[inj_tag] = stage6_out
            final_outputs.append(stage6_out)
        else:
            stage6_node = stage6_job.create_node(workflow.analysis_time,
                                                                    stage5_out)
            workflow.add_node(stage6_node)
            stage6_out = stage6_node.output_files[0]
            stage6_outputs[inj_tag] = stage6_out
            stage7_node = stage7_job.create_node(workflow.analysis_time,
                                                                    stage6_out)
            workflow.add_node(stage7_node)
            stage7_out = stage7_node.output_files[0]
            stage7_outputs[inj_tag] = stage7_out
            stage8_node = stage8_job.create_node(workflow.analysis_time,
                                                                    stage7_out)
            workflow.add_node(stage8_node)
            stage8_out = stage8_node.output_files[0]
            stage8_outputs[inj_tag] = stage8_out
            stage9_node = stage9_job.create_node(workflow.analysis_time,
                                                                  [stage8_out])
            workflow.add_node(stage9_node)
            stage9_out = stage9_node.output_files[0]
            stage9_outputs[inj_tag] = stage9_out
            final_outputs.append(stage9_out)

    # Next we run the job that computes the FAR from the snr_chisq histograms
    far_gstlal_outputs = {}
    for inj_tag in [inj_less_tag] + injection_tags:
        curr_tags = tags + [inj_tag, veto_tag]
        far_gstlal_job = far_gstlal_exe(workflow.cp, far_gstlal_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir, tags=curr_tags)
        trig_veto_inp_files = \
                  final_outputs.find_output_with_tag(veto_tag)
        trig_inp_files = trig_veto_inp_files.find_output_with_tag(inj_tag)
        assert len(trig_inp_files) == 1
        input_database = trig_inp_files[0]
        if inj_tag != inj_less_tag:
            no_inj_db = trig_veto_inp_files.find_output_with_tag(inj_less_tag)
            assert len(no_inj_db) == 1
            no_inj_db = no_inj_db[0]
            write_background = False
        else:
            # Here I don't want to provide the same file as a dependency
            # twice. Therefore I just give the non-injection DB and the code
            # assumes this is also the input database if it is not given.
            # Also, I only want the background file once.
            no_inj_db = input_database
            input_database = None
            write_background = True
        far_gstlal_node = far_gstlal_job.create_node(workflow.analysis_time,
                                        no_inj_db, marg_likelihood_file_2,
                                        inj_database=input_database,
                                        write_background_bins=write_background)
        workflow.add_node(far_gstlal_node)
        outputs = far_gstlal_node.output_files
        if inj_tag != inj_less_tag:
            assert len(outputs) == 1
            far_gstlal_outputs[inj_tag] = outputs[0]
        else:
            assert len(outputs) == 2
            sql_out = outputs.find_output_without_tag('POSTMARG')[0]
            xml_out = outputs.find_output_with_tag('POSTMARG')[0]
            far_gstlal_outputs[inj_tag] = sql_out
            post_marginalized_file = xml_out
            

    # Finally some plotting. 
    # FIXME: These are given explicit output directories and pegasus does not
    # know about output files. Would be nice if this was done "better"  
    curr_tags = tags + [veto_tag]
    plot_summary_job = plot_summary_exe(workflow.cp, plot_summary_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir, tags=curr_tags)
    plot_sensitivity_job = plot_sensitivity_exe(workflow.cp,
                                          plot_sensitivity_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir, tags=curr_tags)
    plot_background_job = plot_background_exe(workflow.cp,
                                          plot_background_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir, tags=curr_tags)
    inj_dbs = []
    for inj_tag in injection_tags:
        inj_dbs.append(far_gstlal_outputs[inj_tag])
    non_inj_db = far_gstlal_outputs[inj_less_tag]
    
    plot_summary_node = plot_summary_job.create_node(non_inj_db, inj_dbs)
    plot_background_node = plot_background_job.create_node(non_inj_db,
                                                        post_marginalized_file)
    plot_sensitivity_node = plot_sensitivity_job.create_node(non_inj_db,
                                                                       inj_dbs)

    workflow.add_node(plot_summary_node)
    workflow.add_node(plot_background_node)
    workflow.add_node(plot_sensitivity_node)

    # And make the html pages
    parents = [plot_summary_node, plot_background_node, plot_sensitivity_node]
    closed_summarypage_job = summary_page_exe(workflow.cp,
                                              summary_page_exe_name,
                                              ifo=workflow.ifo_string,
                                              out_dir=output_dir,
                                              tags=['CLOSEDBOX'] + curr_tags)
    open_summarypage_job = summary_page_exe(workflow.cp, 
                                              summary_page_exe_name,
                                              ifo=workflow.ifo_string,
                                              out_dir=output_dir,
                                              tags=['OPENBOX'] + curr_tags)

    closed_summarypage_node = closed_summarypage_job.create_and_add_node(\
                                              workflow, parents)
    open_summarypage_node = open_summarypage_job.create_and_add_node(workflow,
                                              parents)

    # FIXME: Maybe concatenate and return all other outputs if needed elsewhere
    # FIXME: Move to pp utils and return the FAR files.
    return final_outputs
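
A hedged call sketch for the gstlal-style post-processing preparation above.
Every FileList argument is assumed to come from earlier workflow stages, the
tag strings and output directory are illustrative, and veto_cat=1 mirrors the
CAT1 hard-coding noted in the docstring:

final_dbs = setup_postprocprep_gstlal_workflow(
    workflow, coinc_files, 'postprocprep', tags=['POSTPROC1'],
    injection_files=injection_files, veto_files=veto_files,
    inj_less_tag='FULL_DATA', injection_tags=['BNSINJ', 'NSBHINJ'],
    veto_cat=1, summary_xml_files=summary_xml_files,
    likelihood_files=likelihood_files)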
Example 4
def setup_postprocprep_pipedown_workflow(workflow, coincFiles, output_dir,
                                      tags=[], do_repop=False, 
                                      injectionFiles=None,
                                      vetoFiles=None, injLessTag=None,
                                      injectionTags=[], veto_cats=[]):
    """
    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    coincFiles : pycbc.workflow.core.FileList
        A FileList of the coincident trigger files that are used as
        input at this stage.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.
    do_repop : Boolean
        If False, use the 'coinc_inspiral.snr' column from the coincident 
        trigger files as clustering and ranking statistic; if True, use
        a repop_coinc job before clustering to calculate a different ranking
        statistic and store in the coinc_inspiral table for later use.
    injectionFiles : pycbc.workflow.core.FileList (optional, default=None)
        The injection files to be used in this stage. An empty list (or any
        other input that evaluates as false) is valid and will imply that no
        injections are being done.
    vetoFiles : pycbc.workflow.core.FileList (required)
        The data quality files to be used in this stage. This is required and
        will be used to determine the analysed times when doing post-processing.
    injLessTag : string (required)
        The tag that identifies files that do not have simulations in them.
        I.e. the primary search results.
    injectionTags : list of strings (optional, default = [])
        Each injection file has a unique tag. If used in the method, this
        tells the post-processing preparation code which injection tags it
        should include when creating the combined output.
    veto_cats : list of integers (optional, default = [])
        Decide which set of veto files should be used in the post-processing
        preparation. For example tell the workflow to only generate results
        at cumulative categories 2, 3 and 4 by supplying [2,3,4] here.

    Returns
    -------
    finalFiles : pycbc.workflow.core.FileList
        A FileList of the final SQL databases (one per veto category) storing
        the clustered, injection-found triggers for all injection, time-slid
        and zero-lag analyses.
    initialSqlFiles : pycbc.workflow.core.FileList
        The SQL files before clustering is applied and injection finding is
        performed.
    clusteredSqlFiles : pycbc.workflow.core.FileList
        The clustered SQL files before injection finding is performed.
    combinedSqlFiles : pycbc.workflow.core.FileList
        A combined file containing all triggers after clustering, including
        the injection and veto tables, but before injection finding is
        performed. There is probably no need to keep this file; in most cases
        it will be temporary.
    """
    if not veto_cats:
        raise ValueError("A non-empty list of veto categories is required.")

    # Setup needed exe classes
    sqliteCombine1ExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-combiner1-exe", tags)
    sqliteCombine1Exe = select_generic_executable(workflow, 
                                                  sqliteCombine1ExeTag)
    sqliteCombine2ExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-combiner2-exe", tags)
    sqliteCombine2Exe = select_generic_executable(workflow, 
                                                  sqliteCombine2ExeTag)
    clusterCoincsExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-cluster-exe", tags)
    clusterCoincsExe = select_generic_executable(workflow, clusterCoincsExeTag)
    injFindExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-injfind-exe", tags)
    injFindExe = select_generic_executable(workflow, injFindExeTag)

    sqliteCombine1Outs = FileList([])
    clusterCoincsOuts = FileList([])
    injFindOuts = FileList([])
    sqliteCombine2Outs = FileList([])

    if do_repop:
        repopCoincExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                                "postprocprep-repop-exe", tags)
        repopCoincExe = select_generic_executable(workflow, repopCoincExeTag)
        repopCoincOuts = FileList([])

    for cat in veto_cats:
        # FIXME: Some hacking is still needed while we support pipedown
        # FIXME: There are currently 3 names to say cumulative cat_3
        vetoTag = 'CUMULATIVE_CAT_%d' %(cat)
        dqSegFile = vetoFiles.find_output_with_tag(vetoTag)
        if not len(dqSegFile) == 1:
            errMsg = "Did not find exactly 1 data quality file."
            raise ValueError(errMsg)
        # Don't think this is used here, this is the tag *in* the file
        dqVetoName = 'VETO_CAT%d_CUMULATIVE' %(cat)
        # FIXME: Here we set the dqVetoName to be compatible with pipedown
        pipedownDQVetoName = 'CAT_%d_VETO' %(cat)

        sqliteCombine2Inputs = FileList([])
        # Do injection-less jobs first.

        # Choose a label for clustering the jobs
        job_label = get_random_label()

        # Combine trig files first
        currTags = tags + [injLessTag, vetoTag]
        trigVetoInpFiles = coincFiles.find_output_with_tag(pipedownDQVetoName)
        trigInpFiles = trigVetoInpFiles.find_output_with_tag(injLessTag)
        if len(trigInpFiles) == 0:
            err_msg = "No input files found. Workflow would fail."
            raise ValueError(err_msg)
        trigInpFiles.append(dqSegFile[0])
        sqliteCombine1Job = sqliteCombine1Exe(workflow.cp,
                                              sqliteCombine1ExeTag,
                                              ifo=workflow.ifo_string,
                                              out_dir=output_dir,
                                              tags=currTags)
        sqliteCombine1Node = sqliteCombine1Job.create_node(
                                          workflow.analysis_time, trigInpFiles, 
                                          workflow=workflow)
        sqliteCombine1Node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(sqliteCombine1Node)
        # Node has only one output file
        sqliteCombine1Out = sqliteCombine1Node.output_files[0]
        sqliteCombine1Outs.append(sqliteCombine1Out)

        if do_repop:
            repopCoincJob = repopCoincExe(workflow.cp,
                                          repopCoincExeTag,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=currTags)
            repopCoincNode = repopCoincJob.create_node(workflow.analysis_time,
                                                       sqliteCombine1Out)
            repopCoincNode.add_profile('pegasus', 'label', job_label)
            workflow.add_node(repopCoincNode)
            # Node has only one output file
            repopCoincOut = repopCoincNode.output_files[0]
            repopCoincOuts.append(repopCoincOut)

        # Input file plumbing allowing for possible repop_coinc job
        clusterCoincsIn = repopCoincOut if do_repop else sqliteCombine1Out
        # Cluster coincidences
        clusterCoincsJob = clusterCoincsExe(workflow.cp,
                                            clusterCoincsExeTag,
                                            ifo=workflow.ifo_string, 
                                            out_dir=output_dir, 
                                            tags=currTags)
        clusterCoincsNode = clusterCoincsJob.create_node(
                                       workflow.analysis_time, clusterCoincsIn)
        clusterCoincsNode.add_profile('pegasus', 'label', job_label)
        workflow.add_node(clusterCoincsNode)
        # Node has only one output file
        clusterCoincsOut = clusterCoincsNode.output_files[0]
        clusterCoincsOuts.append(clusterCoincsOut)
        sqliteCombine2Inputs.append(clusterCoincsOut)

        # Do injection jobs
        for injTag in injectionTags:
            # Choose a label for clustering the jobs
            job_label = get_random_label()
            # Combine trig files first
            currTags = tags + [injTag, vetoTag]
            trigInpFiles = trigVetoInpFiles.find_output_with_tag(injTag)
            trigInpFiles.append(dqSegFile[0])
            injFile = injectionFiles.find_output_with_tag(injTag)
            assert (len(injFile) == 1)
            sqliteCombine1Job = sqliteCombine1Exe(workflow.cp,
                                                  sqliteCombine1ExeTag,
                                                  ifo=workflow.ifo_string,
                                                  out_dir=output_dir,
                                                  tags=currTags)
            sqliteCombine1Node = sqliteCombine1Job.create_node(
                                          workflow.analysis_time, trigInpFiles,
                                          injFile=injFile[0], injString=injTag,
                                          workflow=workflow)
            sqliteCombine1Node.add_profile('pegasus', 'label', job_label)
            workflow.add_node(sqliteCombine1Node)
            # Node has only one output file
            sqliteCombine1Out = sqliteCombine1Node.output_files[0]
            sqliteCombine1Outs.append(sqliteCombine1Out)

            if do_repop:
                repopCoincJob = repopCoincExe(workflow.cp,
                                          repopCoincExeTag,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=currTags)
                repopCoincNode = repopCoincJob.create_node(
                                     workflow.analysis_time, sqliteCombine1Out)
                repopCoincNode.add_profile('pegasus', 'label', job_label)
                workflow.add_node(repopCoincNode)
                # Node has only one output file
                repopCoincOut = repopCoincNode.output_files[0]
                repopCoincOuts.append(repopCoincOut)

            # Input file plumbing allowing for possible repop_coinc job
            clusterCoincsIn = repopCoincOut if do_repop else sqliteCombine1Out
            # Cluster coincidences
            clusterCoincsJob = clusterCoincsExe(workflow.cp,
                                                clusterCoincsExeTag,
                                                ifo=workflow.ifo_string,
                                                out_dir=output_dir,
                                                tags=currTags)
            clusterCoincsNode = clusterCoincsJob.create_node(
                                       workflow.analysis_time, clusterCoincsIn)
            clusterCoincsNode.add_profile('pegasus', 'label', job_label)
            workflow.add_node(clusterCoincsNode)
            # Node has only one output file
            clusterCoincsOut = clusterCoincsNode.output_files[0]
            clusterCoincsOuts.append(clusterCoincsOut)
            sqliteCombine2Inputs.append(clusterCoincsOut)

        # Choose a new label for pegasus-clustering the jobs
        job_label = get_random_label()

        # Combine everything together and add veto file
        currTags = tags + [vetoTag]
        sqliteCombine2Job = sqliteCombine2Exe(workflow.cp, 
                                              sqliteCombine2ExeTag,
                                              ifo=workflow.ifo_string, 
                                              out_dir=output_dir,
                                              tags=currTags)
        sqliteCombine2Node = sqliteCombine2Job.create_node(
                                  workflow.analysis_time, sqliteCombine2Inputs)
        sqliteCombine2Node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(sqliteCombine2Node)
        sqliteCombine2Out = sqliteCombine2Node.output_files[0]
        sqliteCombine2Outs.append(sqliteCombine2Out)

        # Inj finding
        injFindJob = injFindExe(workflow.cp, injFindExeTag,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir, tags=currTags)
        injFindNode = injFindJob.create_node(workflow.analysis_time,
                                                         sqliteCombine2Out)
        injFindNode.add_profile('pegasus', 'label', job_label)
        workflow.add_node(injFindNode)
        injFindOut = injFindNode.output_files[0]
        injFindOuts.append(injFindOut)


    return injFindOuts, sqliteCombine1Outs, clusterCoincsOuts,\
           sqliteCombine2Outs
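
A corresponding sketch for the pipedown-style variant above; as before, the
input FileLists are assumed to be produced earlier in the workflow and the tag
strings are placeholders. The four returned FileLists match the Returns
section of the docstring, and veto_cats=[2, 3, 4] follows the example given
there:

injfind_dbs, raw_dbs, clustered_dbs, combined_dbs = \
    setup_postprocprep_pipedown_workflow(
        workflow, coincFiles, 'postprocprep', tags=['POSTPROC1'],
        do_repop=False, injectionFiles=injectionFiles, vetoFiles=vetoFiles,
        injLessTag='FULL_DATA', injectionTags=['BNSINJ'],
        veto_cats=[2, 3, 4])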
Example 5
def setup_postproc_coh_PTF_workflow(workflow, trig_files, trig_cache,
                                    inj_trig_files, inj_files, inj_trig_caches,
                                    inj_caches, config_file, output_dir,
                                    html_dir, segment_dir, ifos, inj_tags=[],
                                    tags=[]):
    """
    This module sets up the post-processing stage in the workflow, using a
    coh_PTF style set up. This consists of running trig_combiner to find
    coherent triggers and injfinder to look for injections. It then runs
    a horizon_dist job, trig_cluster to cluster triggers, and injcombiner to
    calculate injection statistics. Finally, efficiency and sbv_plotter jobs
    calculate efficiency and signal-based veto statistics and make plots.

    Parameters
    ----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the jobs will be added to.
    trig_files : pycbc.workflow.core.FileList
        A FileList containing the combined databases.

    Returns
    -------
    pp_outs : pycbc.workflow.core.FileList
        A FileList of all output files produced by the post-processing jobs.
    """
    cp = workflow.cp
    full_segment = trig_files[0].segment
    trig_name = cp.get("workflow", "trigger-name")
    grb_string = "GRB" + trig_name
    num_trials = int(cp.get("trig_combiner", "num-trials"))

    pp_outs = FileList([])
    pp_nodes = []

    # Set up needed exe classes
    trig_combiner_exe = os.path.basename(cp.get("executables",
                                                "trig_combiner"))
    trig_combiner_class = select_generic_executable(workflow, "trig_combiner")

    trig_cluster_exe = os.path.basename(cp.get("executables", "trig_cluster"))
    trig_cluster_class = select_generic_executable(workflow, "trig_cluster")


    sbv_plotter_exe = os.path.basename(cp.get("executables", "sbv_plotter"))
    sbv_plotter_class = select_generic_executable(workflow, "sbv_plotter")
    
    efficiency_exe = os.path.basename(cp.get("executables", "efficiency"))
    efficiency_class = select_generic_executable(workflow, "efficiency")
    """
    horizon_dist_exe = os.path.basename(cp.get("executables",
                                               "horizon_dist"))
    horizon_dist_class = select_generic_executable(workflow,
                                                   "horizon_dist")
    """
    html_summary_exe = os.path.basename(cp.get("executables", "html_summary"))
    html_summary_class = select_generic_executable(workflow, "html_summary")

    # Set up trig_combiner job
    trig_combiner_out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]
    trig_combiner_jobs = trig_combiner_class(cp, "trig_combiner", ifo=ifos, 
                                             out_dir=output_dir, tags=tags)
    trig_combiner_node, trig_combiner_outs = trig_combiner_jobs.create_node(\
            trig_files, segment_dir, out_tags=trig_combiner_out_tags,
            tags=tags)
    pp_nodes.append(trig_combiner_node)
    workflow.add_node(trig_combiner_node)
    pp_outs.extend(trig_combiner_outs)

    # Initialise trig_cluster class
    trig_cluster_outs = FileList([])
    trig_cluster_jobs = trig_cluster_class(cp, "trig_cluster", ifo=ifos,
                                           out_dir=output_dir, tags=tags)

    # Set up injfinder jobs
    if cp.has_section("workflow-injections"):
        injfinder_nodes = []
        injcombiner_parent_nodes = []

        injfinder_exe = os.path.basename(cp.get("executables", "injfinder"))
        injfinder_class = select_generic_executable(workflow, "injfinder")
        injfinder_jobs = injfinder_class(cp, "injfinder", ifo=ifos,
                                         out_dir=output_dir, tags=tags)

        injcombiner_exe = os.path.basename(cp.get("executables",
                                                  "injcombiner"))
        injcombiner_class = select_generic_executable(workflow, "injcombiner")
        injcombiner_jobs = injcombiner_class(cp, "injcombiner", ifo=ifos,
                                             out_dir=output_dir, tags=tags)

        injfinder_outs = FileList([])
        for inj_tag in inj_tags:
            triggers = FileList([file for file in inj_trig_files \
                                 if inj_tag in file.tag_str])
            injections = FileList([file for file in inj_files \
                                   if inj_tag in file.tag_str])
            trig_cache = [file for file in inj_trig_caches \
                          if inj_tag in file.tag_str][0]
            inj_cache = [file for file in inj_caches \
                         if inj_tag in file.tag_str][0]
            injfinder_node, curr_outs = injfinder_jobs.create_node(\
                    triggers, injections, segment_dir, tags=[inj_tag])
            injfinder_nodes.append(injfinder_node)
            pp_nodes.append(injfinder_node)
            workflow.add_node(injfinder_node)
            injfinder_outs.extend(curr_outs)
            if "DETECTION" not in curr_outs[0].tag_str:
                injcombiner_parent_nodes.append(injfinder_node)

        pp_outs.extend(injfinder_outs)

        # Make injfinder output cache
        fm_cache = File(ifos, "foundmissed", full_segment,
                        extension="lcf", directory=output_dir)
        fm_cache.PFN(fm_cache.cache_entry.path, site="local")
        injfinder_outs.convert_to_lal_cache().tofile(\
                open(fm_cache.storage_path, "w"))
        pp_outs.extend(FileList([fm_cache]))

        # Set up injcombiner jobs
        injcombiner_outs = FileList([file for file in injfinder_outs \
                                     if "DETECTION" in file.tag_str])
        injcombiner_tags = [inj_tag for inj_tag in inj_tags \
                            if "DETECTION" not in inj_tag]
        injcombiner_out_tags = [injcombiner_outs[0].tag_str.rsplit('_', 1)[0]]
        injcombiner_nodes = []

        for injcombiner_tag in injcombiner_tags:
            max_inc = cp.get_opt_tags("injections", "max-inc",
                                      [injcombiner_tag])
            inj_str = injcombiner_tag[:4]
            inputs = FileList([file for file in injfinder_outs \
                               if injcombiner_tag in file.tagged_description])
            #                   if any(tag in file.tagged_description \
            #                          for tag in injcombiner_tags)])
            injcombiner_node, curr_outs = injcombiner_jobs.create_node(\
                    fm_cache, inputs, inj_str, max_inc, workflow.analysis_time)
            injcombiner_nodes.append(injcombiner_node)
            injcombiner_out_tags.append("%s_FILTERED_%s" % (inj_str, max_inc))
            injcombiner_outs.extend(curr_outs)
            pp_outs.extend(curr_outs)
            pp_nodes.append(injcombiner_node)
            workflow.add_node(injcombiner_node)
            for parent_node in injcombiner_parent_nodes:
                dep = dax.Dependency(parent=parent_node._dax_node,
                                     child=injcombiner_node._dax_node)
                workflow._adag.addDependency(dep)

        # Initialise injection_efficiency class
        inj_efficiency_jobs = efficiency_class(cp, "inj_efficiency", ifo=ifos,
                                               out_dir=output_dir, tags=tags)

    # Initialise sbv_plotter class
    sbv_plotter_outs = FileList([])
    sbv_plotter_jobs = sbv_plotter_class(cp, "sbv_plotter", ifo=ifos,
                                         out_dir=output_dir, tags=tags)

    # Initialise efficiency class
    efficiency_outs = FileList([])
    efficiency_jobs = efficiency_class(cp, "efficiency", ifo=ifos,
                                       out_dir=output_dir, tags=tags)

    # Initialise html_summary class
    html_summary_jobs = html_summary_class(cp, "html_summary", ifo=ifos,
                                           out_dir=output_dir, tags=tags)

    # Add trig_cluster jobs and their corresponding plotting jobs
    for out_tag in trig_combiner_out_tags:
        unclust_file = [file for file in trig_combiner_outs \
                        if out_tag in file.tag_str][0]
        trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        trig_cluster_outs.extend(curr_outs)
        clust_file = curr_outs[0]
        if out_tag != "ONSOURCE":
            # Add memory requirement for jobs with potentially large files
            trig_cluster_node.set_memory(1300)
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add sbv_plotter job
            sbv_out_tags = [out_tag, "_clustered"]
            sbv_plotter_node = sbv_plotter_jobs.create_node(clust_file,
                                                            segment_dir,
                                                            tags=sbv_out_tags)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)

            if out_tag == "OFFSOURCE":
                offsource_clustered = clust_file
                off_node = sbv_plotter_node

                found_inj_files = FileList([file for file in injcombiner_outs \
                                            if "FOUND" in file.tag_str])
                for curr_injs in found_inj_files:
                    curr_tags = [tag for tag in injcombiner_out_tags \
                                 if tag in curr_injs.name]
                    curr_tags.append("_clustered")
                    sbv_plotter_node = sbv_plotter_jobs.create_node(clust_file,
                            segment_dir, inj_file=curr_injs, tags=curr_tags)
                    pp_nodes.append(sbv_plotter_node)
                    workflow.add_node(sbv_plotter_node)
                    dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                         child=sbv_plotter_node._dax_node)
                    workflow._adag.addDependency(dep)
                    for parent_node in injcombiner_nodes:
                        dep = dax.Dependency(parent=parent_node._dax_node,
                                             child=sbv_plotter_node._dax_node)
                        workflow._adag.addDependency(dep)

            # Also add sbv_plotter job for unclustered triggers
            sbv_plotter_node = sbv_plotter_jobs.create_node(unclust_file,
                    segment_dir, tags=[out_tag, "_unclustered"])
            sbv_plotter_node.set_memory(1300)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)
        else:
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add efficiency job for on/off
            efficiency_node = efficiency_jobs.create_node(clust_file,
                    offsource_clustered, segment_dir, tags=[out_tag])
            pp_nodes.append(efficiency_node)
            workflow.add_node(efficiency_node)
            dep = dax.Dependency(parent=off_node._dax_node,
                                 child=efficiency_node._dax_node)
            workflow._adag.addDependency(dep)

            if cp.has_section("workflow-injections"):
                for tag in injcombiner_out_tags:
                    found_file = [file for file in injcombiner_outs \
                                  if tag + "_FOUND" in file.tag_str][0]
                    missed_file = [file for file in injcombiner_outs \
                                   if tag + "_MISSED" in file.tag_str][0]
                    inj_efficiency_node = inj_efficiency_jobs.create_node(\
                            clust_file, offsource_clustered, segment_dir,
                            found_file, missed_file, tags=[out_tag, tag])
                    pp_nodes.append(inj_efficiency_node)
                    workflow.add_node(inj_efficiency_node)
                    dep = dax.Dependency(parent=off_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                    for injcombiner_node in injcombiner_nodes:
                        dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)
                    for injfinder_node in injfinder_nodes:
                        dep = dax.Dependency(parent=injfinder_node._dax_node,
                                child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)

    # Add further trig_cluster jobs for trials
    trial = 1

    while trial <= num_trials:
        trial_tag = "OFFTRIAL_%d" % trial
        unclust_file = [file for file in trig_combiner_outs \
                        if trial_tag in file.tag_str][0]
        trig_cluster_node, clust_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        clust_file = clust_outs[0]
        trig_cluster_outs.extend(clust_outs)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)

        # Add efficiency job
        efficiency_node = efficiency_jobs.create_node(clust_file,
                offsource_clustered, segment_dir, tags=[trial_tag])
        pp_nodes.append(efficiency_node)
        workflow.add_node(efficiency_node)
        dep = dax.Dependency(parent=off_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)
        dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)

        # Adding inj_efficiency job
        if cp.has_section("workflow-injections"):
            for tag in injcombiner_out_tags:
                found_file = [file for file in injcombiner_outs \
                              if tag + "_FOUND" in file.tag_str][0]
                missed_file = [file for file in injcombiner_outs \
                               if tag + "_MISSED" in file.tag_str][0]
                inj_efficiency_node = inj_efficiency_jobs.create_node(\
                        clust_file, offsource_clustered, segment_dir,
                        found_file, missed_file, tags=[trial_tag, tag])
                pp_nodes.append(inj_efficiency_node)
                workflow.add_node(inj_efficiency_node)
                dep = dax.Dependency(parent=off_node._dax_node,
                                     child=inj_efficiency_node._dax_node)
                workflow._adag.addDependency(dep)
                for injcombiner_node in injcombiner_nodes:
                    dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                for injfinder_node in injfinder_nodes:
                    dep = dax.Dependency(parent=injfinder_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)

        trial += 1

    # Initialise html_summary class and set up job
    #FIXME: We may want this job to run even if some jobs fail
    html_summary_jobs = html_summary_class(cp, "html_summary", ifo=ifos,
                                           out_dir=output_dir, tags=tags)
    if cp.has_section("workflow-injections"):
        tuning_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                       if "DETECTION" in inj_tag]
        exclusion_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                          if "DETECTION" not in inj_tag]
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                tuning_tags=tuning_tags, exclusion_tags=exclusion_tags,
                html_dir=html_dir)
    else:
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                                                          html_dir=html_dir)
    workflow.add_node(html_summary_node)
    for pp_node in pp_nodes:
        dep = dax.Dependency(parent=pp_node._dax_node,
                             child=html_summary_node._dax_node)
        workflow._adag.addDependency(dep)

    # Make the open box shell script
    open_box_cmd = ' '.join(html_summary_node.get_command_line())
    open_box_cmd += " --open-box"
    open_box_path = "%s/open_the_box.sh" % output_dir
    with open(open_box_path, "w") as f:
        f.write("#!/bin/sh\n%s" % open_box_cmd)
    os.chmod(open_box_path, 0o500)

    pp_outs.extend(trig_cluster_outs)

    return pp_outs
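
The parent/child wiring above is repeated for every pair of jobs. A small helper such as the hypothetical sketch below (not part of the source; the name _add_dependency is invented) would keep that pattern in one place, using only the dax and workflow attributes already seen above.

def _add_dependency(workflow, parent_node, child_node):
    # Hypothetical helper: register a Pegasus DAX edge so that child_node
    # only runs once parent_node has finished.
    dep = dax.Dependency(parent=parent_node._dax_node,
                         child=child_node._dax_node)
    workflow._adag.addDependency(dep)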
Example n. 6
0
def setup_timeslides_workflow(workflow, output_dir=None, tags=[],
                              timeSlideSectionName='ligolw_tisi'):
    '''
    Set up generation of time_slide input files in the workflow. Currently
    this is used only with ligolw_tisi to generate files containing the list
    of slides to be performed in each time slide job.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. This will be used in output names.
    timeSlideSectionName : string (optional, default='ligolw_tisi')
        The string that corresponds to the option describing the exe location
        in the [executables] section of the .ini file and that corresponds to
        the section (and sub-sections) giving the options that will be given to
        the code at run time.

    Returns
    --------
    timeSlideOuts : pycbc.workflow.core.FileList
        The list of time slide files created by this call.
    '''
    logging.info("Entering time slides setup module.")
    make_analysis_dir(output_dir)
    # Get ifo list and full analysis segment for output file naming
    ifoList = workflow.ifos
    ifo_string = workflow.ifo_string
    fullSegment = workflow.analysis_time

    # Identify which time-slides to do by presence of sub-sections in the
    # configuration file
    all_sec = workflow.cp.sections()
    timeSlideSections = [sec for sec in all_sec if sec.startswith('tisi-')]
    timeSlideTags = [(sec.split('-')[-1]).upper() for sec in timeSlideSections]

    timeSlideOuts = FileList([])

    # FIXME: Add ability to specify different exes

    # Make the timeSlideFiles
    for timeSlideTag in timeSlideTags:
        currTags = tags + [timeSlideTag]

        timeSlideMethod = workflow.cp.get_opt_tags("workflow-timeslides",
                                                 "timeslides-method", currTags)

        if timeSlideMethod in ["IN_WORKFLOW", "AT_RUNTIME"]:
            timeSlideExeTag = workflow.cp.get_opt_tags("workflow-timeslides",
                                                    "timeslides-exe", currTags)
            timeSlideExe = select_generic_executable(workflow, timeSlideExeTag)
            timeSlideJob = timeSlideExe(workflow.cp, timeSlideExeTag, ifos=ifo_string,
                                             tags=currTags, out_dir=output_dir)
            timeSlideNode = timeSlideJob.create_node(fullSegment)
            if timeSlideMethod == "AT_RUNTIME":
                workflow.execute_node(timeSlideNode)
            else:
                workflow.add_node(timeSlideNode)
            tisiOutFile = timeSlideNode.output_files[0]
        elif timeSlideMethod == "PREGENERATED":
            timeSlideFilePath = workflow.cp.get_opt_tags("workflow-timeslides",
                                      "timeslides-pregenerated-file", currTags)
            file_url = urlparse.urljoin('file:', urllib.pathname2url(\
                                                  timeSlideFilePath))
            tisiOutFile = File(ifoString, 'PREGEN_TIMESLIDES',
                               fullSegment, file_url, tags=currTags)

        timeSlideOuts.append(tisiOutFile)

    return timeSlideOuts
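
A hedged usage sketch (not part of the source): it assumes `workflow` is an already-constructed pycbc.workflow.core.Workflow whose configuration file contains a [workflow-timeslides] section and at least one [tisi-...] sub-section with a timeslides-method option; the output directory name is illustrative.

time_slide_files = setup_timeslides_workflow(workflow,
                                             output_dir='timeslide_files')
for tisi_file in time_slide_files:
    # Each entry is a pycbc.workflow.core.File pointing at one time-slide
    # definition file.
    logging.info("Time slide file: %s", tisi_file.name)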
Example n. 7
0
def setup_postproc_coh_PTF_workflow(workflow,
                                    trig_files,
                                    trig_cache,
                                    inj_trig_files,
                                    inj_files,
                                    inj_trig_caches,
                                    inj_caches,
                                    config_file,
                                    output_dir,
                                    html_dir,
                                    segment_dir,
                                    ifos,
                                    inj_tags=[],
                                    tags=[]):
    """
    This module sets up the post-processing stage in the workflow, using a
    coh_PTF style set up. This consists of running trig_combiner to find
    coherent triggers, and injfinder to look for injections. It then runs
    a horizon_dist job, trig_cluster to cluster triggers, and injcombiner to
    calculate injection statistics. Finally, efficiency and sbv_plotter jobs
    calculate efficiency and signal based veto statistics and make plots.
    
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the jobs will be added to.
    trig_files : pycbc.workflow.core.FileList
        A FileList containing the combined databases.
   
    Returns
    --------
    
    """
    cp = workflow.cp
    full_segment = trig_files[0].segment
    trig_name = cp.get("workflow", "trigger-name")
    grb_string = "GRB" + trig_name
    num_trials = int(cp.get("trig_combiner", "num-trials"))

    pp_outs = FileList([])
    pp_nodes = []

    # Set up needed exe classes
    trig_combiner_exe = os.path.basename(cp.get("executables",
                                                "trig_combiner"))
    trig_combiner_class = select_generic_executable(workflow, "trig_combiner")

    trig_cluster_exe = os.path.basename(cp.get("executables", "trig_cluster"))
    trig_cluster_class = select_generic_executable(workflow, "trig_cluster")

    sbv_plotter_exe = os.path.basename(cp.get("executables", "sbv_plotter"))
    sbv_plotter_class = select_generic_executable(workflow, "sbv_plotter")

    efficiency_exe = os.path.basename(cp.get("executables", "efficiency"))
    efficiency_class = select_generic_executable(workflow, "efficiency")
    """
    horizon_dist_exe = os.path.basename(cp.get("executables",
                                               "horizon_dist"))
    horizon_dist_class = select_generic_executable(workflow,
                                                   "horizon_dist")
    """
    html_summary_exe = os.path.basename(cp.get("executables", "html_summary"))
    html_summary_class = select_generic_executable(workflow, "html_summary")

    # Set up trig_combiner job
    trig_combiner_out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]
    trig_combiner_jobs = trig_combiner_class(cp,
                                             "trig_combiner",
                                             ifo=ifos,
                                             out_dir=output_dir,
                                             tags=tags)
    trig_combiner_node, trig_combiner_outs = trig_combiner_jobs.create_node(\
            trig_files, segment_dir, out_tags=trig_combiner_out_tags,
            tags=tags)
    pp_nodes.append(trig_combiner_node)
    workflow.add_node(trig_combiner_node)
    pp_outs.extend(trig_combiner_outs)

    # Initialise trig_cluster class
    trig_cluster_outs = FileList([])
    trig_cluster_jobs = trig_cluster_class(cp,
                                           "trig_cluster",
                                           ifo=ifos,
                                           out_dir=output_dir,
                                           tags=tags)

    # Set up injfinder jobs
    if cp.has_section("workflow-injections"):
        injfinder_nodes = []
        injcombiner_parent_nodes = []
        inj_sbv_plotter_parent_nodes = []

        injfinder_exe = os.path.basename(cp.get("executables", "injfinder"))
        injfinder_class = select_generic_executable(workflow, "injfinder")
        injfinder_jobs = injfinder_class(cp,
                                         "injfinder",
                                         ifo=ifos,
                                         out_dir=output_dir,
                                         tags=tags)

        injcombiner_exe = os.path.basename(cp.get("executables",
                                                  "injcombiner"))
        injcombiner_class = select_generic_executable(workflow, "injcombiner")
        injcombiner_jobs = injcombiner_class(cp,
                                             "injcombiner",
                                             ifo=ifos,
                                             out_dir=output_dir,
                                             tags=tags)

        injfinder_outs = FileList([])
        for inj_tag in inj_tags:
            triggers = FileList([file for file in inj_trig_files \
                                 if inj_tag in file.tag_str])
            injections = FileList([file for file in inj_files \
                                   if inj_tag in file.tag_str])
            trig_cache = [file for file in inj_trig_caches \
                          if inj_tag in file.tag_str][0]
            inj_cache = [file for file in inj_caches \
                         if inj_tag in file.tag_str][0]
            injfinder_node, curr_outs = injfinder_jobs.create_node(\
                    triggers, injections, segment_dir, tags=[inj_tag])
            injfinder_nodes.append(injfinder_node)
            pp_nodes.append(injfinder_node)
            workflow.add_node(injfinder_node)
            injfinder_outs.extend(curr_outs)
            if "DETECTION" not in curr_outs[0].tagged_description:
                injcombiner_parent_nodes.append(injfinder_node)
            else:
                inj_sbv_plotter_parent_nodes.append(injfinder_node)

        pp_outs.extend(injfinder_outs)

        # Make injfinder output cache
        fm_cache = File(ifos,
                        "foundmissed",
                        full_segment,
                        extension="lcf",
                        directory=output_dir)
        fm_cache.PFN(fm_cache.cache_entry.path, site="local")
        injfinder_outs.convert_to_lal_cache().tofile(\
                open(fm_cache.storage_path, "w"))
        pp_outs.extend(FileList([fm_cache]))

        # Set up injcombiner jobs
        injcombiner_outs = FileList([file for file in injfinder_outs \
                                     if "DETECTION" in file.tag_str])
        injcombiner_tags = [inj_tag for inj_tag in inj_tags \
                            if "DETECTION" not in inj_tag]
        injcombiner_out_tags = [injcombiner_outs[0].tag_str.rsplit('_', 1)[0]]
        injcombiner_nodes = []

        for injcombiner_tag in injcombiner_tags:
            max_inc = cp.get_opt_tags("injections", "max-inc",
                                      [injcombiner_tag])
            inj_str = injcombiner_tag[:4]
            inputs = FileList([file for file in injfinder_outs \
                               if injcombiner_tag in file.tagged_description])
            #                   if any(tag in file.tagged_description \
            #                          for tag in injcombiner_tags)])
            injcombiner_node, curr_outs = injcombiner_jobs.create_node(\
                    fm_cache, inputs, inj_str, max_inc, workflow.analysis_time)
            injcombiner_nodes.append(injcombiner_node)
            injcombiner_out_tags.append("%s_FILTERED_%s" % (inj_str, max_inc))
            injcombiner_outs.extend(curr_outs)
            pp_outs.extend(curr_outs)
            pp_nodes.append(injcombiner_node)
            workflow.add_node(injcombiner_node)
            for parent_node in injcombiner_parent_nodes:
                dep = dax.Dependency(parent=parent_node._dax_node,
                                     child=injcombiner_node._dax_node)
                workflow._adag.addDependency(dep)

        # Initialise injection_efficiency class
        inj_efficiency_jobs = efficiency_class(cp,
                                               "inj_efficiency",
                                               ifo=ifos,
                                               out_dir=output_dir,
                                               tags=tags)

    # Initialise sbv_plotter class
    sbv_plotter_outs = FileList([])
    sbv_plotter_jobs = sbv_plotter_class(cp,
                                         "sbv_plotter",
                                         ifo=ifos,
                                         out_dir=output_dir,
                                         tags=tags)

    # Initialise efficiency class
    efficiency_outs = FileList([])
    efficiency_jobs = efficiency_class(cp,
                                       "efficiency",
                                       ifo=ifos,
                                       out_dir=output_dir,
                                       tags=tags)

    # Add trig_cluster jobs and their corresponding plotting jobs
    for out_tag in trig_combiner_out_tags:
        unclust_file = [file for file in trig_combiner_outs \
                        if out_tag in file.tag_str][0]
        trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        trig_cluster_outs.extend(curr_outs)
        clust_file = curr_outs[0]
        if out_tag != "ONSOURCE":
            # Add memory requirement for jobs with potentially large files
            trig_cluster_node.set_memory(1300)
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add sbv_plotter job
            sbv_out_tags = [out_tag, "_clustered"]
            sbv_plotter_node = sbv_plotter_jobs.create_node(clust_file,
                                                            segment_dir,
                                                            tags=sbv_out_tags)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add injection sbv_plotter nodes if appropriate
            if out_tag == "OFFSOURCE" and \
                    cp.has_section("workflow-injections"):
                offsource_clustered = clust_file
                off_node = sbv_plotter_node

                found_inj_files = FileList([file for file in injcombiner_outs \
                                            if "FOUND" in file.tag_str])
                for curr_injs in found_inj_files:
                    curr_tags = [tag for tag in injcombiner_out_tags \
                                 if tag in curr_injs.name]
                    curr_tags.append("_clustered")
                    sbv_plotter_node = sbv_plotter_jobs.create_node(
                        clust_file,
                        segment_dir,
                        inj_file=curr_injs,
                        tags=curr_tags)
                    pp_nodes.append(sbv_plotter_node)
                    workflow.add_node(sbv_plotter_node)
                    dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                         child=sbv_plotter_node._dax_node)
                    workflow._adag.addDependency(dep)
                    if "DETECTION" in curr_injs.tagged_description:
                        for parent_node in inj_sbv_plotter_parent_nodes:
                            dep = dax.Dependency(
                                parent=parent_node._dax_node,
                                child=sbv_plotter_node._dax_node)
                            workflow._adag.addDependency(dep)
                    else:
                        for parent_node in injcombiner_nodes:
                            dep = dax.Dependency(
                                parent=parent_node._dax_node,
                                child=sbv_plotter_node._dax_node)
                            workflow._adag.addDependency(dep)

            # Also add sbv_plotter job for unclustered triggers
            sbv_plotter_node = sbv_plotter_jobs.create_node(
                unclust_file, segment_dir, tags=[out_tag, "_unclustered"])
            sbv_plotter_node.set_memory(1300)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)
        else:
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add efficiency job for on/off
            efficiency_node = efficiency_jobs.create_node(clust_file,
                                                          offsource_clustered,
                                                          segment_dir,
                                                          tags=[out_tag])
            pp_nodes.append(efficiency_node)
            workflow.add_node(efficiency_node)
            dep = dax.Dependency(parent=off_node._dax_node,
                                 child=efficiency_node._dax_node)
            workflow._adag.addDependency(dep)

            if cp.has_section("workflow-injections"):
                for tag in injcombiner_out_tags:
                    if "_FILTERED_" in tag:
                        inj_set_tag = [t for t in inj_tags if \
                                       str(tag).replace("_FILTERED_", "") \
                                       in t][0]
                    else:
                        inj_set_tag = str(tag)

                    found_file = [file for file in injcombiner_outs \
                                  if tag + "_FOUND" in file.tag_str][0]
                    missed_file = [file for file in injcombiner_outs \
                                   if tag + "_MISSED" in file.tag_str][0]
                    inj_efficiency_node = inj_efficiency_jobs.create_node(\
                            clust_file, offsource_clustered, segment_dir,
                            found_file, missed_file, tags=[out_tag, tag,
                                                           inj_set_tag])
                    pp_nodes.append(inj_efficiency_node)
                    workflow.add_node(inj_efficiency_node)
                    dep = dax.Dependency(parent=off_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                    for injcombiner_node in injcombiner_nodes:
                        dep = dax.Dependency(
                            parent=injcombiner_node._dax_node,
                            child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)
                    for injfinder_node in injfinder_nodes:
                        dep = dax.Dependency(
                            parent=injfinder_node._dax_node,
                            child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)

    # Add further trig_cluster jobs for trials
    trial = 1

    while trial <= num_trials:
        trial_tag = "OFFTRIAL_%d" % trial
        unclust_file = [file for file in trig_combiner_outs \
                        if trial_tag in file.tag_str][0]
        trig_cluster_node, clust_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        clust_file = clust_outs[0]
        trig_cluster_outs.extend(clust_outs)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)

        # Add efficiency job
        efficiency_node = efficiency_jobs.create_node(clust_file,
                                                      offsource_clustered,
                                                      segment_dir,
                                                      tags=[trial_tag])
        pp_nodes.append(efficiency_node)
        workflow.add_node(efficiency_node)
        dep = dax.Dependency(parent=off_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)
        dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)

        # Adding inj_efficiency job
        if cp.has_section("workflow-injections"):
            for tag in injcombiner_out_tags:
                if "_FILTERED_" in tag:
                    inj_set_tag = [t for t in inj_tags if \
                                   str(tag).replace("_FILTERED_", "") in t][0]
                else:
                    inj_set_tag = str(tag)

                found_file = [file for file in injcombiner_outs \
                              if tag + "_FOUND" in file.tag_str][0]
                missed_file = [file for file in injcombiner_outs \
                               if tag + "_MISSED" in file.tag_str][0]
                inj_efficiency_node = inj_efficiency_jobs.create_node(\
                        clust_file, offsource_clustered, segment_dir,
                        found_file, missed_file, tags=[trial_tag, tag,
                                                       inj_set_tag])
                pp_nodes.append(inj_efficiency_node)
                workflow.add_node(inj_efficiency_node)
                dep = dax.Dependency(parent=off_node._dax_node,
                                     child=inj_efficiency_node._dax_node)
                workflow._adag.addDependency(dep)
                for injcombiner_node in injcombiner_nodes:
                    dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                for injfinder_node in injfinder_nodes:
                    dep = dax.Dependency(parent=injfinder_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)

        trial += 1

    # Initialise html_summary class and set up job
    #FIXME: We may want this job to run even if some jobs fail
    html_summary_jobs = html_summary_class(cp,
                                           "html_summary",
                                           ifo=ifos,
                                           out_dir=output_dir,
                                           tags=tags)
    if cp.has_section("workflow-injections"):
        tuning_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                       if "DETECTION" in inj_tag]
        exclusion_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                          if "DETECTION" not in inj_tag]
        html_summary_node = html_summary_jobs.create_node(
            c_file=config_file,
            tuning_tags=tuning_tags,
            exclusion_tags=exclusion_tags,
            html_dir=html_dir)
    else:
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                                                          html_dir=html_dir)
    workflow.add_node(html_summary_node)
    for pp_node in pp_nodes:
        dep = dax.Dependency(parent=pp_node._dax_node,
                             child=html_summary_node._dax_node)
        workflow._adag.addDependency(dep)

    # Make the open box shell script
    open_box_cmd = html_summary_node.executable.get_pfn() + " "
    open_box_cmd += ' '.join(html_summary_node._args + \
                             html_summary_node._options)
    open_box_cmd += " --open-box"
    open_box_path = "%s/open_the_box.sh" % output_dir
    f = open(open_box_path, "w")
    f.write("#!/bin/sh\n%s" % open_box_cmd)
    f.close()
    os.chmod(open_box_path, 0500)

    pp_outs.extend(trig_cluster_outs)

    return pp_outs
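
A hedged usage sketch (not part of the source): every argument name below is a placeholder for the corresponding output of earlier matched-filter, injection and segment stages of a PyGRB-style workflow, and the 'H1L1V1' ifo string and directory names are purely illustrative.

pp_files = setup_postproc_coh_PTF_workflow(workflow, trig_files, trig_cache,
                                           inj_trig_files, inj_files,
                                           inj_trig_caches, inj_caches,
                                           config_file,
                                           output_dir='post_processing',
                                           html_dir='html',
                                           segment_dir='segments',
                                           ifos='H1L1V1',
                                           inj_tags=inj_tags)
# pp_files collects the combined, clustered trigger files and related
# post-processing products added to the workflow.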
Example n. 8
0
def setup_injection_workflow(workflow,
                             output_dir=None,
                             inj_section_name='injections',
                             exttrig_file=None,
                             tags=None):
    """
    This function is the gateway for setting up injection-generation jobs in a
    workflow. It should be possible for this function to support a number of
    different ways/codes that could be used for doing this; however, as this
    will presumably stay as a single call to a single code (which need not be
    inspinj), there are currently no subfunctions in this module.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    output_dir : path
        The directory in which injection files will be stored.
    inj_section_name : string (optional, default='injections')
        The string that corresponds to the option describing the exe location
        in the [executables] section of the .ini file and that corresponds to
        the section (and sub-sections) giving the options that will be given to
        the code at run time.
    exttrig_file : pycbc.workflow.core.File (optional, default=None)
        The external trigger (e.g. GRB) information file that is passed to
        the injection job when one of the COH_PTF injection methods is used.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. This will be used in output names.

    Returns
    --------
    inj_files : pycbc.workflow.core.FileList
        The list of injection files created by this call.
    inj_tags : list of strings
        The tag corresponding to each injection file and used to uniquely
        identify them. The FileList class contains functions to search
        based on tags.
    """
    if tags is None:
        tags = []
    logging.info("Entering injection module.")
    make_analysis_dir(output_dir)

    # Get full analysis segment for output file naming
    full_segment = workflow.analysis_time
    ifos = workflow.ifos

    # Identify which injections to do by presence of sub-sections in
    # the configuration file
    inj_tags = []
    inj_files = FileList([])

    for section in workflow.cp.get_subsections(inj_section_name):
        inj_tag = section.upper()
        curr_tags = tags + [inj_tag]

        # Parse for options in ini file
        injection_method = workflow.cp.get_opt_tags("workflow-injections",
                                                    "injections-method",
                                                    curr_tags)

        if injection_method in ["IN_WORKFLOW", "AT_RUNTIME"]:
            exe = select_generic_executable(workflow, 'injections')
            inj_job = exe(workflow.cp,
                          inj_section_name,
                          out_dir=output_dir,
                          ifos='HL',
                          tags=curr_tags)
            node = inj_job.create_node(full_segment)
            if injection_method == "AT_RUNTIME":
                workflow.execute_node(node)
            else:
                workflow.add_node(node)
            inj_file = node.output_files[0]
            inj_files.append(inj_file)
        elif injection_method == "PREGENERATED":
            file_attrs = {
                'ifos': ['HL'],
                'segs': full_segment,
                'tags': curr_tags
            }
            injection_path = workflow.cp.get_opt_tags(
                "workflow-injections", "injections-pregenerated-file",
                curr_tags)
            curr_file = resolve_url_to_file(injection_path, attrs=file_attrs)
            inj_files.append(curr_file)
        elif injection_method in ["IN_COH_PTF_WORKFLOW", "AT_COH_PTF_RUNTIME"]:
            inj_job = LalappsInspinjExecutable(workflow.cp,
                                               inj_section_name,
                                               out_dir=output_dir,
                                               ifos=ifos,
                                               tags=curr_tags)
            node = inj_job.create_node(full_segment, exttrig_file)
            if injection_method == "AT_COH_PTF_RUNTIME":
                workflow.execute_node(node)
            else:
                workflow.add_node(node)
            inj_file = node.output_files[0]

            if workflow.cp.has_option("workflow-injections", "em-bright-only"):
                em_filter_job = PycbcDarkVsBrightInjectionsExecutable(
                    workflow.cp,
                    'em_bright_filter',
                    tags=curr_tags,
                    out_dir=output_dir,
                    ifos=ifos)
                node = em_filter_job.create_node(inj_file, full_segment,
                                                 curr_tags)
                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]

            if workflow.cp.has_option("workflow-injections",
                                      "do-jitter-skyloc"):
                jitter_job = LigolwCBCJitterSkylocExecutable(
                    workflow.cp,
                    'jitter_skyloc',
                    tags=curr_tags,
                    out_dir=output_dir,
                    ifos=ifos)
                node = jitter_job.create_node(inj_file, full_segment,
                                              curr_tags)
                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]

            if workflow.cp.has_option("workflow-injections",
                                      "do-align-total-spin"):
                align_job = LigolwCBCAlignTotalSpinExecutable(
                    workflow.cp,
                    'align_total_spin',
                    tags=curr_tags,
                    out_dir=output_dir,
                    ifos=ifos)
                node = align_job.create_node(inj_file, full_segment, curr_tags)

                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]

            inj_files.append(inj_file)
        else:
            err = "Injection method must be one of IN_WORKFLOW, "
            err += "AT_RUNTIME or PREGENERATED. Got %s." % (injection_method)
            raise ValueError(err)

        inj_tags.append(inj_tag)

    logging.info("Leaving injection module.")
    return inj_files, inj_tags
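
A hedged usage sketch (not part of the source): `workflow` is an existing pycbc.workflow.core.Workflow whose .ini file has a [workflow-injections] section with an injections-method option for each [injections-...] sub-section; `grb_file` is a hypothetical external-trigger File only needed by the COH_PTF injection methods.

inj_files, inj_tags = setup_injection_workflow(workflow,
                                               output_dir='inj_files',
                                               exttrig_file=grb_file)
for inj_file, inj_tag in zip(inj_files, inj_tags):
    # One injection file per [injections-...] sub-section, identified by tag.
    logging.info("Injection set %s: %s", inj_tag, inj_file.name)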
Example n. 9
0
def setup_postproc_pipedown_workflow(workflow,
                                     trigger_files,
                                     summary_xml_files,
                                     output_dir,
                                     tags=[],
                                     veto_cats=[]):
    """
    This module sets up the post-processing stage in the workflow, using a pipedown
    style set up. This consists of running compute_durations to determine and
    store the analaysis time (foreground and background). It then runs cfar
    jobs to determine the false alarm rate for all triggers (simulations or
    otherwise) in the input database.
    Pipedown expects to take as input (at this stage) a single database
    containing all triggers. This sub-module follows that same idea, so
    len(triggerFiles) must equal 1 (for every DQ category that we will run).

    Parameters
    ----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    trigger_files : pycbc.workflow.core.FileList
        A FileList containing the combined databases at CAT_1,2,3... that
        will be used to calculate FARs.
    summary_xml_files : pycbc.workflow.core.FileList (required)
        A FileList of the output of the analysislogging_utils module.
        For pipedown-style post-processing this should be one file containing
        a segment table holding the single detector analysed times.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.
    veto_cats : list of integers (default = [], non-empty list required)
        Decide which veto category levels should be used in post-processing.
        For example tell the workflow to only generate results at cumulative
        categories 2, 3 and 4 by supplying [2,3,4] here.

    Returns
    --------
    final_files : pycbc.workflow.core.FileList
        A list of the final SQL databases containing computed FARs.
    """
    if not veto_cats:
        raise ValueError("A non-empty list of veto categories is required.")
    if not len(summary_xml_files) == 1:
        errMsg = "I need exactly one summaryXML file, got %d." \
                                                     %(len(summary_xml_files),)
        raise ValueError(errMsg)

    # Setup needed exe classes
    compute_durations_exe_tag = workflow.cp.get_opt_tags(
        "workflow-postproc", "postproc-computedurations-exe", tags)
    compute_durations_exe = select_generic_executable(
        workflow, compute_durations_exe_tag)
    cfar_exe_tag = workflow.cp.get_opt_tags("workflow-postproc",
                                            "postproc-cfar-exe", tags)
    cfar_exe = select_generic_executable(workflow, cfar_exe_tag)

    comp_durations_outs = FileList([])
    cfar_outs = FileList([])

    for cat in veto_cats:

        veto_tag = 'CUMULATIVE_CAT_%d' % (cat)
        trig_input_files = trigger_files.find_output_with_tag(veto_tag)
        if not len(trig_input_files) == 1:
            err_msg = "Did not find exactly 1 database input file."
            raise ValueError(err_msg)

        curr_tags = tags + [veto_tag]

        # Choose a label for clustering the jobs
        job_label = get_random_label()

        # Start with compute durations
        computeDurationsJob = compute_durations_exe(workflow.cp,
                                                    compute_durations_exe_tag,
                                                    ifo=workflow.ifo_string,
                                                    out_dir=output_dir,
                                                    tags=curr_tags)
        compute_durations_node = computeDurationsJob.create_node(
            workflow.analysis_time, trig_input_files[0], summary_xml_files[0])
        compute_durations_node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(compute_durations_node)

        # Node has only one output file
        compute_durations_out = compute_durations_node.output_files[0]
        comp_durations_outs.append(compute_durations_out)

        # Add the calculate FAR (cfar) job
        cfar_job = cfar_exe(workflow.cp,
                            cfar_exe_tag,
                            ifo=workflow.ifo_string,
                            out_dir=output_dir,
                            tags=curr_tags)
        cfar_node = cfar_job.create_node(workflow.analysis_time,
                                         compute_durations_out)
        cfar_node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(cfar_node)

        # Node has only one output file
        cfar_out = cfar_node.output_files[0]
        cfar_outs.append(cfar_out)

    return cfar_outs
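
A hedged usage sketch (not part of the source): `combined_dbs` and `summ_files` stand in for the FileLists produced by the earlier coincidence and analysislogging stages, and the [workflow-postproc] section of the .ini file is assumed to name the compute_durations and cfar executables.

far_dbs = setup_postproc_pipedown_workflow(workflow,
                                           trigger_files=combined_dbs,
                                           summary_xml_files=summ_files,
                                           output_dir='postproc',
                                           veto_cats=[2, 3, 4])
# far_dbs holds one FAR database per requested cumulative veto category.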
Example n. 10
0
def setup_postproc_pipedown_workflow(workflow, trigger_files, summary_xml_files,
                                  output_dir, tags=[], veto_cats=[]):
    """
    This module sets up the post-processing stage in the workflow, using a
    pipedown style set up. This consists of running compute_durations to
    determine and store the analysis time (foreground and background). It then
    runs cfar jobs to determine the false alarm rate for all triggers
    (simulations or otherwise) in the input database.
    Pipedown expects to take as input (at this stage) a single database
    containing all triggers. This sub-module follows that same idea, so
    len(trigger_files) must equal 1 (for every DQ category that we will run).

    Parameters
    ----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    trigger_files : pycbc.workflow.core.FileList
        A FileList containing the combined databases at CAT_1,2,3... that
        will be used to calculate FARs.
    summary_xml_files : pycbc.workflow.core.FileList (required)
        A FileList of the output of the analysislogging_utils module.
        For pipedown-style post-processing this should be one file containing
        a segment table holding the single detector analysed times.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.
    veto_cats : list of integers (default = [], non-empty list required)
        Decide which veto category levels should be used in post-processing.
        For example tell the workflow to only generate results at cumulative
        categories 2, 3 and 4 by supplying [2,3,4] here.

    Returns
    --------
    final_files : pycbc.workflow.core.FileList
        A list of the final SQL databases containing computed FARs.
    """
    if not veto_cats:
        raise ValueError("A non-empty list of veto categories is required.")
    if not len(summary_xml_files) == 1:
        errMsg = "I need exactly one summaryXML file, got %d." \
                                                     %(len(summary_xml_files),)
        raise ValueError(errMsg)

    # Setup needed exe classes
    compute_durations_exe_tag = workflow.cp.get_opt_tags("workflow-postproc",
                                   "postproc-computedurations-exe", tags)
    compute_durations_exe = select_generic_executable(workflow,
                                                     compute_durations_exe_tag)
    cfar_exe_tag = workflow.cp.get_opt_tags("workflow-postproc", "postproc-cfar-exe",
                                       tags)
    cfar_exe = select_generic_executable(workflow, cfar_exe_tag)

    comp_durations_outs = FileList([])
    cfar_outs = FileList([])

    for cat in veto_cats:

        veto_tag = 'CUMULATIVE_CAT_%d' %(cat)
        trig_input_files = trigger_files.find_output_with_tag(veto_tag)
        if not len(trig_input_files) == 1:
            err_msg = "Did not find exactly 1 database input file."
            raise ValueError(err_msg)

        curr_tags = tags + [veto_tag]

        # Choose a label for clustering the jobs
        job_label = get_random_label()

        # Start with compute durations
        computeDurationsJob = compute_durations_exe(workflow.cp, compute_durations_exe_tag,
                                                 ifo=workflow.ifo_string, 
                                                 out_dir=output_dir, 
                                                 tags=curr_tags)
        compute_durations_node = computeDurationsJob.create_node(
                                    workflow.analysis_time, trig_input_files[0],
                                    summary_xml_files[0])
        compute_durations_node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(compute_durations_node)

        # Node has only one output file
        compute_durations_out = compute_durations_node.output_files[0]
        comp_durations_outs.append(compute_durations_out)

        # Add the calculate FAR (cfar) job
        cfar_job = cfar_exe(workflow.cp, cfar_exe_tag, 
                                      ifo=workflow.ifo_string, 
                                      out_dir=output_dir, 
                                      tags=curr_tags)
        cfar_node = cfar_job.create_node(workflow.analysis_time,
                                       compute_durations_out)
        cfar_node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(cfar_node)

        # Node has only one output file
        cfar_out = cfar_node.output_files[0]
        cfar_outs.append(cfar_out)

    return cfar_outs
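
Because every job in the loop above carries the 'CUMULATIVE_CAT_%d' veto tag, the returned FileList can be filtered per category. A hedged sketch (assuming, as elsewhere in this module, that the tag propagates into the output file metadata; the input names are placeholders):

far_dbs = setup_postproc_pipedown_workflow(workflow, trigger_files,
                                           summary_xml_files, 'postproc',
                                           veto_cats=[2, 3, 4])
cat3_db = far_dbs.find_output_with_tag('CUMULATIVE_CAT_3')
if len(cat3_db) == 1:
    logging.info("CAT_3 FAR database: %s", cat3_db[0].name)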
Example n. 11
0
def setup_postprocprep_pipedown_workflow(workflow, coincFiles, output_dir,
                                         tags=[], do_repop=False,
                                         injectionFiles=None,
                                         vetoFiles=None, injLessTag=None,
                                         injectionTags=[], veto_cats=[]):
    """
    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    coincFiles : pycbc.workflow.core.FileList
        A FileList of the coincident trigger files that are used as
        input at this stage.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.
    do_repop : Boolean
        If False, use the 'coinc_inspiral.snr' column from the coincident
        trigger files as the clustering and ranking statistic; if True, run
        a repop_coinc job before clustering to calculate a different ranking
        statistic and store it in the coinc_inspiral table for later use.
    injectionFiles : pycbc.workflow.core.FileList (optional, default=None)
        The injection files to be used in this stage. An empty list (or any
        other input that evaluates as false) is valid and will imply that no
        injections are being done.
    vetoFiles : pycbc.workflow.core.FileList (required)
        The data quality files to be used in this stage. This is required and
        will be used to determine the analysed times when doing post-processing.
    injLessTag : string (required)
        The tag that identifies files that do not have simulations in them.
        I.e. the primary search results.
    injectionTags : list of strings (optional, default = [])
        Each injection file has a unique tag. If used in the method, this
        tells the post-processing preparation code which injection tags it
        should include when creating the combined output.
    veto_cats : list of integers (optional, default = [])
        Decide which set of veto files should be used in the post-processing
        preparation. For example tell the workflow to only generate results
        at cumulative categories 2, 3 and 4 by supplying [2,3,4] here.

    Returns
    --------
    finalFiles : pycbc.workflow.core.FileList
        A list of the single SQL database storing the clustered, injection
        found, triggers for all injections, time slid and zero lag analyses.
    initialSqlFiles : pycbc.workflow.core.FileList
        The SQL files before clustering is applied and injection finding
        performed.
    clusteredSqlFiles : pycbc.workflow.core.FileList
        The clustered SQL files before injection finding performed.
    combinedSqlFiles : pycbc.workflow.core.FileList
        A combined file containing all triggers after clustering, including
        the injection and veto tables, but before injection finding performed.
        Probably there is no need to ever keep this file and it will be a
        temporary file in most cases.
    """
    if not veto_cats:
        raise ValueError("A non-empty list of veto categories is required.")

    # Setup needed exe classes
    sqliteCombine1ExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-combiner1-exe", tags)
    sqliteCombine1Exe = select_generic_executable(workflow, 
                                                  sqliteCombine1ExeTag)
    sqliteCombine2ExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-combiner2-exe", tags)
    sqliteCombine2Exe = select_generic_executable(workflow, 
                                                  sqliteCombine2ExeTag)
    clusterCoincsExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-cluster-exe", tags)
    clusterCoincsExe = select_generic_executable(workflow, clusterCoincsExeTag)
    injFindExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-injfind-exe", tags)
    injFindExe = select_generic_executable(workflow, injFindExeTag)

    sqliteCombine1Outs = FileList([])
    clusterCoincsOuts = FileList([])
    injFindOuts = FileList([])
    sqliteCombine2Outs = FileList([])

    if do_repop:
        repopCoincExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                                "postprocprep-repop-exe", tags)
        repopCoincExe = select_generic_executable(workflow, repopCoincExeTag)
        repopCoincOuts = FileList([])

    for cat in veto_cats:
        # FIXME: Some hacking is still needed while we support pipedown
        # FIXME: There are currently 3 names to say cumulative cat_3
        vetoTag = 'CUMULATIVE_CAT_%d' %(cat)
        dqSegFile = vetoFiles.find_output_with_tag(vetoTag)
        if not len(dqSegFile) == 1:
            errMsg = "Did not find exactly 1 data quality file."
            raise ValueError(errMsg)
        # Don't think this is used here, this is the tag *in* the file
        dqVetoName = 'VETO_CAT%d_CUMULATIVE' %(cat)
        # FIXME: Here we set the dqVetoName to be compatible with pipedown
        pipedownDQVetoName = 'CAT_%d_VETO' %(cat)

        sqliteCombine2Inputs = FileList([])
        # Do injection-less jobs first.

        # Choose a label for clustering the jobs
        job_label = get_random_label()

        # Combine trig files first
        currTags = tags + [injLessTag, vetoTag]
        trigVetoInpFiles = coincFiles.find_output_with_tag(pipedownDQVetoName)
        trigInpFiles = trigVetoInpFiles.find_output_with_tag(injLessTag)
        if len(trigInpFiles) == 0:
            err_msg = "No input files found. Workflow would fail."
            raise ValueError(err_msg)
        trigInpFiles.append(dqSegFile[0])
        sqliteCombine1Job = sqliteCombine1Exe(workflow.cp,
                                              sqliteCombine1ExeTag,
                                              ifo=workflow.ifo_string,
                                              out_dir=output_dir,
                                              tags=currTags)
        sqliteCombine1Node = sqliteCombine1Job.create_node(
                                          workflow.analysis_time, trigInpFiles, 
                                          workflow=workflow)
        sqliteCombine1Node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(sqliteCombine1Node)
        # Node has only one output file
        sqliteCombine1Out = sqliteCombine1Node.output_files[0]
        sqliteCombine1Outs.append(sqliteCombine1Out)

        if do_repop:
            repopCoincJob = repopCoincExe(workflow.cp,
                                          repopCoincExeTag,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=currTags)
            repopCoincNode = repopCoincJob.create_node(workflow.analysis_time,
                                                       sqliteCombine1Out)
            repopCoincNode.add_profile('pegasus', 'label', job_label)
            workflow.add_node(repopCoincNode)
            # Node has only one output file
            repopCoincOut = repopCoincNode.output_files[0]
            repopCoincOuts.append(repopCoincOut)

        # Input file plumbing allowing for possible repop_coinc job
        clusterCoincsIn = repopCoincOut if do_repop else sqliteCombine1Out
        # Cluster coincidences
        clusterCoincsJob = clusterCoincsExe(workflow.cp,
                                            clusterCoincsExeTag,
                                            ifo=workflow.ifo_string, 
                                            out_dir=output_dir, 
                                            tags=currTags)
        clusterCoincsNode = clusterCoincsJob.create_node(
                                       workflow.analysis_time, clusterCoincsIn)
        clusterCoincsNode.add_profile('pegasus', 'label', job_label)
        workflow.add_node(clusterCoincsNode)
        # Node has only one output file
        clusterCoincsOut = clusterCoincsNode.output_files[0]
        clusterCoincsOuts.append(clusterCoincsOut)
        sqliteCombine2Inputs.append(clusterCoincsOut)

        # Do injection jobs
        for injTag in injectionTags:
            # Choose a label for clustering the jobs
            job_label = get_random_label()
            # Combine trig files first
            currTags = tags + [injTag, vetoTag]
            trigInpFiles = trigVetoInpFiles.find_output_with_tag(injTag)
            trigInpFiles.append(dqSegFile[0])
            injFile = injectionFiles.find_output_with_tag(injTag)
            assert (len(injFile) == 1)
            sqliteCombine1Job = sqliteCombine1Exe(workflow.cp,
                                                  sqliteCombine1ExeTag,
                                                  ifo=workflow.ifo_string,
                                                  out_dir=output_dir,
                                                  tags=currTags)
            sqliteCombine1Node = sqliteCombine1Job.create_node(
                                          workflow.analysis_time, trigInpFiles,
                                          injFile=injFile[0], injString=injTag,
                                          workflow=workflow)
            sqliteCombine1Node.add_profile('pegasus', 'label', job_label)
            workflow.add_node(sqliteCombine1Node)
            # Node has only one output file
            sqliteCombine1Out = sqliteCombine1Node.output_files[0]
            sqliteCombine1Outs.append(sqliteCombine1Out)

            if do_repop:
                repopCoincJob = repopCoincExe(workflow.cp,
                                          repopCoincExeTag,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=currTags)
                repopCoincNode = repopCoincJob.create_node(
                                     workflow.analysis_time, sqliteCombine1Out)
                repopCoincNode.add_profile('pegasus', 'label', job_label)
                workflow.add_node(repopCoincNode)
                # Node has only one output file
                repopCoincOut = repopCoincNode.output_files[0]
                repopCoincOuts.append(repopCoincOut)

            # Input file plumbing allowing for possible repop_coinc job
            clusterCoincsIn = repopCoincOut if do_repop else sqliteCombine1Out
            # Cluster coincidences
            clusterCoincsJob = clusterCoincsExe(workflow.cp,
                                                clusterCoincsExeTag,
                                                ifo=workflow.ifo_string,
                                                out_dir=output_dir,
                                                tags=currTags)
            clusterCoincsNode = clusterCoincsJob.create_node(
                                       workflow.analysis_time, clusterCoincsIn)
            clusterCoincsNode.add_profile('pegasus', 'label', job_label)
            workflow.add_node(clusterCoincsNode)
            # Node has only one output file
            clusterCoincsOut = clusterCoincsNode.output_files[0]
            clusterCoincsOuts.append(clusterCoincsOut)
            sqliteCombine2Inputs.append(clusterCoincsOut)

        # Choose a new label for pegasus-clustering the jobs
        job_label = get_random_label()

        # Combine everything together and add veto file
        currTags = tags + [vetoTag]
        sqliteCombine2Job = sqliteCombine2Exe(workflow.cp, 
                                              sqliteCombine2ExeTag,
                                              ifo=workflow.ifo_string, 
                                              out_dir=output_dir,
                                              tags=currTags)
        sqliteCombine2Node = sqliteCombine2Job.create_node(
                                  workflow.analysis_time, sqliteCombine2Inputs)
        sqliteCombine2Node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(sqliteCombine2Node)
        sqliteCombine2Out = sqliteCombine2Node.output_files[0]
        sqliteCombine2Outs.append(sqliteCombine2Out)

        # Inj finding
        injFindJob = injFindExe(workflow.cp, injFindExeTag,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,tags=currTags)
        injFindNode = injFindJob.create_node(workflow.analysis_time,
                                                         sqliteCombine2Out)
        injFindNode.add_profile('pegasus', 'label', job_label)
        workflow.add_node(injFindNode)
        injFindOut = injFindNode.output_files[0]
        injFindOuts.append(injFindOut)


    return injFindOuts, sqliteCombine1Outs, clusterCoincsOuts,\
           sqliteCombine2Outs
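
A hedged usage sketch (not part of the source): the input FileLists are placeholders for the outputs of the earlier coincidence, injection and segment-generation stages, and 'FULL_DATA' is assumed to be the tag identifying the injection-free analysis.

inj_found_dbs, raw_dbs, clustered_dbs, combined_dbs = \
    setup_postprocprep_pipedown_workflow(workflow,
                                         coincFiles=coinc_files,
                                         output_dir='postprocprep',
                                         injectionFiles=inj_files,
                                         vetoFiles=veto_seg_files,
                                         injLessTag='FULL_DATA',
                                         injectionTags=inj_tags,
                                         veto_cats=[2, 3, 4])
# inj_found_dbs feeds the pipedown-style post-processing stage above.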
Example n. 12
0
def setup_postprocprep_gstlal_workflow(workflow, coinc_files, output_dir,
                                       tags=[], injection_files=None,
                                       veto_files=None, inj_less_tag=None,
                                       injection_tags=[], veto_cat=None,
                                       summary_xml_files=None,
                                       likelihood_files=[]):
    """
    Parameters
    -----------
    workflow : workflow.Workflow
        The workflow instance that the coincidence jobs will be added to.
    coinc_files : workflow.FileList
        A FileList of the coincident trigger files that are used as
        input at this stage.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.
    injection_files : workflow.FileList (optional, default=None)
        The injection files to be used in this stage. An empty list (or any
        other input that evaluates as false) is valid and will imply that no
        injections are being done.
    veto_files : workflow.FileList (required)
        The data quality files to be used in this stage. This is required and
        will be used to determine the analysed times when doing post-processing.
    inj_less_tag : string (required)
        The tag that identifies files that do not have simulations in them.
        I.e. the primary search results.
    injection_tags : list of strings (optional, default = [])
        Each injection file has a unique tag. If used in the method, this
        tells the post-processing preparation code which injection tags it
        should include when creating the combined output.
    veto_cat : int (optional, default = None)
        FIXME: How does gstlal deal with veto categories?
        Hardcode to CAT1 for now.
    summary_xml_files : workflow.FileList
        A FileList of the output of the analysislogging_utils module.
        Here, this will be one file that includes the segments analysed by the
        workflow.

    Returns
    --------
    finalFiles : workflow.FileList
        A list of the final SQL databases (one per analysis) storing the
        clustered, injection-found triggers for the injection, time-slid and
        zero-lag analyses.
    initialSqlFiles : workflow.FileList
        The SQL files before clustering is applied and injection finding is
        performed.
    clusteredSqlFiles : workflow.FileList
        The clustered SQL files before injection finding is performed.
    combinedSqlFiles : workflow.FileList
        A combined file containing all triggers after clustering, including
        the injection and veto tables, but before injection finding is
        performed. There is probably no need to ever keep this file; in most
        cases it will be a temporary file.
    """
    # Sanity checks
    if len(summary_xml_files) != 1:
        errMsg = "I need exactly one summaryXML file, got %d." \
                 % (len(summary_xml_files),)
        raise ValueError(errMsg)

    # Setup needed exe classes
    run_sqlite_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-runsqlite-exe", tags)
    ligolw_sqlite_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-ligolwsqlite-exe", tags) 
    inspinjfind_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-inspinjfind-exe", tags)
    sql_to_xml_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-sqltoxml-exe", tags)
    pycbc_picklehor_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-picklehor-exe", tags)
    pycbc_combllhood_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-combllhood-exe", tags)
    pycbc_genranking_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-genranking-exe", tags)
    pycbc_compllhood_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-compllhood-exe", tags)
    marg_likelihood_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-marglikelihood-exe", tags)
    far_gstlal_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-fargstlal-exe", tags)
    plot_summary_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-plotsummary-exe", tags)
    plot_sensitivity_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-plotsensitivity-exe", tags)
    plot_background_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-plotbackground-exe", tags)
    summary_page_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-summarypage-exe", tags)


    run_sqlite_exe = select_generic_executable(workflow, run_sqlite_exe_name)
    ligolw_sqlite_exe = select_generic_executable(workflow,
                                                        ligolw_sqlite_exe_name)
    inspinjfind_exe = select_generic_executable(workflow, inspinjfind_exe_name)
    sql_to_xml_exe = select_generic_executable(workflow, sql_to_xml_exe_name)
    pycbc_picklehor_exe = select_generic_executable(workflow,
                                                      pycbc_picklehor_exe_name)
    pycbc_combllhood_exe = select_generic_executable(workflow,
                                                     pycbc_combllhood_exe_name)
    pycbc_genranking_exe = select_generic_executable(workflow,
                                                     pycbc_genranking_exe_name)
    pycbc_compllhood_exe = select_generic_executable(workflow,
                                                     pycbc_compllhood_exe_name)
    marg_likelihood_exe = select_generic_executable(workflow,
                                                      marg_likelihood_exe_name)
    far_gstlal_exe = select_generic_executable(workflow, far_gstlal_exe_name)
    plot_summary_exe = select_generic_executable(workflow,
                                                         plot_summary_exe_name)
    plot_sensitivity_exe = select_generic_executable(workflow,
                                                     plot_sensitivity_exe_name)
    plot_background_exe = select_generic_executable(workflow,
                                                      plot_background_exe_name)
    summary_page_exe = select_generic_executable(workflow,
                                                         summary_page_exe_name)


    # SETUP
    # FIXME: Some hacking is still needed while we support pipedown
    # FIXME: How does gstlal deal with veto categories?
    #         Hardcode to CAT1 for now.
    veto_tag = 'CUMULATIVE_CAT_%d' %(veto_cat,)
    dq_seg_file = veto_files.find_output_with_tag(veto_tag)
    if len(dq_seg_file) != 1:
        errMsg = "Did not find exactly 1 data quality file, got %d." \
                 % (len(dq_seg_file),)
        raise ValueError(errMsg)
    dq_seg_file = dq_seg_file[0]
    # FIXME: Here we set the dqVetoName to be compatible with pipedown
    pipedown_dq_veto_name = 'CAT_%d_VETO' %(veto_cat,)

    # First we need to convert to SQL; this is STAGE0
    # Do for all injection runs and zero lag
    stage0_outputs = {}
    for inj_tag in [inj_less_tag] + injection_tags:
        curr_tags = tags + [inj_tag, veto_tag]
        trig_veto_inp_files = \
                  coinc_files.find_output_with_tag(pipedown_dq_veto_name)
        trig_inp_files = trig_veto_inp_files.find_output_with_tag(inj_tag)
        stage0_job = ligolw_sqlite_exe(workflow.cp, ligolw_sqlite_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE0'] + curr_tags)
        stage0_outputs[inj_tag] = FileList([])
        assert len(trig_inp_files) > 0
        for trig_file in trig_inp_files:
            stage0_node = stage0_job.create_node(trig_file.segment, [trig_file])
            workflow.add_node(stage0_node)
            # Node has only one output file
            stage0_out = stage0_node.output_files[0]
            stage0_outputs[inj_tag].append(stage0_out)

    curr_tags = tags + [veto_tag]

    # NOW WE DO LIKELIHOOD SETUP
    pycbc_picklehor_job = pycbc_picklehor_exe(workflow.cp,
                                  pycbc_picklehor_exe_name,
                                  ifo=workflow.ifo_string,
                                  out_dir=output_dir,
                                  tags=curr_tags)
    pycbc_combllhood_job = pycbc_combllhood_exe(workflow.cp,
                                  pycbc_combllhood_exe_name,
                                  ifo=workflow.ifo_string,
                                  out_dir=output_dir,
                                  tags=curr_tags)
    pycbc_genranking_job = pycbc_genranking_exe(workflow.cp, 
                                  pycbc_genranking_exe_name,
                                  ifo=workflow.ifo_string,
                                  out_dir=output_dir,
                                  tags=curr_tags)
    marg_likelihood_job_1 = marg_likelihood_exe(workflow.cp,
                                  marg_likelihood_exe_name,
                                  ifo=workflow.ifo_string,
                                  out_dir=output_dir,
                                  tags=['MARG1']+curr_tags)
    marg_likelihood_job_2 = marg_likelihood_exe(workflow.cp,
                                  marg_likelihood_exe_name,
                                  ifo=workflow.ifo_string,
                                  out_dir=output_dir,
                                  tags=['MARG2']+curr_tags)


    # Begin with finding the horizon distances
    picklehor_inputs = stage0_outputs[inj_less_tag]
    node = pycbc_picklehor_job.create_node(workflow.analysis_time,
                                                              picklehor_inputs)
    workflow.add_node(node)
    horizon_dist_file = node.output_files[0]
    # Then combine all likelihood files
    combllhood_inputs = likelihood_files.find_output_with_tag(
                                                         pipedown_dq_veto_name)
    combllhood_inputs = combllhood_inputs.find_output_with_tag(inj_less_tag)
    assert len(combllhood_inputs) > 0
    node = pycbc_combllhood_job.create_node(workflow.analysis_time,
                                          combllhood_inputs, horizon_dist_file)
    workflow.add_node(node)
    likelihood_file = node.output_files[0]
    # Also compute the ranking file
    node = pycbc_genranking_job.create_node(workflow.analysis_time,
                                            likelihood_file, horizon_dist_file)
    workflow.add_node(node)
    ranking_likelihood_file = node.output_files[0]
    # And marginalize (twice for some reason!)
    node = marg_likelihood_job_1.create_node(workflow.analysis_time,
                                                       ranking_likelihood_file)
    workflow.add_node(node)
    marg_likelihood_file_1 = node.output_files[0]
    node = marg_likelihood_job_2.create_node(workflow.analysis_time,
                                                        marg_likelihood_file_1)
    workflow.add_node(node)
    marg_likelihood_file_2 = node.output_files[0]
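    # marg_likelihood_file_2 is used further down as an input to the FAR
    # computation jobs for the zero-lag and every injection run.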

    # Now do the sqlite conditioning. This has a few stages.
                                                  
    # STAGE 1: Populate likelihood in all input files
    # STAGE 2: Run run_sqlite on all outputs of stage 1
    # STAGE 3: Combine all files into one sqlite file
    # STAGE 4: Run run_sqlite on outputs of stage 3
    # STAGE 5: Add segments.xml and inj.xml
    # STAGE 6: Run run_sqlite (cluster and simplify) on outputs of stage 5
    # STAGE 7: Dump SQL database to xml
    # STAGE 8: Run injfind on the xml document
    # STAGE 9: Convert back to SQL
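    # Note that the zero-lag (injection-less) run stops after stage 6, whose
    # output carries the FINAL tag; the injection runs continue through
    # stages 7-9 so that injection finding can be run on the XML document
    # before converting back to SQL.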

    stage1_outputs = {}
    stage2_outputs = {}
    stage3_outputs = {}
    stage4_outputs = {}
    stage5_outputs = {}
    stage6_outputs = {}
    stage7_outputs = {}
    stage8_outputs = {}
    stage9_outputs = {}
    final_outputs = FileList([])
    # Do for all injection runs and zero lag
    for inj_tag in [inj_less_tag] + injection_tags:
        curr_tags = tags + [inj_tag, veto_tag]
        trig_inp_files = stage0_outputs[inj_tag]
        stage1_job = pycbc_compllhood_exe(workflow.cp,
                                      pycbc_compllhood_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE1']+curr_tags)
        stage2_job = run_sqlite_exe(workflow.cp, run_sqlite_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE2'] + curr_tags)
        stage3_job = ligolw_sqlite_exe(workflow.cp, ligolw_sqlite_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE3'] + curr_tags)
        stage4_job = run_sqlite_exe(workflow.cp, run_sqlite_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE4'] + curr_tags)
        stage5_job = ligolw_sqlite_exe(workflow.cp, ligolw_sqlite_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE5'] + curr_tags)
        if inj_tag == inj_less_tag:
            # For zero-lag we stop here, so use the FINAL tag to indicate this
            stage6_zl_job = run_sqlite_exe(workflow.cp, run_sqlite_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=['FINAL'] + curr_tags)
        else:
            stage6_job = run_sqlite_exe(workflow.cp, run_sqlite_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=['STAGE6'] + curr_tags)
            stage7_job = sql_to_xml_exe(workflow.cp, sql_to_xml_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=['STAGE7'] + curr_tags)
            stage8_job = inspinjfind_exe(workflow.cp, inspinjfind_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=['STAGE8'] + curr_tags)
            stage9_job = ligolw_sqlite_exe(workflow.cp, ligolw_sqlite_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=['FINAL'] + curr_tags)

        stage1_outputs[inj_tag] = FileList([])
        stage2_outputs[inj_tag] = FileList([])
        assert len(trig_inp_files) > 0
        for trig_file in trig_inp_files:
            stage1_node = stage1_job.create_node(trig_file.segment, trig_file,
                                            likelihood_file, horizon_dist_file)
            workflow.add_node(stage1_node)
            # Node has only one output file
            stage1_out = stage1_node.output_files[0]
            stage1_outputs[inj_tag].append(stage1_out)
            stage2_node = stage2_job.create_node(stage1_out.segment,
                                                                    stage1_out)
            workflow.add_node(stage2_node)
            # Node has only one output file
            stage2_out = stage2_node.output_files[0]
            stage2_outputs[inj_tag].append(stage2_out)

        stage3_node = stage3_job.create_node(workflow.analysis_time,
                                    stage2_outputs[inj_tag], workflow=workflow)
        workflow.add_node(stage3_node)
        # Node has only one output file
        stage3_out = stage3_node.output_files[0]
        stage3_outputs[inj_tag] = stage3_out
        stage4_node = stage4_job.create_node(workflow.analysis_time,
                                                                    stage3_out)
        workflow.add_node(stage4_node)
        # Node has only one output file
        stage4_out = stage4_node.output_files[0]
        stage4_outputs[inj_tag] = stage4_out

        stage5_inputs = [stage4_out]
        stage5_inputs.append(summary_xml_files[0])
        stage5_inputs.append(dq_seg_file)
        if inj_tag != inj_less_tag:
            inj_file = injection_files.find_output_with_tag(inj_tag)
            assert (len(inj_file) == 1)
            stage5_inputs.append(inj_file[0])
        stage5_node = stage5_job.create_node(workflow.analysis_time,
                                                                 stage5_inputs)
        workflow.add_node(stage5_node)
        # Node has only one output file
        stage5_out = stage5_node.output_files[0]
        stage5_outputs[inj_tag] = stage5_out
  
        if inj_tag == inj_less_tag:
            stage6_node = stage6_zl_job.create_node(workflow.analysis_time,
                                                                    stage5_out)
            workflow.add_node(stage6_node)
            stage6_out = stage6_node.output_files[0]
            stage6_outputs[inj_tag] = stage6_out
            final_outputs.append(stage6_out)
        else:
            stage6_node = stage6_job.create_node(workflow.analysis_time,
                                                                    stage5_out)
            workflow.add_node(stage6_node)
            stage6_out = stage6_node.output_files[0]
            stage6_outputs[inj_tag] = stage6_out
            stage7_node = stage7_job.create_node(workflow.analysis_time,
                                                                    stage6_out)
            workflow.add_node(stage7_node)
            stage7_out = stage7_node.output_files[0]
            stage7_outputs[inj_tag] = stage7_out
            stage8_node = stage8_job.create_node(workflow.analysis_time,
                                                                    stage7_out)
            workflow.add_node(stage8_node)
            stage8_out = stage8_node.output_files[0]
            stage8_outputs[inj_tag] = stage8_out
            stage9_node = stage9_job.create_node(workflow.analysis_time,
                                                                  [stage8_out])
            workflow.add_node(stage9_node)
            stage9_out = stage9_node.output_files[0]
            stage9_outputs[inj_tag] = stage9_out
            final_outputs.append(stage9_out)

    # Next we run the job that computes the FAR from the snr_chisq histograms
    far_gstlal_outputs = {}
    for inj_tag in [inj_less_tag] + injection_tags:
        curr_tags = tags + [inj_tag, veto_tag]
        far_gstlal_job = far_gstlal_exe(workflow.cp, far_gstlal_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir, tags=curr_tags)
        trig_veto_inp_files = \
                  final_outputs.find_output_with_tag(veto_tag)
        trig_inp_files = trig_veto_inp_files.find_output_with_tag(inj_tag)
        assert len(trig_inp_files) == 1
        input_database = trig_inp_files[0]
        if inj_tag != inj_less_tag:
            no_inj_db = trig_veto_inp_files.find_output_with_tag(inj_less_tag)
            assert len(no_inj_db) == 1
            no_inj_db = no_inj_db[0]
            write_background = False
        else:
            # Here I don't want to provide the same file as a dependency
            # twice. Therefore I just give the non-injection DB and the code
            # assumes this is also the input database if it is not given.
            # Also, I only want the background file once.
            no_inj_db = input_database
            input_database = None
            write_background = True
        far_gstlal_node = far_gstlal_job.create_node(workflow.analysis_time,
                                        no_inj_db, marg_likelihood_file_2,
                                        inj_database=input_database,
                                        write_background_bins=write_background)
        workflow.add_node(far_gstlal_node)
        outputs = far_gstlal_node.output_files
        if inj_tag != inj_less_tag:
            assert len(outputs) == 1
            far_gstlal_outputs[inj_tag] = outputs[0]
        else:
            assert len(outputs) == 2
            sql_out = outputs.find_output_without_tag('POSTMARG')[0]
            xml_out = outputs.find_output_with_tag('POSTMARG')[0]
            far_gstlal_outputs[inj_tag] = sql_out
            post_marginalized_file = xml_out
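            # The post-marginalized (POSTMARG) XML file is produced only for
            # the zero-lag run and is passed to the background plotting job
            # below.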
            

    # Finally some plotting.
    # FIXME: These are given explicit output directories and pegasus does not
    # know about the output files. It would be nice if this were done "better".
    curr_tags = tags + [veto_tag]
    plot_summary_job = plot_summary_exe(workflow.cp, plot_summary_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir, tags=curr_tags)
    plot_sensitivity_job = plot_sensitivity_exe(workflow.cp,
                                          plot_sensitivity_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir, tags=curr_tags)
    plot_background_job = plot_background_exe(workflow.cp,
                                          plot_background_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir, tags=curr_tags)
    inj_dbs = []
    for inj_tag in injection_tags:
        inj_dbs.append(far_gstlal_outputs[inj_tag])
    non_inj_db = far_gstlal_outputs[inj_less_tag]
    
    plot_summary_node = plot_summary_job.create_node(non_inj_db, inj_dbs)
    plot_background_node = plot_background_job.create_node(non_inj_db,
                                                        post_marginalized_file)
    plot_sensitivity_node = plot_sensitivity_job.create_node(non_inj_db,
                                                                       inj_dbs)

    workflow.add_node(plot_summary_node)
    workflow.add_node(plot_background_node)
    workflow.add_node(plot_sensitivity_node)

    # And make the html pages
    parents = [plot_summary_node, plot_background_node, plot_sensitivity_node]
    closed_summarypage_job = summary_page_exe(workflow.cp,
                                              summary_page_exe_name,
                                              ifo=workflow.ifo_string,
                                              out_dir=output_dir,
                                              tags=['CLOSEDBOX'] + curr_tags)
    open_summarypage_job = summary_page_exe(workflow.cp, 
                                              summary_page_exe_name,
                                              ifo=workflow.ifo_string,
                                              out_dir=output_dir,
                                              tags=['OPENBOX'] + curr_tags)

    closed_summarypage_node = closed_summarypage_job.create_and_add_node(\
                                              workflow, parents)
    open_summarypage_node = open_summarypage_job.create_and_add_node(workflow,
                                              parents)

    # FIXME: Maybe concatenate and return all other outputs if needed elsewhere
    # FIXME: Move to pp utils and return the FAR files.
    return final_outputs