Example #1
def make_gating_node(workflow, datafind_files, outdir=None, tags=None):
    '''
    Generate jobs for autogating the data for PyGRB runs.

    Parameters
    ----------
    workflow : pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    datafind_files : pycbc.workflow.core.FileList
        A FileList containing the frame files to be gated.
    outdir : string
        Path of the output directory.
    tags : list of strings
        If given, these tags are used to uniquely name and identify output
        files that would be produced in multiple calls to this function.

    Returns
    --------
    condition_strain_nodes : list
        List containing the pycbc.workflow.core.Node objects representing the
        autogating jobs.
    condition_strain_outs : pycbc.workflow.core.FileList
        FileList containing the pycbc.workflow.core.File objects representing
        the gated frame files.
    '''

    cp = workflow.cp
    if tags is None:
        tags = []

    condition_strain_class = select_generic_executable(workflow,
                                                       "condition_strain")
    condition_strain_nodes = []
    condition_strain_outs = FileList([])
    for ifo in workflow.ifos:
        input_files = FileList([datafind_file for datafind_file in \
                                datafind_files if datafind_file.ifo == ifo])
        condition_strain_jobs = condition_strain_class(cp,
                                                       "condition_strain",
                                                       ifo=ifo,
                                                       out_dir=outdir,
                                                       tags=tags)
        condition_strain_node, condition_strain_out = \
                condition_strain_jobs.create_node(input_files, tags=tags)
        condition_strain_nodes.append(condition_strain_node)
        condition_strain_outs.extend(FileList([condition_strain_out]))

    return condition_strain_nodes, condition_strain_outs
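
The returned nodes are not added to the workflow by this function. A minimal usage sketch, not part of the original module: it assumes wflow is an initialised pycbc.workflow.core.Workflow, datafind_frames is the FileList produced by its datafind step, and the configuration defines a condition_strain executable.

# Hypothetical illustration of calling make_gating_node
gating_nodes, gated_frames = make_gating_node(wflow, datafind_frames,
                                              outdir='gating',
                                              tags=['PYGRB'])
for node in gating_nodes:
    wflow.add_node(node)
# gated_frames can then replace the raw frame files in downstream jobs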
Example #2
def make_gating_node(workflow, datafind_files, outdir=None, tags=None):
    '''
    Generate jobs for autogating the data for PyGRB runs.

    Parameters
    ----------
    workflow : pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    datafind_files : pycbc.workflow.core.FileList
        A FileList containing the frame files to be gated.
    outdir : string
        Path of the output directory.
    tags : list of strings
        If given, these tags are used to uniquely name and identify output
        files that would be produced in multiple calls to this function.

    Returns
    --------
    condition_strain_nodes : list
        List containing the pycbc.workflow.core.Node objects representing the
        autogating jobs.
    condition_strain_outs : pycbc.workflow.core.FileList
        FileList containing the pycbc.workflow.core.File objects representing
        the gated frame files.
    '''

    cp = workflow.cp
    if tags is None:
        tags = []
    
    condition_strain_class = select_generic_executable(workflow,
                                                       "condition_strain")
    condition_strain_nodes = []
    condition_strain_outs = FileList([])
    for ifo in workflow.ifos:
        input_files = FileList([datafind_file for datafind_file in \
                                datafind_files if datafind_file.ifo == ifo])
        condition_strain_jobs = condition_strain_class(cp, "condition_strain",
                ifo=ifo, out_dir=outdir, tags=tags)
        condition_strain_node, condition_strain_out = \
                condition_strain_jobs.create_node(input_files, tags=tags)
        condition_strain_nodes.append(condition_strain_node)
        condition_strain_outs.extend(FileList([condition_strain_out]))

    return condition_strain_nodes, condition_strain_outs
Example #3
def setup_minifollowups(workflow, out_dir, frame_files, coinc_file,
                        tmpltbank_file, data_type, tags=None):
    ''' This performs a series of followup jobs on the num_events loudest
    events.
    '''

    logging.info('Entering minifollowups module')

    if tags is None:
        tags = []

    # create a FileList that will contain all output files
    output_filelist = FileList([])

    # check if minifollowups section exists
    # if not then do not do add minifollowup jobs to the workflow
    if not workflow.cp.has_section('workflow-minifollowups'):
        logging.info('There is no [workflow-minifollowups] section in '
                     'configuration file')
        logging.info('Leaving minifollowups')
        return output_filelist

    # loop over number of loudest events to be followed up
    num_events = int(workflow.cp.get_opt_tags('workflow-minifollowups', 'num-events', ''))
    for num_event in range(num_events):

        # increment by 1 for human readability
        num_event += 1

        # get output directory for this event
        tag_str = '_'.join(tags)
        output_dir = out_dir['result/loudest_event_%d_of_%d_%s'%(num_event, num_events, tag_str)]

        # make a pycbc_mf_table node for this event
        table_exe = MinifollowupsTableExecutable(workflow.cp, 'mf_table',
                        workflow.ifo_string, output_dir, tags=tags)
        table_node = table_exe.create_node(workflow.analysis_time, coinc_file,
                        tmpltbank_file, data_type, num_event)
        workflow.add_node(table_node)
        output_filelist.extend(table_node.output_files)

    logging.info('Leaving minifollowups module')

    return output_filelist
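
A hedged usage sketch, not from the original source. The indexing of out_dir in the function body suggests it is a path-mapping object rather than a plain string, and the configuration must provide a [workflow-minifollowups] section with a num-events option; all names below other than the function itself are assumptions.

# Hypothetical call; wflow, result_dirs, frames, coinc_file and bank_file
# are assumed to be produced by earlier workflow modules.
mf_files = setup_minifollowups(wflow, result_dirs, frames, coinc_file,
                               bank_file, 'FULL_DATA', tags=['FULL_DATA'])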
Example #4
def get_coh_PTF_files(cp, ifos, run_dir, bank_veto=False, summary_files=False):
    """
    Retrieve files needed to run coh_PTF jobs within a PyGRB workflow.

    Parameters
    ----------
    cp : pycbc.workflow.configuration.WorkflowConfigParser object
        The parsed configuration options of a pycbc.workflow.core.Workflow.
    ifos : str
        String containing the analysis interferometer IDs.
    run_dir : str
        The run directory, destination for retrieved files.
    bank_veto : Boolean
        If true, will retrieve the bank_veto_bank.xml file.
    summary_files : Boolean
        If true, will retrieve the summary page style files.

    Returns
    -------
    file_list : pycbc.workflow.FileList object
        A FileList containing the retrieved files.
    """
    if os.getenv("LAL_SRC") is None:
        raise ValueError("The environment variable LAL_SRC must be set to a "
                         "location containing the file lalsuite.git")
    else:
        lalDir = os.getenv("LAL_SRC")
        sci_seg = segments.segment(int(cp.get("workflow", "start-time")),
                                   int(cp.get("workflow", "end-time")))
        file_list = FileList([])

        # Bank veto
        if bank_veto:
            shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/" \
                        "bank_veto_bank.xml" % lalDir, "%s" % run_dir)
            bank_veto_url = "file://localhost%s/bank_veto_bank.xml" % run_dir
            bank_veto = File(ifos,
                             "bank_veto_bank",
                             sci_seg,
                             file_url=bank_veto_url)
            bank_veto.PFN(bank_veto.cache_entry.path, site="local")
            file_list.extend(FileList([bank_veto]))

        if summary_files:
            # summary.js file
            shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/" \
                        "coh_PTF_html_summary.js" % lalDir, "%s" % run_dir)
            summary_js_url = "file://localhost%s/coh_PTF_html_summary.js" \
                             % run_dir
            summary_js = File(ifos,
                              "coh_PTF_html_summary_js",
                              sci_seg,
                              file_url=summary_js_url)
            summary_js.PFN(summary_js.cache_entry.path, site="local")
            file_list.extend(FileList([summary_js]))

            # summary.css file
            shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/" \
                        "coh_PTF_html_summary.css" % lalDir, "%s" % run_dir)
            summary_css_url = "file://localhost%s/coh_PTF_html_summary.css" \
                              % run_dir
            summary_css = File(ifos,
                               "coh_PTF_html_summary_css",
                               sci_seg,
                               file_url=summary_css_url)
            summary_css.PFN(summary_css.cache_entry.path, site="local")
            file_list.extend(FileList([summary_css]))

        return file_list
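
A minimal sketch of calling this helper; the names and paths below are assumptions. LAL_SRC must point at a checkout containing the coh_PTF configuration files, and the returned FileList can then be handed to the PyGRB post-processing jobs.

import os

# Hypothetical values; cp is assumed to be the workflow's WorkflowConfigParser.
os.environ["LAL_SRC"] = "/path/to/lalsuite"
static_files = get_coh_PTF_files(cp, "H1L1V1", "/path/to/run_dir",
                                 bank_veto=True, summary_files=True)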
Example #5
def get_coh_PTF_files(cp, ifos, run_dir, bank_veto=False, summary_files=False):
    """
    Retrieve files needed to run coh_PTF jobs within a PyGRB workflow.

    Parameters
    ----------
    cp : pycbc.workflow.configuration.WorkflowConfigParser object
        The parsed configuration options of a pycbc.workflow.core.Workflow.
    ifos : str
        String containing the analysis interferometer IDs.
    run_dir : str
        The run directory, destination for retrieved files.
    bank_veto : Boolean
        If true, will retrieve the bank_veto_bank.xml file.
    summary_files : Boolean
        If true, will retrieve the summary page style files.

    Returns
    -------
    file_list : pycbc.workflow.FileList object
        A FileList containing the retrieved files.
    """
    if os.getenv("LAL_SRC") is None:
        raise ValueError("The environment variable LAL_SRC must be set to a "
                         "location containing the file lalsuite.git")
    else:
        lalDir = os.getenv("LAL_SRC")
        sci_seg = segments.segment(int(cp.get("workflow", "start-time")),
                                   int(cp.get("workflow", "end-time")))
        file_list = FileList([])

        # Bank veto
        if bank_veto:
            shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/" \
                        "bank_veto_bank.xml" % lalDir, "%s" % run_dir)
            bank_veto_url = "file://localhost%s/bank_veto_bank.xml" % run_dir
            bank_veto = File(ifos, "bank_veto_bank", sci_seg,
                             file_url=bank_veto_url)
            bank_veto.PFN(bank_veto.cache_entry.path, site="local")
            file_list.extend(FileList([bank_veto]))

        if summary_files:
            # summary.js file
            shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/" \
                        "coh_PTF_html_summary.js" % lalDir, "%s" % run_dir)
            summary_js_url = "file://localhost%s/coh_PTF_html_summary.js" \
                             % run_dir
            summary_js = File(ifos, "coh_PTF_html_summary_js", sci_seg,
                              file_url=summary_js_url)
            summary_js.PFN(summary_js.cache_entry.path, site="local")
            file_list.extend(FileList([summary_js]))

            # summary.css file
            shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/" \
                        "coh_PTF_html_summary.css" % lalDir, "%s" % run_dir)
            summary_css_url = "file://localhost%s/coh_PTF_html_summary.css" \
                              % run_dir
            summary_css = File(ifos, "coh_PTF_html_summary_css", sci_seg,
                               file_url=summary_css_url)
            summary_css.PFN(summary_css.cache_entry.path, site="local")
            file_list.extend(FileList([summary_css]))

        return file_list
Example #6
def setup_postproc_coh_PTF_workflow(workflow, trig_files, trig_cache,
                                    inj_trig_files, inj_files, inj_trig_caches,
                                    inj_caches, config_file, output_dir,
                                    html_dir, segment_dir, ifos, inj_tags=[],
                                    tags=[]):
    """
    This module sets up the post-processing stage in the workflow, using a
    coh_PTF style set up. This consists of running trig_combiner to find
    coherent triggers, and injfinder to look for injections. It then runs
    a horizon_dist job, trig_cluster to cluster triggers, and injcombiner to
    calculate injection statistics. Finally, efficiency and sbv_plotter jobs
    calculate efficiency and signal based veto statistics and make plots.
    
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the jobs will be added to.
    trig_files : pycbc.workflow.core.FileList
        A FileList containing the combined databases.
   
    Returns
    --------
    
    """
    cp = workflow.cp
    full_segment = trig_files[0].segment
    trig_name = cp.get("workflow", "trigger-name")
    grb_string = "GRB" + trig_name
    num_trials = int(cp.get("trig_combiner", "num-trials"))

    pp_outs = FileList([])
    pp_nodes = []

    # Set up needed exe classes
    trig_combiner_exe = os.path.basename(cp.get("executables",
                                                "trig_combiner"))
    trig_combiner_class = select_generic_executable(workflow, "trig_combiner")

    trig_cluster_exe = os.path.basename(cp.get("executables", "trig_cluster"))
    trig_cluster_class = select_generic_executable(workflow, "trig_cluster")


    sbv_plotter_exe = os.path.basename(cp.get("executables", "sbv_plotter"))
    sbv_plotter_class = select_generic_executable(workflow, "sbv_plotter")
    
    efficiency_exe = os.path.basename(cp.get("executables", "efficiency"))
    efficiency_class = select_generic_executable(workflow, "efficiency")
    """
    horizon_dist_exe = os.path.basename(cp.get("executables",
                                               "horizon_dist"))
    horizon_dist_class = select_generic_executable(workflow,
                                                   "horizon_dist")
    """
    html_summary_exe = os.path.basename(cp.get("executables", "html_summary"))
    html_summary_class = select_generic_executable(workflow, "html_summary")

    # Set up trig_combiner job
    trig_combiner_out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]
    trig_combiner_jobs = trig_combiner_class(cp, "trig_combiner", ifo=ifos, 
                                             out_dir=output_dir, tags=tags)
    trig_combiner_node, trig_combiner_outs = trig_combiner_jobs.create_node(\
            trig_files, segment_dir, out_tags=trig_combiner_out_tags,
            tags=tags)
    pp_nodes.append(trig_combiner_node)
    workflow.add_node(trig_combiner_node)
    pp_outs.extend(trig_combiner_outs)

    # Initialise trig_cluster class
    trig_cluster_outs = FileList([])
    trig_cluster_jobs = trig_cluster_class(cp, "trig_cluster", ifo=ifos,
                                           out_dir=output_dir, tags=tags)

    # Set up injfinder jobs
    if cp.has_section("workflow-injections"):
        injfinder_nodes = []
        injcombiner_parent_nodes = []

        injfinder_exe = os.path.basename(cp.get("executables", "injfinder"))
        injfinder_class = select_generic_executable(workflow, "injfinder")
        injfinder_jobs = injfinder_class(cp, "injfinder", ifo=ifos,
                                         out_dir=output_dir, tags=tags)

        injcombiner_exe = os.path.basename(cp.get("executables",
                                                  "injcombiner"))
        injcombiner_class = select_generic_executable(workflow, "injcombiner")
        injcombiner_jobs = injcombiner_class(cp, "injcombiner", ifo=ifos,
                                             out_dir=output_dir, tags=tags)

        injfinder_outs = FileList([])
        for inj_tag in inj_tags:
            triggers = FileList([file for file in inj_trig_files \
                                 if inj_tag in file.tag_str])
            injections = FileList([file for file in inj_files \
                                   if inj_tag in file.tag_str])
            trig_cache = [file for file in inj_trig_caches \
                          if inj_tag in file.tag_str][0]
            inj_cache = [file for file in inj_caches \
                         if inj_tag in file.tag_str][0]
            injfinder_node, curr_outs = injfinder_jobs.create_node(\
                    triggers, injections, segment_dir, tags=[inj_tag])
            injfinder_nodes.append(injfinder_node)
            pp_nodes.append(injfinder_node)
            workflow.add_node(injfinder_node)
            injfinder_outs.extend(curr_outs)
            if "DETECTION" not in curr_outs[0].tag_str:
                injcombiner_parent_nodes.append(injfinder_node)

        pp_outs.extend(injfinder_outs)

        # Make injfinder output cache
        fm_cache = File(ifos, "foundmissed", full_segment,
                        extension="lcf", directory=output_dir)
        fm_cache.PFN(fm_cache.cache_entry.path, site="local")
        injfinder_outs.convert_to_lal_cache().tofile(\
                open(fm_cache.storage_path, "w"))
        pp_outs.extend(FileList([fm_cache]))

        # Set up injcombiner jobs
        injcombiner_outs = FileList([file for file in injfinder_outs \
                                     if "DETECTION" in file.tag_str])
        injcombiner_tags = [inj_tag for inj_tag in inj_tags \
                            if "DETECTION" not in inj_tag]
        injcombiner_out_tags = [injcombiner_outs[0].tag_str.rsplit('_', 1)[0]]
        injcombiner_nodes = []

        for injcombiner_tag in injcombiner_tags:
            max_inc = cp.get_opt_tags("injections", "max-inc",
                                      [injcombiner_tag])
            inj_str = injcombiner_tag[:4]
            inputs = FileList([file for file in injfinder_outs \
                               if injcombiner_tag in file.tagged_description])
            #                   if any(tag in file.tagged_description \
            #                          for tag in injcombiner_tags)])
            injcombiner_node, curr_outs = injcombiner_jobs.create_node(\
                    fm_cache, inputs, inj_str, max_inc, workflow.analysis_time)
            injcombiner_nodes.append(injcombiner_node)
            injcombiner_out_tags.append("%s_FILTERED_%s" % (inj_str, max_inc))
            injcombiner_outs.extend(curr_outs)
            pp_outs.extend(curr_outs)
            pp_nodes.append(injcombiner_node)
            workflow.add_node(injcombiner_node)
            for parent_node in injcombiner_parent_nodes:
                dep = dax.Dependency(parent=parent_node._dax_node,
                                     child=injcombiner_node._dax_node)
                workflow._adag.addDependency(dep)

        # Initialise injection_efficiency class
        inj_efficiency_jobs = efficiency_class(cp, "inj_efficiency", ifo=ifos,
                                               out_dir=output_dir, tags=tags)

    # Initialise sbv_plotter class
    sbv_plotter_outs = FileList([])
    sbv_plotter_jobs = sbv_plotter_class(cp, "sbv_plotter", ifo=ifos,
                                         out_dir=output_dir, tags=tags)

    # Initialise efficiency class
    efficiency_outs = FileList([])
    efficiency_jobs = efficiency_class(cp, "efficiency", ifo=ifos,
                                       out_dir=output_dir, tags=tags)

    # Initialise html_summary class
    html_summary_jobs = html_summary_class(cp, "html_summary", ifo=ifos,
                                           out_dir=output_dir, tags=tags)

    # Add trig_cluster jobs and their corresponding plotting jobs
    for out_tag in trig_combiner_out_tags:
        unclust_file = [file for file in trig_combiner_outs \
                        if out_tag in file.tag_str][0]
        trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        trig_cluster_outs.extend(curr_outs)
        clust_file = curr_outs[0]
        if out_tag != "ONSOURCE":
            # Add memory requirement for jobs with potentially large files
            trig_cluster_node.set_memory(1300)
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add sbv_plotter job
            sbv_out_tags = [out_tag, "_clustered"]
            sbv_plotter_node = sbv_plotter_jobs.create_node(clust_file,
                                                            segment_dir,
                                                            tags=sbv_out_tags)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)

            if out_tag == "OFFSOURCE":
                offsource_clustered = clust_file
                off_node = sbv_plotter_node

                # Guard against runs without an injection set, in which case
                # injcombiner_outs is never defined
                found_inj_files = FileList([])
                if cp.has_section("workflow-injections"):
                    found_inj_files = FileList(
                        [file for file in injcombiner_outs
                         if "FOUND" in file.tag_str])
                for curr_injs in found_inj_files:
                    curr_tags = [tag for tag in injcombiner_out_tags \
                                 if tag in curr_injs.name]
                    curr_tags.append("_clustered")
                    sbv_plotter_node = sbv_plotter_jobs.create_node(clust_file,
                            segment_dir, inj_file=curr_injs, tags=curr_tags)
                    pp_nodes.append(sbv_plotter_node)
                    workflow.add_node(sbv_plotter_node)
                    dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                         child=sbv_plotter_node._dax_node)
                    workflow._adag.addDependency(dep)
                    for parent_node in injcombiner_nodes:
                        dep = dax.Dependency(parent=parent_node._dax_node,
                                             child=sbv_plotter_node._dax_node)
                        workflow._adag.addDependency(dep)

            # Also add sbv_plotter job for unclustered triggers
            sbv_plotter_node = sbv_plotter_jobs.create_node(unclust_file,
                    segment_dir, tags=[out_tag, "_unclustered"])
            sbv_plotter_node.set_memory(1300)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)
        else:
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add efficiency job for on/off
            efficiency_node = efficiency_jobs.create_node(clust_file,
                    offsource_clustered, segment_dir, tags=[out_tag])
            pp_nodes.append(efficiency_node)
            workflow.add_node(efficiency_node)
            dep = dax.Dependency(parent=off_node._dax_node,
                                 child=efficiency_node._dax_node)
            workflow._adag.addDependency(dep)

            if cp.has_section("workflow-injections"):
                for tag in injcombiner_out_tags:
                    found_file = [file for file in injcombiner_outs \
                                  if tag + "_FOUND" in file.tag_str][0]
                    missed_file = [file for file in injcombiner_outs \
                                   if tag + "_MISSED" in file.tag_str][0]
                    inj_efficiency_node = inj_efficiency_jobs.create_node(\
                            clust_file, offsource_clustered, segment_dir,
                            found_file, missed_file, tags=[out_tag, tag])
                    pp_nodes.append(inj_efficiency_node)
                    workflow.add_node(inj_efficiency_node)
                    dep = dax.Dependency(parent=off_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                    for injcombiner_node in injcombiner_nodes:
                        dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)
                    for injfinder_node in injfinder_nodes:
                        dep = dax.Dependency(parent=injfinder_node._dax_node,
                                child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)

    # Add further trig_cluster jobs for trials
    trial = 1

    while trial <= num_trials:
        trial_tag = "OFFTRIAL_%d" % trial
        unclust_file = [file for file in trig_combiner_outs \
                        if trial_tag in file.tag_str][0]
        trig_cluster_node, clust_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        clust_file = clust_outs[0]
        trig_cluster_outs.extend(clust_outs)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)

        # Add efficiency job
        efficiency_node = efficiency_jobs.create_node(clust_file,
                offsource_clustered, segment_dir, tags=[trial_tag])
        pp_nodes.append(efficiency_node)
        workflow.add_node(efficiency_node)
        dep = dax.Dependency(parent=off_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)
        dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)

        # Adding inj_efficiency job
        if cp.has_section("workflow-injections"):
            for tag in injcombiner_out_tags:
                found_file = [file for file in injcombiner_outs \
                              if tag + "_FOUND" in file.tag_str][0]
                missed_file = [file for file in injcombiner_outs \
                               if tag + "_MISSED" in file.tag_str][0]
                inj_efficiency_node = inj_efficiency_jobs.create_node(\
                        clust_file, offsource_clustered, segment_dir,
                        found_file, missed_file, tags=[trial_tag, tag])
                pp_nodes.append(inj_efficiency_node)
                workflow.add_node(inj_efficiency_node)
                dep = dax.Dependency(parent=off_node._dax_node,
                                     child=inj_efficiency_node._dax_node)
                workflow._adag.addDependency(dep)
                for injcombiner_node in injcombiner_nodes:
                    dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                for injfinder_node in injfinder_nodes:
                    dep = dax.Dependency(parent=injfinder_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)

        trial += 1

    # Initialise html_summary class and set up job
    #FIXME: We may want this job to run even if some jobs fail
    html_summary_jobs = html_summary_class(cp, "html_summary", ifo=ifos,
                                           out_dir=output_dir, tags=tags)
    if cp.has_section("workflow-injections"):
        tuning_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                       if "DETECTION" in inj_tag]
        exclusion_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                          if "DETECTION" not in inj_tag]
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                tuning_tags=tuning_tags, exclusion_tags=exclusion_tags,
                html_dir=html_dir)
    else:
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                                                          html_dir=html_dir)
    workflow.add_node(html_summary_node)
    for pp_node in pp_nodes:
        dep = dax.Dependency(parent=pp_node._dax_node,
                             child=html_summary_node._dax_node)
        workflow._adag.addDependency(dep)

    # Make the open box shell script
    open_box_cmd = ' '.join(html_summary_node.get_command_line())
    open_box_cmd += " --open-box"
    open_box_path = "%s/open_the_box.sh" % output_dir
    with open(open_box_path, "w") as f:
        f.write("#!/bin/sh\n%s" % open_box_cmd)
    os.chmod(open_box_path, 0o500)

    pp_outs.extend(trig_cluster_outs)

    return pp_outs
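
A hedged sketch of wiring this post-processing stage into a PyGRB workflow; every input other than the function itself is an assumption standing in for the outputs of the matched-filter and injection stages.

# Hypothetical call; trig_files, trig_cache, the inj_* lists and config_file
# are assumed to be File/FileList objects produced earlier in the workflow.
pp_files = setup_postproc_coh_PTF_workflow(wflow, trig_files, trig_cache,
                                           inj_trig_files, inj_files,
                                           inj_trig_caches, inj_caches,
                                           config_file, 'post_processing',
                                           'html', 'segment_dir',
                                           wflow.ifo_string,
                                           inj_tags=['BNSSTATIC'],
                                           tags=[])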
Example #7
def setup_postproc_coh_PTF_workflow(workflow,
                                    trig_files,
                                    trig_cache,
                                    inj_trig_files,
                                    inj_files,
                                    inj_trig_caches,
                                    inj_caches,
                                    config_file,
                                    output_dir,
                                    html_dir,
                                    segment_dir,
                                    ifos,
                                    inj_tags=[],
                                    tags=[]):
    """
    This module sets up the post-processing stage in the workflow, using a
    coh_PTF style set up. This consists of running trig_combiner to find
    coherent triggers, and injfinder to look for injections. It then runs
    a horizon_dist job, trig_cluster to cluster triggers, and injcombiner to
    calculate injection statistics. Finally, efficiency and sbv_plotter jobs
    calculate efficiency and signal based veto statistics and make plots.
    
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the jobs will be added to.
    trig_files : pycbc.workflow.core.FileList
        A FileList containing the combined databases.
   
    Returns
    --------
    
    """
    cp = workflow.cp
    full_segment = trig_files[0].segment
    trig_name = cp.get("workflow", "trigger-name")
    grb_string = "GRB" + trig_name
    num_trials = int(cp.get("trig_combiner", "num-trials"))

    pp_outs = FileList([])
    pp_nodes = []

    # Set up needed exe classes
    trig_combiner_exe = os.path.basename(cp.get("executables",
                                                "trig_combiner"))
    trig_combiner_class = select_generic_executable(workflow, "trig_combiner")

    trig_cluster_exe = os.path.basename(cp.get("executables", "trig_cluster"))
    trig_cluster_class = select_generic_executable(workflow, "trig_cluster")

    sbv_plotter_exe = os.path.basename(cp.get("executables", "sbv_plotter"))
    sbv_plotter_class = select_generic_executable(workflow, "sbv_plotter")

    efficiency_exe = os.path.basename(cp.get("executables", "efficiency"))
    efficiency_class = select_generic_executable(workflow, "efficiency")
    """
    horizon_dist_exe = os.path.basename(cp.get("executables",
                                               "horizon_dist"))
    horizon_dist_class = select_generic_executable(workflow,
                                                   "horizon_dist")
    """
    html_summary_exe = os.path.basename(cp.get("executables", "html_summary"))
    html_summary_class = select_generic_executable(workflow, "html_summary")

    # Set up trig_combiner job
    trig_combiner_out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]
    trig_combiner_jobs = trig_combiner_class(cp,
                                             "trig_combiner",
                                             ifo=ifos,
                                             out_dir=output_dir,
                                             tags=tags)
    trig_combiner_node, trig_combiner_outs = trig_combiner_jobs.create_node(\
            trig_files, segment_dir, out_tags=trig_combiner_out_tags,
            tags=tags)
    pp_nodes.append(trig_combiner_node)
    workflow.add_node(trig_combiner_node)
    pp_outs.extend(trig_combiner_outs)

    # Initialise trig_cluster class
    trig_cluster_outs = FileList([])
    trig_cluster_jobs = trig_cluster_class(cp,
                                           "trig_cluster",
                                           ifo=ifos,
                                           out_dir=output_dir,
                                           tags=tags)

    # Set up injfinder jobs
    if cp.has_section("workflow-injections"):
        injfinder_nodes = []
        injcombiner_parent_nodes = []
        inj_sbv_plotter_parent_nodes = []

        injfinder_exe = os.path.basename(cp.get("executables", "injfinder"))
        injfinder_class = select_generic_executable(workflow, "injfinder")
        injfinder_jobs = injfinder_class(cp,
                                         "injfinder",
                                         ifo=ifos,
                                         out_dir=output_dir,
                                         tags=tags)

        injcombiner_exe = os.path.basename(cp.get("executables",
                                                  "injcombiner"))
        injcombiner_class = select_generic_executable(workflow, "injcombiner")
        injcombiner_jobs = injcombiner_class(cp,
                                             "injcombiner",
                                             ifo=ifos,
                                             out_dir=output_dir,
                                             tags=tags)

        injfinder_outs = FileList([])
        for inj_tag in inj_tags:
            triggers = FileList([file for file in inj_trig_files \
                                 if inj_tag in file.tag_str])
            injections = FileList([file for file in inj_files \
                                   if inj_tag in file.tag_str])
            trig_cache = [file for file in inj_trig_caches \
                          if inj_tag in file.tag_str][0]
            inj_cache = [file for file in inj_caches \
                         if inj_tag in file.tag_str][0]
            injfinder_node, curr_outs = injfinder_jobs.create_node(\
                    triggers, injections, segment_dir, tags=[inj_tag])
            injfinder_nodes.append(injfinder_node)
            pp_nodes.append(injfinder_node)
            workflow.add_node(injfinder_node)
            injfinder_outs.extend(curr_outs)
            if "DETECTION" not in curr_outs[0].tagged_description:
                injcombiner_parent_nodes.append(injfinder_node)
            else:
                inj_sbv_plotter_parent_nodes.append(injfinder_node)

        pp_outs.extend(injfinder_outs)

        # Make injfinder output cache
        fm_cache = File(ifos,
                        "foundmissed",
                        full_segment,
                        extension="lcf",
                        directory=output_dir)
        fm_cache.PFN(fm_cache.cache_entry.path, site="local")
        injfinder_outs.convert_to_lal_cache().tofile(\
                open(fm_cache.storage_path, "w"))
        pp_outs.extend(FileList([fm_cache]))

        # Set up injcombiner jobs
        injcombiner_outs = FileList([file for file in injfinder_outs \
                                     if "DETECTION" in file.tag_str])
        injcombiner_tags = [inj_tag for inj_tag in inj_tags \
                            if "DETECTION" not in inj_tag]
        injcombiner_out_tags = [injcombiner_outs[0].tag_str.rsplit('_', 1)[0]]
        injcombiner_nodes = []

        for injcombiner_tag in injcombiner_tags:
            max_inc = cp.get_opt_tags("injections", "max-inc",
                                      [injcombiner_tag])
            inj_str = injcombiner_tag[:4]
            inputs = FileList([file for file in injfinder_outs \
                               if injcombiner_tag in file.tagged_description])
            #                   if any(tag in file.tagged_description \
            #                          for tag in injcombiner_tags)])
            injcombiner_node, curr_outs = injcombiner_jobs.create_node(\
                    fm_cache, inputs, inj_str, max_inc, workflow.analysis_time)
            injcombiner_nodes.append(injcombiner_node)
            injcombiner_out_tags.append("%s_FILTERED_%s" % (inj_str, max_inc))
            injcombiner_outs.extend(curr_outs)
            pp_outs.extend(curr_outs)
            pp_nodes.append(injcombiner_node)
            workflow.add_node(injcombiner_node)
            for parent_node in injcombiner_parent_nodes:
                dep = dax.Dependency(parent=parent_node._dax_node,
                                     child=injcombiner_node._dax_node)
                workflow._adag.addDependency(dep)

        # Initialise injection_efficiency class
        inj_efficiency_jobs = efficiency_class(cp,
                                               "inj_efficiency",
                                               ifo=ifos,
                                               out_dir=output_dir,
                                               tags=tags)

    # Initialise sbv_plotter class
    sbv_plotter_outs = FileList([])
    sbv_plotter_jobs = sbv_plotter_class(cp,
                                         "sbv_plotter",
                                         ifo=ifos,
                                         out_dir=output_dir,
                                         tags=tags)

    # Initialise efficiency class
    efficiency_outs = FileList([])
    efficiency_jobs = efficiency_class(cp,
                                       "efficiency",
                                       ifo=ifos,
                                       out_dir=output_dir,
                                       tags=tags)

    # Add trig_cluster jobs and their corresponding plotting jobs
    for out_tag in trig_combiner_out_tags:
        unclust_file = [file for file in trig_combiner_outs \
                        if out_tag in file.tag_str][0]
        trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        trig_cluster_outs.extend(curr_outs)
        clust_file = curr_outs[0]
        if out_tag != "ONSOURCE":
            # Add memory requirement for jobs with potentially large files
            trig_cluster_node.set_memory(1300)
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add sbv_plotter job
            sbv_out_tags = [out_tag, "_clustered"]
            sbv_plotter_node = sbv_plotter_jobs.create_node(clust_file,
                                                            segment_dir,
                                                            tags=sbv_out_tags)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add injection sbv_plotter nodes if appropriate
            if out_tag == "OFFSOURCE" and \
                    cp.has_section("workflow-injections"):
                offsource_clustered = clust_file
                off_node = sbv_plotter_node

                found_inj_files = FileList([file for file in injcombiner_outs \
                                            if "FOUND" in file.tag_str])
                for curr_injs in found_inj_files:
                    curr_tags = [tag for tag in injcombiner_out_tags \
                                 if tag in curr_injs.name]
                    curr_tags.append("_clustered")
                    sbv_plotter_node = sbv_plotter_jobs.create_node(
                        clust_file,
                        segment_dir,
                        inj_file=curr_injs,
                        tags=curr_tags)
                    pp_nodes.append(sbv_plotter_node)
                    workflow.add_node(sbv_plotter_node)
                    dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                         child=sbv_plotter_node._dax_node)
                    workflow._adag.addDependency(dep)
                    if "DETECTION" in curr_injs.tagged_description:
                        for parent_node in inj_sbv_plotter_parent_nodes:
                            dep = dax.Dependency(
                                parent=parent_node._dax_node,
                                child=sbv_plotter_node._dax_node)
                            workflow._adag.addDependency(dep)
                    else:
                        for parent_node in injcombiner_nodes:
                            dep = dax.Dependency(
                                parent=parent_node._dax_node,
                                child=sbv_plotter_node._dax_node)
                            workflow._adag.addDependency(dep)

            # Also add sbv_plotter job for unclustered triggers
            sbv_plotter_node = sbv_plotter_jobs.create_node(
                unclust_file, segment_dir, tags=[out_tag, "_unclustered"])
            sbv_plotter_node.set_memory(1300)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)
        else:
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add efficiency job for on/off
            efficiency_node = efficiency_jobs.create_node(clust_file,
                                                          offsource_clustered,
                                                          segment_dir,
                                                          tags=[out_tag])
            pp_nodes.append(efficiency_node)
            workflow.add_node(efficiency_node)
            dep = dax.Dependency(parent=off_node._dax_node,
                                 child=efficiency_node._dax_node)
            workflow._adag.addDependency(dep)

            if cp.has_section("workflow-injections"):
                for tag in injcombiner_out_tags:
                    if "_FILTERED_" in tag:
                        inj_set_tag = [t for t in inj_tags if \
                                       str(tag).replace("_FILTERED_", "") \
                                       in t][0]
                    else:
                        inj_set_tag = str(tag)

                    found_file = [file for file in injcombiner_outs \
                                  if tag + "_FOUND" in file.tag_str][0]
                    missed_file = [file for file in injcombiner_outs \
                                   if tag + "_MISSED" in file.tag_str][0]
                    inj_efficiency_node = inj_efficiency_jobs.create_node(\
                            clust_file, offsource_clustered, segment_dir,
                            found_file, missed_file, tags=[out_tag, tag,
                                                           inj_set_tag])
                    pp_nodes.append(inj_efficiency_node)
                    workflow.add_node(inj_efficiency_node)
                    dep = dax.Dependency(parent=off_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                    for injcombiner_node in injcombiner_nodes:
                        dep = dax.Dependency(
                            parent=injcombiner_node._dax_node,
                            child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)
                    for injfinder_node in injfinder_nodes:
                        dep = dax.Dependency(
                            parent=injfinder_node._dax_node,
                            child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)

    # Add further trig_cluster jobs for trials
    trial = 1

    while trial <= num_trials:
        trial_tag = "OFFTRIAL_%d" % trial
        unclust_file = [file for file in trig_combiner_outs \
                        if trial_tag in file.tag_str][0]
        trig_cluster_node, clust_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        clust_file = clust_outs[0]
        trig_cluster_outs.extend(clust_outs)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)

        # Add efficiency job
        efficiency_node = efficiency_jobs.create_node(clust_file,
                                                      offsource_clustered,
                                                      segment_dir,
                                                      tags=[trial_tag])
        pp_nodes.append(efficiency_node)
        workflow.add_node(efficiency_node)
        dep = dax.Dependency(parent=off_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)
        dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)

        # Adding inj_efficiency job
        if cp.has_section("workflow-injections"):
            for tag in injcombiner_out_tags:
                if "_FILTERED_" in tag:
                    inj_set_tag = [t for t in inj_tags if \
                                   str(tag).replace("_FILTERED_", "") in t][0]
                else:
                    inj_set_tag = str(tag)

                found_file = [file for file in injcombiner_outs \
                              if tag + "_FOUND" in file.tag_str][0]
                missed_file = [file for file in injcombiner_outs \
                               if tag + "_MISSED" in file.tag_str][0]
                inj_efficiency_node = inj_efficiency_jobs.create_node(\
                        clust_file, offsource_clustered, segment_dir,
                        found_file, missed_file, tags=[trial_tag, tag,
                                                       inj_set_tag])
                pp_nodes.append(inj_efficiency_node)
                workflow.add_node(inj_efficiency_node)
                dep = dax.Dependency(parent=off_node._dax_node,
                                     child=inj_efficiency_node._dax_node)
                workflow._adag.addDependency(dep)
                for injcombiner_node in injcombiner_nodes:
                    dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                for injfinder_node in injfinder_nodes:
                    dep = dax.Dependency(parent=injfinder_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)

        trial += 1

    # Initialise html_summary class and set up job
    #FIXME: We may want this job to run even if some jobs fail
    html_summary_jobs = html_summary_class(cp,
                                           "html_summary",
                                           ifo=ifos,
                                           out_dir=output_dir,
                                           tags=tags)
    if cp.has_section("workflow-injections"):
        tuning_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                       if "DETECTION" in inj_tag]
        exclusion_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                          if "DETECTION" not in inj_tag]
        html_summary_node = html_summary_jobs.create_node(
            c_file=config_file,
            tuning_tags=tuning_tags,
            exclusion_tags=exclusion_tags,
            html_dir=html_dir)
    else:
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                                                          html_dir=html_dir)
    workflow.add_node(html_summary_node)
    for pp_node in pp_nodes:
        dep = dax.Dependency(parent=pp_node._dax_node,
                             child=html_summary_node._dax_node)
        workflow._adag.addDependency(dep)

    # Make the open box shell script
    open_box_cmd = html_summary_node.executable.get_pfn() + " "
    open_box_cmd += ' '.join(html_summary_node._args + \
                             html_summary_node._options)
    open_box_cmd += " --open-box"
    open_box_path = "%s/open_the_box.sh" % output_dir
    with open(open_box_path, "w") as f:
        f.write("#!/bin/sh\n%s" % open_box_cmd)
    os.chmod(open_box_path, 0o500)

    pp_outs.extend(trig_cluster_outs)

    return pp_outs
Example #8
def setup_coincidence_workflow_ligolw_thinca(
        workflow, segsList, timeSlideFiles, inspiral_outs, output_dir,
        veto_cats=[2,3,4], tags=[], timeSlideTags=None,
        parallelize_split_input=False):
    """
    This function is used to setup a single-stage ihope style coincidence stage
    of the workflow using ligolw_sstinca (or compatible code!).

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The workflow instance that the coincidence jobs will be added to.
    segsList : pycbc.workflow.core.FileList
        The list of files returned by workflow's segment module that contains
        pointers to all the segment files generated in the workflow. If the
        coincidence code will be applying the data quality vetoes, then this
        will be used to ensure that the codes get the necessary input to do
        this.
    timeSlideFiles : pycbc.workflow.core.FileList
        A FileList of the timeSlide input files that are needed to
        determine what time sliding needs to be done. One of the timeSlideFiles
        will normally be "zero-lag only", the others containing time slides
        used to facilitate background computations later in the workflow.
    inspiral_outs : pycbc.workflow.core.FileList
        A FileList of the matched-filter module output that is used as
        input to the coincidence codes running at this stage.
    output_dir : path
        The directory in which coincidence output will be stored.
    veto_cats : list of ints (optional, default = [2,3,4])
        Veto categories that will be applied in the coincidence jobs. If this
        takes the default value the code will run data quality at cumulative 
        categories 2, 3 and 4. Note that if we change the flag definitions to
        be non-cumulative then this option will need to be revisited.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['BNSINJECTIONS'] or
        ['NOINJECTIONANALYSIS']. This will be used in output names.
    timeSlideTags : list of strings (optional, default = None)
        A list of the tags corresponding to the timeSlideFiles that are to be
        used in this call to the module. This can be used to ensure that the
        injection runs do no time sliding, but the no-injection runs do perform
        time slides (or vice-versa if you prefer!).

    Returns
    --------
    ligolwThincaOuts : pycbc.workflow.core.FileList
        A list of the output files generated from ligolw_sstinca.
    ligolwAddOuts : pycbc.workflow.core.FileList
        A list of the output files generated from ligolw_add.
    """
    from pylal import ligolw_cafe

    logging.debug("Entering coincidence module.")
    cp = workflow.cp
    ifoString = workflow.ifo_string

    # setup code for each veto_category

    coinc_outs = FileList([])
    other_outs = {}

    if not timeSlideTags:
        # Get all sections by looking in ini file, use all time slide files.
        timeSlideTags = [(sec.split('-')[-1]).upper()
                  for sec in workflow.cp.sections() if sec.startswith('tisi-')]

    if parallelize_split_input:
        # Want to split all input jobs according to their JOB%d tag.
        # This matches any string that is the letters JOB followed by some
        # numbers and nothing else.
        inspiral_outs_dict = {}
        regex_match = re.compile(r'JOB([0-9]+)\Z')
        for file in inspiral_outs:
            matches = [regex_match.match(tag) for tag in file.tags]
            # Remove non matching entries
            matches = [i for i in matches if i is not None]
            # Must have one entry
            if len(matches) == 0:
                warn_msg = "I was asked to parallelize over split inspiral "
                warn_msg += "files at the coincidence stage, but at least one "
                warn_msg += "input file does not have a JOB\%d tag indicating "
                warn_msg += "that it was split. Assuming that I do not have "
                warn_msg += "split input files and turning "
                warn_msg += "parallelize_split_input off."
                logging.warn(warn_msg)
                parallelize_split_input = False
                break
            if len(matches) > 1:
                err_msg = "One of my input files has two tags fitting JOB\%d "
                err_msg += "this means I cannot tell which split job this "
                err_msg += "file is from."
                raise ValueError(err_msg)
            # Extract the job ID
            job_id = int(matches[0].string[3:])
            if job_id not in inspiral_outs_dict:
                inspiral_outs_dict[job_id] = FileList([])
            inspiral_outs_dict[job_id].append(file)
        else:
            # If I got through all the files I want to sort the dictionaries so
            # that file with key a and index 3 is the same file as key b and
            # index 3 other than the tag is JOBA -> JOBB ... ie. it has used
            # a different part of the template bank.
            sort_lambda = lambda x: (x.ifo_string, x.segment,
                                     x.tagged_description)
            for key in inspiral_outs_dict.keys():
                inspiral_outs_dict[key].sort(key=sort_lambda)
            # These should be in ascending order, so I can assume the existence
            # of a JOB0 tag
            inspiral_outs = inspiral_outs_dict[0]
            for index, file in enumerate(inspiral_outs):
                # Store the index in the file for quicker mapping later
                file.thinca_index = index
    else:
        inspiral_outs_dict = None

    for timeSlideTag in timeSlideTags:
        # Get the time slide file from the inputs
        tisiOutFile = timeSlideFiles.find_output_with_tag(timeSlideTag)
        if not len(tisiOutFile) == 1:
            errMsg = "If you are seeing this, something batshit is going on!"
            if len(tisiOutFile) == 0:
                errMsg = "No time slide files found matching %s." \
                                                                %(timeSlideTag)
            if len(tisiOutFile) > 1:
                errMsg = "More than one time slide files match %s." \
                                                                %(timeSlideTag)
            raise ValueError(errMsg)
        tisiOutFile = tisiOutFile[0]

        # Next we run ligolw_cafe. This is responsible for
        # identifying what times will be used for the ligolw_thinca jobs and
        # what files are needed for each. If doing time sliding there
        # will be some triggers read into multiple jobs
        cacheInspOuts = inspiral_outs.convert_to_lal_cache()
        if workflow.cp.has_option_tags("workflow-coincidence", 
                                       "maximum-extent", tags):
            max_extent = float( workflow.cp.get_opt_tags(
                              "workflow-coincidence", "maximum-extent", tags) )
        else:
            # hard-coded default value for extent of time in a single job
            max_extent = 3600
        logging.debug("Calling into cafe.")
        time_slide_table = lsctables.TimeSlideTable.get_table(\
                ligolw_utils.load_filename(tisiOutFile.storage_path,
                                 gz=tisiOutFile.storage_path.endswith(".gz"),
                                 contenthandler=ContentHandler,
                                 verbose=False))
        time_slide_table.sync_next_id()
        time_slide_dict = time_slide_table.as_dict()

        cafe_seglists, cafe_caches = ligolw_cafe.ligolw_cafe(cacheInspOuts,
            time_slide_dict.values(), extentlimit=max_extent, verbose=False)
        logging.debug("Done with cafe.")

        # Take the combined seglist file
        dqSegFile=segsList.find_output_with_tag('COMBINED_CUMULATIVE_SEGMENTS')
        if not len(dqSegFile) == 1:
            errMsg = "Did not find exactly 1 data quality file."
            print len(dqSegFile), dqSegFile
            raise ValueError(errMsg)
        dqSegFile=dqSegFile[0]

        # Set up llwadd job
        llwadd_tags = [timeSlideTag] + tags 
        ligolwadd_job = LigolwAddExecutable(cp, 'llwadd', ifo=ifoString,
                                          out_dir=output_dir, tags=llwadd_tags)
        ligolwAddOuts = FileList([])

        # This flag will add a clustering job after ligolw_thinca
        if workflow.cp.has_option_tags("workflow-coincidence",
                                      "coincidence-post-cluster", llwadd_tags):
            coinc_post_cluster = True
        else:
            coinc_post_cluster = False

        # Do the global setup for each veto category
        ligolwthinca_job = {}
        cluster_job = {}
        thinca_tags = {}
        for category in veto_cats:
            logging.debug("Preparing %s %s" %(timeSlideTag,category))
            dqVetoName = 'VETO_CAT%d_CUMULATIVE' %(category)
            # FIXME: Should we resolve this now?
            # FIXME: Here we set the dqVetoName to be compatible with pipedown
            #        For pipedown must put the slide identifier first and
            #        dqVetoName last.
            pipedownDQVetoName = 'CAT_%d_VETO' %(category)
            curr_thinca_job_tags = [timeSlideTag] + tags + [pipedownDQVetoName]
            thinca_tags[category]=curr_thinca_job_tags
            # Set up jobs for ligolw_thinca
            ligolwthinca_job[category] = LigolwSSthincaExecutable(cp, 'thinca',
                                             ifo=ifoString, out_dir=output_dir,
                                             dqVetoName=dqVetoName,
                                             tags=curr_thinca_job_tags)
            if coinc_post_cluster:
                cluster_job[category] = SQLInOutExecutable(cp, 'pycbccluster',
                                             ifo=ifoString, out_dir=output_dir,
                                             tags=curr_thinca_job_tags)
        
        for idx, cafe_cache in enumerate(cafe_caches):
            ligolwAddOuts = FileList([])
            ligolwThincaOuts = FileList([])
            ligolwThincaLikelihoodOuts = FileList([])
            ligolwClusterOuts = FileList([])

            if not len(cafe_cache.objects):
                raise ValueError("One of the cache objects contains no files!")
        
            # Determine the segment over which to accept coincidences.
            # If this cache is not the first or last in the timeseries, check
            # whether the neighbouring caches' extents touch this one. If they
            # do, the caches are adjacent and the time where they meet is used
            # as a bound for accepting coincidences. If they are not adjacent,
            # there is no bound on that side. (Note that idx is falsy only for
            # the first cache, and idx + 1 - len(cafe_caches) is falsy only
            # for the last.)
            coincStart, coincEnd = None, None
            if idx and (cafe_cache.extent[0] == cafe_caches[idx-1].extent[1]):
                coincStart = cafe_cache.extent[0]
            if idx + 1 - len(cafe_caches) and \
                        (cafe_cache.extent[1] == cafe_caches[idx+1].extent[0]):
                coincEnd = cafe_cache.extent[1]
            coincSegment = (coincStart, coincEnd)
        
            # Need to create a list of the File(s) contained in the cache.
            # Assume that if we have partitioned input then if *one* job in the
            # partitioned input is an input then *all* jobs will be.
            if not parallelize_split_input:
                inputTrigFiles = FileList([])
                for object in cafe_cache.objects:
                    inputTrigFiles.append(object.workflow_file)
        
                llw_files = inputTrigFiles + [dqSegFile] + [tisiOutFile]
        
                # Now we can create the nodes
                node = ligolwadd_job.create_node(cafe_cache.extent, llw_files)
                ligolwAddFile = node.output_files[0]
                ligolwAddOuts.append(ligolwAddFile)
                workflow.add_node(node)
                for category in veto_cats:
                    node = ligolwthinca_job[category].create_node(\
                                cafe_cache.extent, coincSegment, ligolwAddFile)
                    ligolwThincaOuts += \
                        node.output_files.find_output_without_tag('DIST_STATS')
                    ligolwThincaLikelihoodOuts += \
                           node.output_files.find_output_with_tag('DIST_STATS')
                    workflow.add_node(node)
                    if coinc_post_cluster:
                        node = cluster_job[category].create_node(\
                                       cafe_cache.extent, ligolwThincaOuts[-1])
                        ligolwClusterOuts += node.output_files
                        workflow.add_node(node)
            else:
                for key in inspiral_outs_dict.keys():
                    curr_tags = ["JOB%d" %(key)]
                    curr_list = inspiral_outs_dict[key]
                    inputTrigFiles = FileList([])
                    for object in cafe_cache.objects:
                        inputTrigFiles.append(
                                  curr_list[object.workflow_file.thinca_index])
        
                    llw_files = inputTrigFiles + [dqSegFile] + [tisiOutFile]

                    # Now we can create the nodes
                    node = ligolwadd_job.create_node(cafe_cache.extent,
                                                     llw_files, tags=curr_tags)
                    ligolwAddFile = node.output_files[0]
                    ligolwAddOuts.append(ligolwAddFile)
                    workflow.add_node(node)
                    if workflow.cp.has_option_tags("workflow-coincidence",
                          "coincidence-write-likelihood",curr_thinca_job_tags):
                        write_likelihood=True
                    else:
                        write_likelihood=False
                    for category in veto_cats:
                        node = ligolwthinca_job[category].create_node(\
                             cafe_cache.extent, coincSegment, ligolwAddFile,
                             tags=curr_tags, write_likelihood=write_likelihood)
                        ligolwThincaOuts += \
                               node.output_files.find_output_without_tag(\
                                                                  'DIST_STATS')
                        ligolwThincaLikelihoodOuts += \
                              node.output_files.find_output_with_tag(\
                                                                  'DIST_STATS')
                        workflow.add_node(node)
                        if coinc_post_cluster:
                            node = cluster_job[category].create_node(\
                                       cafe_cache.extent, ligolwThincaOuts[-1])
                            ligolwClusterOuts += node.output_files
                            workflow.add_node(node)

            other_returns = {}
            other_returns['LIGOLW_ADD'] = ligolwAddOuts
            other_returns['DIST_STATS'] = ligolwThincaLikelihoodOuts
        
            if coinc_post_cluster:
                main_return = ligolwClusterOuts
                other_returns['THINCA'] = ligolwThincaOuts
            else:
                main_return = ligolwThincaOuts
        
            logging.debug("Done")
            coinc_outs.extend(main_return)
            for key, file_list in other_returns.items():
                if other_outs.has_key(key):
                    other_outs[key].extend(other_returns[key])
                else:
                    other_outs[key] = other_returns[key]
    return coinc_outs, other_outs
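
As an illustrative aside (not part of the examples above or below), the following standalone sketch mimics the JOB%d grouping and the for/else idiom used in the parallelize_split_input branch; the FakeFile class and the tag values are assumptions made purely for illustration.

import re

class FakeFile(object):
    def __init__(self, tags):
        self.tags = tags

split_files = [FakeFile(["JOB0", "FULL_DATA"]), FakeFile(["JOB1", "FULL_DATA"])]
regex_match = re.compile(r'JOB([0-9]+)\Z')
grouped = {}
for split_file in split_files:
    matches = [m for m in (regex_match.match(t) for t in split_file.tags) if m]
    if len(matches) != 1:
        # Mirrors the handling above: give up on grouping if a file is not
        # tagged exactly once.
        grouped = None
        break
    grouped.setdefault(int(matches[0].group(1)), []).append(split_file)
else:
    # Only reached if the loop finished without break, i.e. every file carried
    # a JOB%d tag; grouped now maps each job ID to its files.
    print("Grouped into %d split jobs" % len(grouped))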
Example #9
0
def setup_coincidence_workflow_ligolw_thinca(workflow,
                                             segsList,
                                             timeSlideFiles,
                                             inspiral_outs,
                                             output_dir,
                                             veto_cats=[2, 3, 4],
                                             tags=[],
                                             timeSlideTags=None,
                                             parallelize_split_input=False):
    """
    This function is used to set up a single-stage ihope-style coincidence
    stage of the workflow using ligolw_sstinca (or compatible code!).

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The workflow instance that the coincidence jobs will be added to.
    segsList : pycbc.workflow.core.FileList
        The list of files returned by workflow's segment module that contains
        pointers to all the segment files generated in the workflow. If the
        coincidence code will be applying the data quality vetoes, then this
        will be used to ensure that the codes get the necessary input to do
        this.
    timeSlideFiles : pycbc.workflow.core.FileList
        A FileList of the timeSlide input files that are needed to
        determine what time sliding needs to be done. One of the timeSlideFiles
        will normally be "zero-lag only", the others containing time slides
        used to facilitate background computations later in the workflow.
    inspiral_outs : pycbc.workflow.core.FileList
        A FileList of the matched-filter module output that is used as
        input to the coincidence codes running at this stage.
    output_dir : path
        The directory in which coincidence output will be stored.
    veto_cats : list of ints (optional, default = [2,3,4])
        Veto categories that will be applied in the coincidence jobs. If this
        takes the default value the code will run data quality at cumulative 
        categories 2, 3 and 4. Note that if we change the flag definitions to
        be non-cumulative then this option will need to be revisited.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['BNSINJECTIONS'] or
        ['NOINJECTIONANALYSIS']. This will be used in output names.
    timeSlideTags : list of strings (optional, default = None)
        A list of the tags corresponding to the timeSlideFiles that are to be
        used in this call to the module. This can be used to ensure that the
        injection runs do no time sliding, but the no-injection runs do perform
        time slides (or vice-versa if you prefer!)
    parallelize_split_input : boolean (optional, default = False)
        If True, and the input inspiral files carry JOB%d tags indicating a
        split template bank, separate coincidence jobs are set up for each
        split job.

    Returns
    --------
    coinc_outs : pycbc.workflow.core.FileList
        A FileList of the main coincidence output files: the clustered files if
        post-clustering is enabled, otherwise the ligolw_thinca output files.
    other_outs : dict of pycbc.workflow.core.FileList
        A dictionary of FileLists holding the secondary outputs, keyed by type,
        for example 'LIGOLW_ADD', 'DIST_STATS' and (if post-clustering is
        enabled) 'THINCA'.
    """
    from pylal import ligolw_cafe

    logging.debug("Entering coincidence module.")
    cp = workflow.cp
    ifoString = workflow.ifo_string

    # setup code for each veto_category

    coinc_outs = FileList([])
    other_outs = {}

    if not timeSlideTags:
        # Get all sections by looking in ini file, use all time slide files.
        timeSlideTags = [(sec.split('-')[-1]).upper()
                         for sec in workflow.cp.sections()
                         if sec.startswith('tisi-')]

    if parallelize_split_input:
        # Want to split all input jobs according to their JOB%d tag.
        # This matches any string that is the letters JOB followed by some
        # numbers and nothing else.
        inspiral_outs_dict = {}
        regex_match = re.compile(r'JOB([0-9]+)\Z')
        for file in inspiral_outs:
            matches = [regex_match.match(tag) for tag in file.tags]
            # Remove non matching entries
            matches = [i for i in matches if i is not None]
            # Must have one entry
            if len(matches) == 0:
                warn_msg = "I was asked to parallelize over split inspiral "
                warn_msg += "files at the coincidence stage, but at least one "
                warn_msg += "input file does not have a JOB\%d tag indicating "
                warn_msg += "that it was split. Assuming that I do not have "
                warn_msg += "split input files and turning "
                warn_msg += "parallelize_split_input off."
                logging.warn(warn_msg)
                parallelize_split_input = False
                break
            if len(matches) > 1:
                err_msg = "One of my input files has two tags fitting JOB\%d "
                err_msg += "this means I cannot tell which split job this "
                err_msg += "file is from."
                raise ValueError(err_msg)
            # Extract the job ID
            id = int(matches[0].group(1))
            if not inspiral_outs_dict.has_key(id):
                inspiral_outs_dict[id] = FileList([])
            inspiral_outs_dict[id].append(file)
        else:
            # This else clause belongs to the for loop above and only runs if
            # no break occurred, i.e. every file carried a JOB%d tag. Sort each
            # dictionary entry so that the file at index 3 under key a is the
            # same file as the one at index 3 under key b, differing only in
            # its JOB%d tag, i.e. in which part of the split template bank it
            # used.
            sort_lambda = lambda x: (x.ifo_string, x.segment,
                                     x.tagged_description)
            for key in inspiral_outs_dict.keys():
                inspiral_outs_dict[key].sort(key=sort_lambda)
            # These should be in ascending order, so I can assume the existence
            # of a JOB0 tag
            inspiral_outs = inspiral_outs_dict[0]
            for index, file in enumerate(inspiral_outs):
                # Store the index in the file for quicker mapping later
                file.thinca_index = index
    else:
        inspiral_outs_dict = None

    for timeSlideTag in timeSlideTags:
        # Get the time slide file from the inputs
        tisiOutFile = timeSlideFiles.find_output_with_tag(timeSlideTag)
        if not len(tisiOutFile) == 1:
            errMsg = "If you are seeing this, something batshit is going on!"
            if len(tisiOutFile) == 0:
                errMsg = "No time slide files found matching %s." \
                                                                %(timeSlideTag)
            if len(tisiOutFile) > 1:
                errMsg = "More than one time slide files match %s." \
                                                                %(timeSlideTag)
            raise ValueError(errMsg)
        tisiOutFile = tisiOutFile[0]

        # Next we run ligolw_cafe. This is responsible for
        # identifying what times will be used for the ligolw_thinca jobs and
        # what files are needed for each. If doing time sliding there
        # will be some triggers read into multiple jobs
        cacheInspOuts = inspiral_outs.convert_to_lal_cache()
        if workflow.cp.has_option_tags("workflow-coincidence",
                                       "maximum-extent", tags):
            max_extent = float(
                workflow.cp.get_opt_tags("workflow-coincidence",
                                         "maximum-extent", tags))
        else:
            # hard-coded default value for extent of time in a single job
            max_extent = 3600
        logging.debug("Calling into cafe.")
        time_slide_table = lsctables.TimeSlideTable.get_table(\
                ligolw_utils.load_filename(tisiOutFile.storage_path,
                                 gz=tisiOutFile.storage_path.endswith(".gz"),
                                 contenthandler=ContentHandler,
                                 verbose=False))
        time_slide_table.sync_next_id()
        time_slide_dict = time_slide_table.as_dict()

        cafe_seglists, cafe_caches = ligolw_cafe.ligolw_cafe(
            cacheInspOuts,
            time_slide_dict.values(),
            extentlimit=max_extent,
            verbose=False)
        logging.debug("Done with cafe.")

        # Take the combined seglist file
        dqSegFile = segsList.find_output_with_tag(
            'COMBINED_CUMULATIVE_SEGMENTS')
        if not len(dqSegFile) == 1:
            errMsg = "Did not find exactly 1 data quality file."
            print len(dqSegFile), dqSegFile
            raise ValueError(errMsg)
        dqSegFile = dqSegFile[0]

        # Set up llwadd job
        llwadd_tags = [timeSlideTag] + tags
        ligolwadd_job = LigolwAddExecutable(cp,
                                            'llwadd',
                                            ifo=ifoString,
                                            out_dir=output_dir,
                                            tags=llwadd_tags)
        ligolwAddOuts = FileList([])

        # This flag will add a clustering job after ligolw_thinca
        if workflow.cp.has_option_tags("workflow-coincidence",
                                       "coincidence-post-cluster",
                                       llwadd_tags):
            coinc_post_cluster = True
        else:
            coinc_post_cluster = False

        # Do the global setup for each veto category
        ligolwthinca_job = {}
        cluster_job = {}
        thinca_tags = {}
        for category in veto_cats:
            logging.debug("Preparing %s %s" % (timeSlideTag, category))
            dqVetoName = 'VETO_CAT%d_CUMULATIVE' % (category)
            # FIXME: Should we resolve this now?
            # FIXME: Here we set the dqVetoName to be compatible with pipedown
            #        For pipedown must put the slide identifier first and
            #        dqVetoName last.
            pipedownDQVetoName = 'CAT_%d_VETO' % (category)
            curr_thinca_job_tags = [timeSlideTag] + tags + [pipedownDQVetoName]
            thinca_tags[category] = curr_thinca_job_tags
            # Set up jobs for ligolw_thinca
            ligolwthinca_job[category] = LigolwSSthincaExecutable(
                cp,
                'thinca',
                ifo=ifoString,
                out_dir=output_dir,
                dqVetoName=dqVetoName,
                tags=curr_thinca_job_tags)
            if coinc_post_cluster:
                cluster_job[category] = SQLInOutExecutable(
                    cp,
                    'pycbccluster',
                    ifo=ifoString,
                    out_dir=output_dir,
                    tags=curr_thinca_job_tags)

        for idx, cafe_cache in enumerate(cafe_caches):
            ligolwAddOuts = FileList([])
            ligolwThincaOuts = FileList([])
            ligolwThincaLikelihoodOuts = FileList([])
            ligolwClusterOuts = FileList([])

            if not len(cafe_cache.objects):
                raise ValueError("One of the cache objects contains no files!")

            # Determine the segment over which to accept coincidences.
            # If this cache is not the first or last in the timeseries, check
            # whether the neighbouring caches' extents touch this one. If they
            # do, the caches are adjacent and the time where they meet is used
            # as a bound for accepting coincidences. If they are not adjacent,
            # there is no bound on that side. (Note that idx is falsy only for
            # the first cache, and idx + 1 - len(cafe_caches) is falsy only
            # for the last.)
            coincStart, coincEnd = None, None
            if idx and (cafe_cache.extent[0]
                        == cafe_caches[idx - 1].extent[1]):
                coincStart = cafe_cache.extent[0]
            if idx + 1 - len(cafe_caches) and \
                        (cafe_cache.extent[1] == cafe_caches[idx+1].extent[0]):
                coincEnd = cafe_cache.extent[1]
            coincSegment = (coincStart, coincEnd)

            # Need to create a list of the File(s) contained in the cache.
            # Assume that if we have partitioned input then if *one* job in the
            # partitioned input is an input then *all* jobs will be.
            if not parallelize_split_input:
                inputTrigFiles = FileList([])
                for object in cafe_cache.objects:
                    inputTrigFiles.append(object.workflow_file)

                llw_files = inputTrigFiles + [dqSegFile] + [tisiOutFile]

                # Now we can create the nodes
                node = ligolwadd_job.create_node(cafe_cache.extent, llw_files)
                ligolwAddFile = node.output_files[0]
                ligolwAddOuts.append(ligolwAddFile)
                workflow.add_node(node)
                for category in veto_cats:
                    node = ligolwthinca_job[category].create_node(\
                                cafe_cache.extent, coincSegment, ligolwAddFile)
                    ligolwThincaOuts += \
                        node.output_files.find_output_without_tag('DIST_STATS')
                    ligolwThincaLikelihoodOuts += \
                           node.output_files.find_output_with_tag('DIST_STATS')
                    workflow.add_node(node)
                    if coinc_post_cluster:
                        node = cluster_job[category].create_node(\
                                       cafe_cache.extent, ligolwThincaOuts[-1])
                        ligolwClusterOuts += node.output_files
                        workflow.add_node(node)
            else:
                for key in inspiral_outs_dict.keys():
                    curr_tags = ["JOB%d" % (key)]
                    curr_list = inspiral_outs_dict[key]
                    inputTrigFiles = FileList([])
                    for object in cafe_cache.objects:
                        inputTrigFiles.append(
                            curr_list[object.workflow_file.thinca_index])

                    llw_files = inputTrigFiles + [dqSegFile] + [tisiOutFile]

                    # Now we can create the nodes
                    node = ligolwadd_job.create_node(cafe_cache.extent,
                                                     llw_files,
                                                     tags=curr_tags)
                    ligolwAddFile = node.output_files[0]
                    ligolwAddOuts.append(ligolwAddFile)
                    workflow.add_node(node)
                    if workflow.cp.has_option_tags(
                            "workflow-coincidence",
                            "coincidence-write-likelihood",
                            curr_thinca_job_tags):
                        write_likelihood = True
                    else:
                        write_likelihood = False
                    for category in veto_cats:
                        node = ligolwthinca_job[category].create_node(\
                             cafe_cache.extent, coincSegment, ligolwAddFile,
                             tags=curr_tags, write_likelihood=write_likelihood)
                        ligolwThincaOuts += \
                               node.output_files.find_output_without_tag(\
                                                                  'DIST_STATS')
                        ligolwThincaLikelihoodOuts += \
                              node.output_files.find_output_with_tag(\
                                                                  'DIST_STATS')
                        workflow.add_node(node)
                        if coinc_post_cluster:
                            node = cluster_job[category].create_node(\
                                       cafe_cache.extent, ligolwThincaOuts[-1])
                            ligolwClusterOuts += node.output_files
                            workflow.add_node(node)

            other_returns = {}
            other_returns['LIGOLW_ADD'] = ligolwAddOuts
            other_returns['DIST_STATS'] = ligolwThincaLikelihoodOuts

            if coinc_post_cluster:
                main_return = ligolwClusterOuts
                other_returns['THINCA'] = ligolwThincaOuts
            else:
                main_return = ligolwThincaOuts

            logging.debug("Done")
            coinc_outs.extend(main_return)
            for key, file_list in other_returns.items():
                if other_outs.has_key(key):
                    other_outs[key].extend(other_returns[key])
                else:
                    other_outs[key] = other_returns[key]
    return coinc_outs, other_outs
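
As an illustrative aside (not taken from the examples above or below), this standalone sketch mimics the adjacent-cache boundary logic used when deciding where coincidences are accepted; the extents are made-up GPS-like intervals.

extents = [(100, 200), (200, 300), (350, 400)]  # assumed cache extents
for idx, extent in enumerate(extents):
    coinc_start, coinc_end = None, None
    # Not the first cache and flush against the previous one: bound the start.
    if idx and extents[idx - 1][1] == extent[0]:
        coinc_start = extent[0]
    # Not the last cache and flush against the next one: bound the end.
    if idx + 1 < len(extents) and extents[idx + 1][0] == extent[1]:
        coinc_end = extent[1]
    print("cache %d accepts coincidences bounded by %s"
          % (idx, (coinc_start, coinc_end)))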
Example #10
0
def setup_postproc_coh_PTF_online_workflow(workflow, trig_files, trig_cache,
        inj_trig_files, inj_files, inj_trig_caches, inj_caches, config_file,
        output_dir, html_dir, segment_dir, segs_plot, ifos, inj_tags=None,
        tags=None):
    """
    This module sets up a stripped down post-processing stage for the online
    workflow, using a coh_PTF style set up. This consists of running
    trig_combiner to find coherent triggers, and trig_cluster to cluster them.
    This process may be done in two stages to reduce memory requirements. It
    also runs injfinder to look for injections, and injcombiner to calculate
    injection statistics. Finally, efficiency and sbv_plotter jobs calculate
    efficiency and signal based veto statistics and make plots.
    
    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    trig_files : pycbc.workflow.core.FileList
        A FileList of the trigger files from the on/off source analysis jobs.
    trig_cache : pycbc.workflow.core.File
        A cache file pointing to the trigger files.
    inj_trig_files : pycbc.workflow.core.FileList
        A FileList of the trigger files produced by injection jobs.
    inj_files : pycbc.workflow.core.FileList
        A FileList of the injection set files.
    inj_trig_caches : pycbc.workflow.core.FileList
        A FileList containing the cache files that point to the injection
        trigger files.
    inj_caches : pycbc.workflow.core.FileList
        A FileList containing cache files that point to the injection files.
    config_file : pycbc.workflow.core.File
        The parsed configuration file.
    output_dir : path
        The directory in which output files will be stored.
    html_dir : path
        The directory where the result webpage will be placed.
    segment_dir : path
        The directory in which data segment information is stored.
    segs_plot : pycbc.workflow.core.File
        The plot showing the analysis segments for each IFO around the GRB time.
        This is produced at the time of workflow generation.
    ifos : list
        A list containing the analysis interferometers.
    inj_tags : list
        List containing the strings used to uniquely identify the injection
        sets included in the analysis.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.

    Returns
    --------
    pp_outs : pycbc.workflow.core.FileList
        A list of the output from this stage.
    """
    if inj_tags is None:
        inj_tags = []
    if tags is None:
        tags = []
    cp = workflow.cp
    full_segment = trig_files[0].segment
    trig_name = cp.get("workflow", "trigger-name")
    grb_string = "GRB" + trig_name
    num_trials = int(cp.get("trig_combiner", "num-trials"))

    pp_outs = FileList([])
    pp_nodes = []

    # Set up needed exe classes
    trig_combiner_class = select_generic_executable(workflow, "trig_combiner")

    trig_cluster_class = select_generic_executable(workflow, "trig_cluster")

    sbv_plotter_class = select_generic_executable(workflow, "sbv_plotter")
    
    efficiency_class = select_generic_executable(workflow, "efficiency")

    #horizon_dist_class = select_generic_executable(workflow, "horizon_dist")

    html_summary_class = select_generic_executable(workflow, "html_summary")

    # Set up trig_combiner job
    trig_combiner_out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]
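    # If the inspiral jobs were also run with short time slides, trig_combiner
    # produces additional zero-lag outputs, so request those tags as well.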
    if all("COHERENT_NO_INJECTIONS" in t.name for t in trig_files) and \
            cp.has_option_tag("inspiral", "do-short-slides",
                              "coherent_no_injections"):
        trig_combiner_out_tags.extend(["ZEROLAG_OFF", "ZEROLAG_ALL"])

    trig_combiner_jobs = trig_combiner_class(cp, "trig_combiner", ifo=ifos, 
                                             out_dir=output_dir, tags=tags)

    # Do first stage of trig_combiner and trig_cluster jobs if desired
    if workflow.cp.has_option("workflow-postproc", "do-two-stage-clustering"):
        logging.info("Doing two-stage clustering.")
        trig_combiner_s1_jobs = trig_combiner_class(cp, "trig_combiner",
                ifo=ifos, out_dir=output_dir, tags=tags+["INTERMEDIATE"])

        num_stage_one_jobs = int(workflow.cp.get("workflow-postproc",
            "num-stage-one-cluster-jobs"))
        num_inputs_per_job = -(-len(trig_files) // num_stage_one_jobs)
        split_trig_files = (trig_files[p:p + num_inputs_per_job] for p in \
                            xrange(0, len(trig_files), num_inputs_per_job))
        trig_cluster_s1_jobs = trig_cluster_class(cp, "trig_cluster", ifo=ifos,
                out_dir=output_dir, tags=tags+["INTERMEDIATE"])
        trig_cluster_s1_nodes = []
        trig_cluster_s1_outs = FileList([])
        for j, s1_inputs in zip(range(num_stage_one_jobs), split_trig_files):
            trig_combiner_s1_node, trig_combiner_s1_outs = \
                    trig_combiner_s1_jobs.create_node(s1_inputs,
                            segment_dir, workflow.analysis_time,
                            out_tags=trig_combiner_out_tags, tags=tags+[str(j)])
            pp_nodes.append(trig_combiner_s1_node)
            workflow.add_node(trig_combiner_s1_node)

            unclust_file = [f for f in trig_combiner_s1_outs \
                            if "ALL_TIMES" in f.tag_str][0]
            trig_cluster_s1_node, curr_outs = trig_cluster_s1_jobs.create_node(\
                    unclust_file)
            trig_cluster_s1_outs.extend(curr_outs)
            clust_file = curr_outs[0]
            trig_cluster_s1_node.set_memory(1300)
            trig_cluster_s1_nodes.append(trig_cluster_s1_node)
            pp_nodes.append(trig_cluster_s1_node)
            workflow.add_node(trig_cluster_s1_node)
            dep = dax.Dependency(parent=trig_combiner_s1_node._dax_node,
                                 child=trig_cluster_s1_node._dax_node)
            workflow._adag.addDependency(dep)

        trig_combiner_node, trig_combiner_outs = \
                trig_combiner_jobs.create_node(trig_cluster_s1_outs,
                        segment_dir, workflow.analysis_time,
                        out_tags=trig_combiner_out_tags, tags=tags)
        pp_nodes.append(trig_combiner_node)
        workflow.add_node(trig_combiner_node)
        pp_outs.extend(trig_combiner_outs)
        for trig_cluster_s1_node in trig_cluster_s1_nodes:
            dep = dax.Dependency(parent=trig_cluster_s1_node._dax_node,
                                 child=trig_combiner_node._dax_node)
            workflow._adag.addDependency(dep)

    else:
        trig_combiner_node, trig_combiner_outs = \
                trig_combiner_jobs.create_node(trig_files, segment_dir,
                        workflow.analysis_time, out_tags=trig_combiner_out_tags,
                        tags=tags)
        pp_nodes.append(trig_combiner_node)
        workflow.add_node(trig_combiner_node)
        pp_outs.extend(trig_combiner_outs)

    # Initialise trig_cluster class
    trig_cluster_outs = FileList([])
    trig_cluster_jobs = trig_cluster_class(cp, "trig_cluster", ifo=ifos,
                                           out_dir=output_dir, tags=tags)

    # Set up injfinder jobs
    if cp.has_section("workflow-injections"):
        injfinder_nodes = []
        injcombiner_parent_nodes = []
        inj_sbv_plotter_parent_nodes = []

        injfinder_exe = os.path.basename(cp.get("executables", "injfinder"))
        injfinder_class = select_generic_executable(workflow, "injfinder")
        injfinder_jobs = injfinder_class(cp, "injfinder", ifo=ifos,
                                         out_dir=output_dir, tags=tags)

        injcombiner_exe = os.path.basename(cp.get("executables",
                                                  "injcombiner"))
        injcombiner_class = select_generic_executable(workflow, "injcombiner")
        injcombiner_jobs = injcombiner_class(cp, "injcombiner", ifo=ifos,
                                             out_dir=output_dir, tags=tags)

        injfinder_outs = FileList([])
        for inj_tag in inj_tags:
            triggers = FileList([file for file in inj_trig_files \
                                 if inj_tag in file.tag_str])
            injections = FileList([file for file in inj_files \
                                   if inj_tag in file.tag_str])
            trig_cache = [file for file in inj_trig_caches \
                          if inj_tag in file.tag_str][0]
            inj_cache = [file for file in inj_caches \
                         if inj_tag in file.tag_str][0]
            injfinder_node, curr_outs = injfinder_jobs.create_node(\
                    triggers, injections, segment_dir, tags=[inj_tag])
            injfinder_nodes.append(injfinder_node)
            pp_nodes.append(injfinder_node)
            workflow.add_node(injfinder_node)
            injfinder_outs.extend(curr_outs)
            if "DETECTION" not in curr_outs[0].tagged_description:
                injcombiner_parent_nodes.append(injfinder_node)
            else:
                inj_sbv_plotter_parent_nodes.append(injfinder_node)

        pp_outs.extend(injfinder_outs)

        # Make injfinder output cache
        fm_cache = File(ifos, "foundmissed", full_segment,
                        extension="lcf", directory=output_dir)
        fm_cache.PFN(fm_cache.cache_entry.path, site="local")
        injfinder_outs.convert_to_lal_cache().tofile(\
                open(fm_cache.storage_path, "w"))
        pp_outs.extend(FileList([fm_cache]))

        # Set up injcombiner jobs
        injcombiner_outs = FileList([f for f in injfinder_outs \
                                     if "DETECTION" in f.tag_str])
        injcombiner_tags = [inj_tag for inj_tag in inj_tags \
                            if "DETECTION" not in inj_tag]
        injcombiner_out_tags = [i.tag_str.rsplit('_', 1)[0] for i in \
                                injcombiner_outs if "FOUND" in i.tag_str]
        injcombiner_nodes = []

        for injcombiner_tag in injcombiner_tags:
            max_inc = cp.get_opt_tags("injections", "max-inc",
                                      [injcombiner_tag])
            inj_str = injcombiner_tag.replace("INJ", "")
            inputs = FileList([f for f in injfinder_outs \
                               if injcombiner_tag in f.tagged_description])
            injcombiner_node, curr_outs = injcombiner_jobs.create_node(\
                    fm_cache, inputs, inj_str, max_inc, workflow.analysis_time)
            injcombiner_nodes.append(injcombiner_node)
            injcombiner_out_tags.append("%s_FILTERED_%s"
                                        % (inj_str.split(max_inc)[0], max_inc))
            injcombiner_outs.extend(curr_outs)
            pp_outs.extend(curr_outs)
            pp_nodes.append(injcombiner_node)
            workflow.add_node(injcombiner_node)
            for parent_node in injcombiner_parent_nodes:
                dep = dax.Dependency(parent=parent_node._dax_node,
                                     child=injcombiner_node._dax_node)
                workflow._adag.addDependency(dep)

        # Initialise injection_efficiency class
        inj_efficiency_jobs = efficiency_class(cp, "inj_efficiency", ifo=ifos,
                                               out_dir=output_dir, tags=tags)

    # Initialise sbv_plotter class
    sbv_plotter_outs = FileList([])
    sbv_plotter_jobs = sbv_plotter_class(cp, "sbv_plotter", ifo=ifos,
                                         out_dir=output_dir, tags=tags)

    # Initialise efficiency class
    efficiency_outs = FileList([])
    efficiency_jobs = efficiency_class(cp, "efficiency", ifo=ifos,
                                       out_dir=output_dir, tags=tags)

    # Add trig_cluster jobs and their corresponding plotting jobs
    for out_tag in trig_combiner_out_tags:
        unclust_file = [f for f in trig_combiner_outs \
                        if out_tag in f.tag_str][0]
        trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        trig_cluster_outs.extend(curr_outs)
        clust_file = curr_outs[0]
        if out_tag != "ONSOURCE":
            # Add memory requirement for jobs with potentially large files
            trig_cluster_node.set_memory(1300)
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add sbv_plotter job
            sbv_out_tags = [out_tag, "_clustered"]
            sbv_plotter_node = sbv_plotter_jobs.create_node(clust_file,
                                                            segment_dir,
                                                            tags=sbv_out_tags)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add injection sbv_plotter nodes if appropriate
            if out_tag == "OFFSOURCE":
                offsource_clustered = clust_file
                off_node = sbv_plotter_node

            if out_tag == "OFFSOURCE" and \
                    cp.has_section("workflow-injections"):
                found_inj_files = FileList([file for file in injcombiner_outs \
                                            if "FOUND" in file.tag_str])
                for curr_injs in found_inj_files:
                    curr_tags = [tag for tag in injcombiner_out_tags \
                                 if tag in curr_injs.name]
                    curr_tags.append("_clustered")
                    sbv_plotter_node = sbv_plotter_jobs.create_node(clust_file,
                            segment_dir, inj_file=curr_injs, tags=curr_tags)
                    pp_nodes.append(sbv_plotter_node)
                    workflow.add_node(sbv_plotter_node)
                    dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                         child=sbv_plotter_node._dax_node)
                    workflow._adag.addDependency(dep)
                    if "DETECTION" in curr_injs.tagged_description:
                        for parent_node in inj_sbv_plotter_parent_nodes:
                            dep = dax.Dependency(parent=parent_node._dax_node,
                                    child=sbv_plotter_node._dax_node)
                            workflow._adag.addDependency(dep)
                    else:
                        for parent_node in injcombiner_nodes:
                            dep = dax.Dependency(parent=parent_node._dax_node,
                                    child=sbv_plotter_node._dax_node)
                            workflow._adag.addDependency(dep)

        else:
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add efficiency job for on/off
            efficiency_node = efficiency_jobs.create_node(clust_file,
                    offsource_clustered, segment_dir, tags=[out_tag])
            pp_nodes.append(efficiency_node)
            workflow.add_node(efficiency_node)
            dep = dax.Dependency(parent=off_node._dax_node,
                                 child=efficiency_node._dax_node)
            workflow._adag.addDependency(dep)

            if cp.has_section("workflow-injections"):
                for tag in injcombiner_out_tags:
                    if "_FILTERED_" in tag:
                        inj_set_tag = [t for t in inj_tags if \
                                       str(tag).replace("_FILTERED_", "") \
                                       in t][0]
                    else:
                        inj_set_tag = str(tag)
                    
                    found_file = [file for file in injcombiner_outs \
                                  if tag + "_FOUND" in file.tag_str][0]
                    missed_file = [file for file in injcombiner_outs \
                                   if tag + "_MISSED" in file.tag_str][0]
                    inj_efficiency_node = inj_efficiency_jobs.create_node(\
                            clust_file, offsource_clustered, segment_dir,
                            found_file, missed_file, tags=[out_tag, tag,
                                                           inj_set_tag])
                    pp_nodes.append(inj_efficiency_node)
                    workflow.add_node(inj_efficiency_node)
                    dep = dax.Dependency(parent=off_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                    for injcombiner_node in injcombiner_nodes:
                        dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)
                    for injfinder_node in injfinder_nodes:
                        dep = dax.Dependency(parent=injfinder_node._dax_node,
                                child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)

    # Add further trig_cluster jobs for trials
    trial = 1

    while trial <= num_trials:
        trial_tag = "OFFTRIAL_%d" % trial
        unclust_file = [f for f in trig_combiner_outs \
                        if trial_tag in f.tag_str][0]
        trig_cluster_node, clust_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        clust_file = clust_outs[0]
        trig_cluster_outs.extend(clust_outs)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)

        # Add efficiency job
        efficiency_node = efficiency_jobs.create_node(clust_file,
                offsource_clustered, segment_dir, tags=[trial_tag])
        pp_nodes.append(efficiency_node)
        workflow.add_node(efficiency_node)
        dep = dax.Dependency(parent=off_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)
        dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)

        # Adding inj_efficiency job
        if cp.has_section("workflow-injections"):
            for tag in injcombiner_out_tags:
                if "_FILTERED_" in tag:
                    inj_set_tag = [t for t in inj_tags if \
                                   str(tag).replace("_FILTERED_", "") in t][0]
                else:
                    inj_set_tag = str(tag)

                found_file = [file for file in injcombiner_outs \
                              if tag + "_FOUND" in file.tag_str][0]
                missed_file = [file for file in injcombiner_outs \
                               if tag + "_MISSED" in file.tag_str][0]
                inj_efficiency_node = inj_efficiency_jobs.create_node(\
                        clust_file, offsource_clustered, segment_dir,
                        found_file, missed_file, tags=[trial_tag, tag,
                                                       inj_set_tag])
                pp_nodes.append(inj_efficiency_node)
                workflow.add_node(inj_efficiency_node)
                dep = dax.Dependency(parent=off_node._dax_node,
                                     child=inj_efficiency_node._dax_node)
                workflow._adag.addDependency(dep)
                for injcombiner_node in injcombiner_nodes:
                    dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                for injfinder_node in injfinder_nodes:
                    dep = dax.Dependency(parent=injfinder_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)

        trial += 1

    # Initialise html_summary class and set up job
    #FIXME: We may want this job to run even if some jobs fail
    html_summary_jobs = html_summary_class(cp, "html_summary", ifo=ifos,
                                           out_dir=output_dir, tags=tags)
    if cp.has_section("workflow-injections"):
        tuning_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                       if "DETECTION" in inj_tag]
        exclusion_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                          if "DETECTION" not in inj_tag]
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                tuning_tags=tuning_tags, exclusion_tags=exclusion_tags,
                seg_plot=segs_plot, html_dir=html_dir)
    else:
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                seg_plot=segs_plot, html_dir=html_dir)
    workflow.add_node(html_summary_node)
    for pp_node in pp_nodes:
        dep = dax.Dependency(parent=pp_node._dax_node,
                             child=html_summary_node._dax_node)
        workflow._adag.addDependency(dep)

    # Make the open box shell script
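    # If the executable has no local PFN, fall back to a script that first
    # downloads it (over http) and makes it executable before running it.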
    try:
        open_box_cmd = html_summary_node.executable.get_pfn() + " "
    except:
        exe_path = html_summary_node.executable.get_pfn('nonlocal').replace(\
                "https", "http")
        exe_name = exe_path.rsplit('/', 1)[-1]
        open_box_cmd = "wget %s\n" % exe_path
        open_box_cmd += "chmod 500 ./%s\n./%s " % (exe_name, exe_name)
    open_box_cmd += ' '.join(html_summary_node._args + \
                             html_summary_node._options)
    open_box_cmd += " --open-box"
    open_box_path = "%s/open_the_box.sh" % output_dir
    f = open(open_box_path, "w")
    f.write("#!/bin/sh\n%s" % open_box_cmd)
    f.close()
    os.chmod(open_box_path, 0500)

    pp_outs.extend(trig_cluster_outs)

    return pp_outs
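
As an illustrative aside (not taken from the example above), this standalone sketch shows the input splitting used by the optional two-stage clustering; the file names and job count are assumptions.

trig_files = ["GRB-trigs-%d.xml" % i for i in range(10)]  # assumed inputs
num_stage_one_jobs = 3
# -(-a // b) is ceiling division, so no trigger file is left out of a chunk.
num_inputs_per_job = -(-len(trig_files) // num_stage_one_jobs)
split_trig_files = [trig_files[p:p + num_inputs_per_job]
                    for p in range(0, len(trig_files), num_inputs_per_job)]
for j, s1_inputs in enumerate(split_trig_files):
    print("stage-one job %d gets %d files" % (j, len(s1_inputs)))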
Example #11
0
def setup_postproc_coh_PTF_offline_workflow(workflow,
                                            trig_files,
                                            trig_cache,
                                            ts_trig_files,
                                            inj_trig_files,
                                            inj_files,
                                            inj_trig_caches,
                                            inj_caches,
                                            config_file,
                                            output_dir,
                                            html_dir,
                                            segment_dir,
                                            segs_plot,
                                            ifos,
                                            inj_tags=None,
                                            tags=None):
    """
    This module sets up the post-processing stage in the workflow, using a
    coh_PTF style set up. This consists of running trig_combiner to find
    coherent triggers, and injfinder to look for injections. It then runs
    a horizon_dist job, trig_cluster to cluster triggers, and injcombiner to
    calculate injection statistics. Finally, efficiency and sbv_plotter jobs
    calculate efficiency and signal based veto statistics and make plots.
    
    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    trig_files : pycbc.workflow.core.FileList
        A FileList of the trigger files from the on/off source analysis jobs.
    trig_cache : pycbc.workflow.core.File
        A cache file pointing to the trigger files.
    ts_trig_files : pycbc.workflow.core.FileList
        A FileList of the trigger files from the timeslide analysis jobs.
    inj_trig_files : pycbc.workflow.core.FileList
        A FileList of the trigger files produced by injection jobs.
    inj_files : pycbc.workflow.core.FileList
        A FileList of the injection set files.
    inj_trig_caches : pycbc.workflow.core.FileList
        A FileList containing the cache files that point to the injection
        trigger files.
    inj_caches : pycbc.workflow.core.FileList
        A FileList containing cache files that point to the injection files.
    config_file : pycbc.workflow.core.File
        The parsed configuration file.
    output_dir : path
        The directory in which output files will be stored.
    html_dir : path
        The directory where the result webpage will be placed.
    segment_dir : path
        The directory in which data segment information is stored.
    segs_plot : pycbc.workflow.core.File
        The plot showing the analysis segments for each IFO around the GRB time.
        This is produced at the time of workflow generation.
    ifos : list
        A list containing the analysis interferometers.
    inj_tags : list
        List containing the strings used to uniquely identify the injection
        sets included in the analysis.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.

    Returns
    --------
    pp_outs : pycbc.workflow.core.FileList
        A list of the output from this stage.
    """
    if inj_tags is None:
        inj_tags = []
    if tags is None:
        tags = []
    cp = workflow.cp
    full_segment = trig_files[0].segment
    trig_name = cp.get("workflow", "trigger-name")
    grb_string = "GRB" + trig_name
    num_trials = int(cp.get("trig_combiner", "num-trials"))
    do_injections = cp.has_section("workflow-injections")

    pp_outs = FileList([])
    pp_nodes = []

    # Set up needed exe classes
    trig_combiner_class = select_generic_executable(workflow, "trig_combiner")

    trig_cluster_class = select_generic_executable(workflow, "trig_cluster")

    sbv_plotter_class = select_generic_executable(workflow, "sbv_plotter")

    efficiency_class = select_generic_executable(workflow, "efficiency")

    #horizon_dist_class = select_generic_executable(workflow, "horizon_dist")

    html_summary_class = select_generic_executable(workflow, "html_summary")

    # Set up injection jobs if desired
    if do_injections:
        workflow, injfinder_nodes, injfinder_outs, fm_cache, \
                injcombiner_nodes, injcombiner_outs, injcombiner_out_tags, \
                inj_sbv_plotter_parent_nodes, pp_nodes, pp_outs = \
                setup_coh_PTF_injections_pp(workflow, inj_trig_files,
                        inj_files, inj_trig_caches, inj_caches, pp_nodes,
                        pp_outs, inj_tags, output_dir, segment_dir, ifos,
                        tags=tags)

        # Initialise injection_efficiency class
        inj_efficiency_jobs = efficiency_class(cp,
                                               "inj_efficiency",
                                               ifo=ifos,
                                               out_dir=output_dir,
                                               tags=tags)

    # Set up main trig_combiner class and tags
    trig_combiner_out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]
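    # If the inspiral jobs were also run with short time slides, trig_combiner
    # produces additional zero-lag outputs, so request those tags as well.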
    slides = all("COHERENT_NO_INJECTIONS" in t.name for t in trig_files) and \
            cp.has_option_tag("inspiral", "do-short-slides",
                              "coherent_no_injections")
    if slides:
        trig_combiner_out_tags.extend(["ZEROLAG_OFF", "ZEROLAG_ALL"])

    trig_combiner_jobs = trig_combiner_class(cp,
                                             "trig_combiner",
                                             ifo=ifos,
                                             out_dir=output_dir,
                                             tags=tags)

    # Do first stage of trig_combiner and trig_cluster jobs if desired
    if workflow.cp.has_option("workflow-postproc", "do-two-stage-clustering"):
        logging.info("Doing two-stage clustering.")
        trig_combiner_s1_jobs = trig_combiner_class(cp,
                                                    "trig_combiner",
                                                    ifo=ifos,
                                                    out_dir=output_dir,
                                                    tags=tags +
                                                    ["INTERMEDIATE"])

        num_stage_one_jobs = int(
            workflow.cp.get("workflow-postproc", "num-stage-one-cluster-jobs"))
        num_inputs_per_job = -(-len(trig_files) // num_stage_one_jobs)
        split_trig_files = (trig_files[p:p + num_inputs_per_job] for p in \
                            xrange(0, len(trig_files), num_inputs_per_job))
        trig_cluster_s1_jobs = trig_cluster_class(cp,
                                                  "trig_cluster",
                                                  ifo=ifos,
                                                  out_dir=output_dir,
                                                  tags=tags + ["INTERMEDIATE"])
        trig_cluster_s1_nodes = []
        trig_cluster_s1_outs = FileList([])
        for j, s1_inputs in zip(range(num_stage_one_jobs), split_trig_files):
            trig_combiner_s1_node, trig_combiner_s1_outs = \
                    trig_combiner_s1_jobs.create_node(s1_inputs,
                            segment_dir, workflow.analysis_time,
                            out_tags=trig_combiner_out_tags, tags=tags+[str(j)])
            pp_nodes.append(trig_combiner_s1_node)
            workflow.add_node(trig_combiner_s1_node)

            unclust_file = [f for f in trig_combiner_s1_outs \
                            if "ALL_TIMES" in f.tag_str][0]
            trig_cluster_s1_node, curr_outs = trig_cluster_s1_jobs.create_node(\
                    unclust_file)
            trig_cluster_s1_outs.extend(curr_outs)
            clust_file = curr_outs[0]
            trig_cluster_s1_nodes.append(trig_cluster_s1_node)
            pp_nodes.append(trig_cluster_s1_node)
            workflow.add_node(trig_cluster_s1_node)
            dep = dax.Dependency(parent=trig_combiner_s1_node._dax_node,
                                 child=trig_cluster_s1_node._dax_node)
            workflow._adag.addDependency(dep)

        trig_combiner_node, trig_combiner_outs = \
                trig_combiner_jobs.create_node(trig_cluster_s1_outs,
                        segment_dir, workflow.analysis_time,
                        out_tags=trig_combiner_out_tags, tags=tags)
        pp_nodes.append(trig_combiner_node)
        workflow.add_node(trig_combiner_node)
        pp_outs.extend(trig_combiner_outs)
        for trig_cluster_s1_node in trig_cluster_s1_nodes:
            dep = dax.Dependency(parent=trig_cluster_s1_node._dax_node,
                                 child=trig_combiner_node._dax_node)
            workflow._adag.addDependency(dep)

    else:
        trig_combiner_node, trig_combiner_outs = \
                trig_combiner_jobs.create_node(trig_files, segment_dir,
                        workflow.analysis_time, out_tags=trig_combiner_out_tags,
                        tags=tags)
        pp_nodes.append(trig_combiner_node)
        workflow.add_node(trig_combiner_node)
        pp_outs.extend(trig_combiner_outs)

    # Initialise trig_cluster class
    trig_cluster_outs = FileList([])
    trig_cluster_jobs = trig_cluster_class(cp,
                                           "trig_cluster",
                                           ifo=ifos,
                                           out_dir=output_dir,
                                           tags=tags)

    # Initialise sbv_plotter class
    sbv_plotter_outs = FileList([])
    sbv_plotter_jobs = sbv_plotter_class(cp,
                                         "sbv_plotter",
                                         ifo=ifos,
                                         out_dir=output_dir,
                                         tags=tags)

    # Initialise efficiency class
    efficiency_outs = FileList([])
    efficiency_jobs = efficiency_class(cp,
                                       "efficiency",
                                       ifo=ifos,
                                       out_dir=output_dir,
                                       tags=tags)

    # Set up trig_cluster jobs
    trig_cluster_nodes = []
    for out_tag in trig_combiner_out_tags:
        unclust_file = [f for f in trig_combiner_outs \
                        if out_tag in f.tag_str][0]
        trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        trig_cluster_outs.extend(curr_outs)
        clust_file = curr_outs[0]
        trig_cluster_nodes.append(trig_cluster_node)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)
        # If we are not doing time slides
        if ts_trig_files is None:
            if out_tag == "OFFSOURCE":
                off_node = trig_cluster_node
                offsource_clustered = clust_file

            # Add sbv_plotter and efficiency jobs
            workflow, pp_nodes = setup_coh_PTF_plotting_jobs(
                workflow,
                unclust_file,
                clust_file,
                sbv_plotter_jobs,
                efficiency_jobs,
                inj_efficiency_jobs,
                off_node,
                trig_cluster_node,
                offsource_clustered,
                injfinder_nodes,
                injcombiner_nodes,
                injcombiner_outs,
                inj_sbv_plotter_parent_nodes,
                inj_tags,
                injcombiner_out_tags,
                pp_nodes,
                output_dir,
                segment_dir,
                ifos,
                out_tag,
                do_injs=do_injections,
                tags=tags)

    # If doing time slides
    if ts_trig_files is not None:
        trig_combiner_ts_nodes = []
        trig_cluster_ts_nodes = []
        trig_cluster_all_times_nodes = []
        ts_all_times_outs = FileList(
            [out for out in trig_cluster_outs if "ALL_TIMES" in out.tag_str])
        trig_combiner_ts_out_tags = ["ALL_TIMES", "OFFSOURCE"]
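        # Extract the unique SLIDE tag carried by each time-slide trigger file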
        ts_tags = list(
            set([[ts_tag for ts_tag in ts_trig_file.tags
                  if "SLIDE" in ts_tag][0] for ts_trig_file in ts_trig_files]))
        for ts_tag in ts_tags:
            # Do one slide at a time
            ts_trigs = FileList([
                ts_trig_file for ts_trig_file in ts_trig_files
                if ts_tag in ts_trig_file.tags
            ])

            # And do two-stage clustering if desired
            if workflow.cp.has_option("workflow-postproc",
                                      "do-two-stage-clustering"):

                split_trig_files = (
                    ts_trigs[p:p + num_inputs_per_job]
                    for p in range(0, len(ts_trigs), num_inputs_per_job))
                trig_cluster_s1_nodes = []
                trig_cluster_s1_outs = FileList([])
                for j, s1_inputs in zip(range(num_stage_one_jobs),
                                        split_trig_files):
                    trig_combiner_s1_node, trig_combiner_s1_outs = \
                            trig_combiner_s1_jobs.create_node(s1_inputs,
                                     segment_dir, workflow.analysis_time,
                                     out_tags=trig_combiner_ts_out_tags,
                                     slide_tag=ts_tag, tags=tags+[str(j)])
                    pp_nodes.append(trig_combiner_s1_node)
                    workflow.add_node(trig_combiner_s1_node)

                    unclust_file = [f for f in trig_combiner_s1_outs \
                                    if "ALL_TIMES" in f.tag_str][0]
                    trig_cluster_s1_node, curr_outs = \
                            trig_cluster_s1_jobs.create_node(unclust_file)
                    trig_cluster_s1_outs.extend(curr_outs)
                    clust_file = curr_outs[0]
                    trig_cluster_s1_nodes.append(trig_cluster_s1_node)
                    pp_nodes.append(trig_cluster_s1_node)
                    workflow.add_node(trig_cluster_s1_node)
                    dep = dax.Dependency(
                        parent=trig_combiner_s1_node._dax_node,
                        child=trig_cluster_s1_node._dax_node)
                    workflow._adag.addDependency(dep)

                trig_combiner_ts_node, trig_combiner_ts_outs = \
                        trig_combiner_jobs.create_node(trig_cluster_s1_outs,
                                segment_dir, workflow.analysis_time,
                                slide_tag=ts_tag,
                                out_tags=trig_combiner_ts_out_tags, tags=tags)
                trig_combiner_ts_nodes.append(trig_combiner_ts_node)
                pp_nodes.append(trig_combiner_ts_node)
                workflow.add_node(trig_combiner_ts_node)
                pp_outs.extend(trig_combiner_ts_outs)
                for trig_cluster_s1_node in trig_cluster_s1_nodes:
                    dep = dax.Dependency(parent=trig_cluster_s1_node._dax_node,
                                         child=trig_combiner_ts_node._dax_node)
                    workflow._adag.addDependency(dep)
            else:
                trig_combiner_ts_node, trig_combiner_ts_outs = \
                        trig_combiner_jobs.create_node(ts_trigs, segment_dir,
                                workflow.analysis_time, slide_tag=ts_tag,
                                out_tags=trig_combiner_ts_out_tags, tags=tags)
                trig_combiner_ts_nodes.append(trig_combiner_ts_node)
                pp_nodes.append(trig_combiner_ts_node)
                workflow.add_node(trig_combiner_ts_node)
                pp_outs.extend(trig_combiner_ts_outs)

            # Set up trig cluster jobs for each timeslide
            for ts_out_tag in trig_combiner_ts_out_tags:
                unclust_file = [f for f in trig_combiner_ts_outs \
                                if ts_out_tag in f.tag_str][0]
                trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                        unclust_file)
                trig_cluster_outs.extend(curr_outs)
                clust_file = curr_outs[0]
                trig_cluster_ts_nodes.append(trig_cluster_node)
                pp_nodes.append(trig_cluster_node)
                workflow.add_node(trig_cluster_node)
                dep = dax.Dependency(parent=trig_combiner_ts_node._dax_node,
                                     child=trig_cluster_node._dax_node)
                workflow._adag.addDependency(dep)
                if ts_out_tag == "ALL_TIMES":
                    trig_cluster_all_times_nodes.append(trig_cluster_node)
                    ts_all_times_outs.extend(FileList([clust_file]))

        # Combine all timeslides
        trig_combiner_all_node, trig_combiner_all_outs = \
                trig_combiner_jobs.create_node(ts_all_times_outs, segment_dir,
                            workflow.analysis_time, slide_tag="ALL_SLIDES",
                            out_tags=trig_combiner_ts_out_tags, tags=tags)
        pp_nodes.append(trig_combiner_all_node)
        workflow.add_node(trig_combiner_all_node)
        for trig_cluster_ts_node in trig_cluster_all_times_nodes:
            dep = dax.Dependency(parent=trig_cluster_ts_node._dax_node,
                                 child=trig_combiner_all_node._dax_node)
            workflow._adag.addDependency(dep)

        # Swap the individual ALL_TIMES/OFFSOURCE clustered files for the
        # combined all-slides outputs
        for out_tag in trig_combiner_ts_out_tags:
            trig_cluster_outs = FileList(
                [f for f in trig_cluster_outs if out_tag not in f.tag_str])
        trig_cluster_outs.extend(trig_combiner_all_outs)
        off_node = trig_combiner_all_node
        offsource_clustered = [
            f for f in trig_cluster_outs
            if "OFFSOURCE" in f.tag_str and "ZERO_LAG" not in f.tag_str
        ][0]

        # Add sbv_plotter and efficiency jobs
        for out_tag in trig_combiner_out_tags:
            clust_file = [f for f in trig_cluster_outs \
                          if out_tag in f.tag_str][0]

            workflow, pp_nodes = setup_coh_PTF_plotting_jobs(
                workflow,
                unclust_file,
                clust_file,
                sbv_plotter_jobs,
                efficiency_jobs,
                inj_efficiency_jobs,
                off_node,
                off_node,
                offsource_clustered,
                injfinder_nodes,
                injcombiner_nodes,
                injcombiner_outs,
                inj_sbv_plotter_parent_nodes,
                inj_tags,
                injcombiner_out_tags,
                pp_nodes,
                output_dir,
                segment_dir,
                ifos,
                out_tag,
                do_injs=do_injections,
                tags=tags)

    trial = 1
    while trial <= num_trials:
        trial_tag = "OFFTRIAL_%d" % trial
        unclust_file = [f for f in trig_combiner_outs \
                        if trial_tag in f.tag_str][0]
        trig_cluster_node, clust_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        clust_file = clust_outs[0]
        trig_cluster_outs.extend(clust_outs)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)

        # Add efficiency job
        efficiency_node = efficiency_jobs.create_node(clust_file,
                                                      offsource_clustered,
                                                      segment_dir,
                                                      tags=[trial_tag])
        pp_nodes.append(efficiency_node)
        workflow.add_node(efficiency_node)
        dep = dax.Dependency(parent=off_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)
        dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)

        # Adding inj_efficiency job
        if do_injections:
            for tag in injcombiner_out_tags:
                if "_FILTERED_" in tag:
                    inj_set_tag = [t for t in inj_tags if \
                                   str(tag).replace("_FILTERED_", "") in t][0]
                else:
                    inj_set_tag = str(tag)

                found_file = [file for file in injcombiner_outs \
                              if tag + "_FOUND" in file.tag_str][0]
                missed_file = [file for file in injcombiner_outs \
                               if tag + "_MISSED" in file.tag_str][0]
                inj_efficiency_node = inj_efficiency_jobs.create_node(\
                        clust_file, offsource_clustered, segment_dir,
                        found_file, missed_file, tags=[trial_tag, tag,
                                                       inj_set_tag])
                pp_nodes.append(inj_efficiency_node)
                workflow.add_node(inj_efficiency_node)
                dep = dax.Dependency(parent=off_node._dax_node,
                                     child=inj_efficiency_node._dax_node)
                workflow._adag.addDependency(dep)
                for injcombiner_node in injcombiner_nodes:
                    dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                for injfinder_node in injfinder_nodes:
                    dep = dax.Dependency(parent=injfinder_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)

        trial += 1

    # Initialise html_summary class and set up job
    #FIXME: We may want this job to run even if some jobs fail
    html_summary_jobs = html_summary_class(cp,
                                           "html_summary",
                                           ifo=ifos,
                                           out_dir=output_dir,
                                           tags=tags)
    if do_injections:
        tuning_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                       if "DETECTION" in inj_tag]
        exclusion_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                          if "DETECTION" not in inj_tag]
        html_summary_node = html_summary_jobs.create_node(
            c_file=config_file,
            tuning_tags=tuning_tags,
            exclusion_tags=exclusion_tags,
            seg_plot=segs_plot,
            html_dir=html_dir,
            time_slides=slides)
    else:
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                                                          seg_plot=segs_plot,
                                                          html_dir=html_dir,
                                                          time_slides=slides)
    workflow.add_node(html_summary_node)
    for pp_node in pp_nodes:
        dep = dax.Dependency(parent=pp_node._dax_node,
                             child=html_summary_node._dax_node)
        workflow._adag.addDependency(dep)

    # Make the open box shell script
    try:
        open_box_cmd = html_summary_node.executable.get_pfn() + " "
    except:
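        # No local copy of the executable is available, so have the generated
        # script download it and run it instead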
        exe_path = html_summary_node.executable.get_pfn('nonlocal').replace(\
                "https", "http")
        exe_name = exe_path.rsplit('/', 1)[-1]
        open_box_cmd = "wget %s\n" % exe_path
        open_box_cmd += "chmod 500 ./%s\n./%s " % (exe_name, exe_name)
    open_box_cmd += ' '.join(html_summary_node._args + \
                             html_summary_node._options)
    open_box_cmd += " --open-box"
    open_box_path = "%s/open_the_box.sh" % output_dir
    with open(open_box_path, "w") as f:
        f.write("#!/bin/sh\n%s" % open_box_cmd)
    os.chmod(open_box_path, 0o500)

    pp_outs.extend(trig_cluster_outs)

    return pp_outs
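
The two-line parent/child wiring via dax.Dependency and workflow._adag.addDependency is repeated for every edge added above. A small helper along the following lines (a sketch only; _add_dep is not part of the PyCBC API, and the node objects are assumed to expose the same _dax_node attribute used throughout this example) would remove much of that duplication:

def _add_dep(workflow, parent_node, child_node):
    """Hypothetical helper: declare that child_node must run after parent_node."""
    dep = dax.Dependency(parent=parent_node._dax_node,
                         child=child_node._dax_node)
    workflow._adag.addDependency(dep)

# e.g. the trig_combiner -> trig_cluster edge above would become:
# _add_dep(workflow, trig_combiner_node, trig_cluster_node)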
Example #12
0
def setup_coh_PTF_injections_pp(wf,
                                inj_trigger_files,
                                inj_files,
                                inj_trigger_caches,
                                inj_caches,
                                pp_nodes,
                                pp_outs,
                                inj_tags,
                                out_dir,
                                seg_dir,
                                ifos,
                                tags=None):
    """
    Set up the post-processing jobs for injections.
    """
    injfinder_nodes = []
    injcombiner_parent_nodes = []
    inj_sbv_plotter_parent_nodes = []
    full_segment = inj_trigger_files[0].segment

    injfinder_exe = os.path.basename(wf.cp.get("executables", "injfinder"))
    injfinder_class = select_generic_executable(wf, "injfinder")
    injfinder_jobs = injfinder_class(wf.cp,
                                     "injfinder",
                                     ifo=ifos,
                                     out_dir=out_dir,
                                     tags=tags)

    injcombiner_exe = os.path.basename(wf.cp.get("executables", "injcombiner"))
    injcombiner_class = select_generic_executable(wf, "injcombiner")
    injcombiner_jobs = injcombiner_class(wf.cp,
                                         "injcombiner",
                                         ifo=ifos,
                                         out_dir=out_dir,
                                         tags=tags)

    injfinder_outs = FileList([])
    for inj_tag in inj_tags:
        triggers = FileList([file for file in inj_trigger_files \
                             if inj_tag in file.tag_str])
        injections = FileList([file for file in inj_files \
                               if inj_tag in file.tag_str])
        trig_cache = [file for file in inj_trigger_caches \
                      if inj_tag in file.tag_str][0]
        inj_cache = [file for file in inj_caches \
                     if inj_tag in file.tag_str][0]
        injfinder_node, curr_outs = injfinder_jobs.create_node(\
                triggers, injections, seg_dir, tags=[inj_tag])
        injfinder_nodes.append(injfinder_node)
        pp_nodes.append(injfinder_node)
        wf.add_node(injfinder_node)
        injfinder_outs.extend(curr_outs)
        if "DETECTION" not in curr_outs[0].tagged_description:
            injcombiner_parent_nodes.append(injfinder_node)
        else:
            inj_sbv_plotter_parent_nodes.append(injfinder_node)

    pp_outs.extend(injfinder_outs)

    # Make injfinder output cache
    fm_cache = File(ifos,
                    "foundmissed",
                    full_segment,
                    extension="lcf",
                    directory=out_dir)
    fm_cache.PFN(fm_cache.cache_entry.path, site="local")
    with open(fm_cache.storage_path, "w") as fm_fh:
        injfinder_outs.convert_to_lal_cache().tofile(fm_fh)
    pp_outs.extend(FileList([fm_cache]))

    # Set up injcombiner jobs
    injcombiner_outs = FileList([f for f in injfinder_outs \
                                 if "DETECTION" in f.tag_str])
    injcombiner_tags = [inj_tag for inj_tag in inj_tags \
                        if "DETECTION" not in inj_tag]
    injcombiner_out_tags = [i.tag_str.rsplit('_', 1)[0] for i in \
                            injcombiner_outs if "FOUND" in i.tag_str]
    injcombiner_nodes = []

    for injcombiner_tag in injcombiner_tags:
        max_inc = wf.cp.get_opt_tags("injections", "max-inc",
                                     [injcombiner_tag])
        inj_str = injcombiner_tag.replace("INJ", "")
        inputs = FileList([f for f in injfinder_outs \
                           if injcombiner_tag in f.tagged_description])
        injcombiner_node, curr_outs = injcombiner_jobs.create_node(\
                fm_cache, inputs, inj_str, max_inc, wf.analysis_time)
        injcombiner_nodes.append(injcombiner_node)
        injcombiner_out_tags.append("%s_FILTERED_%s" %
                                    (inj_str.split(max_inc)[0], max_inc))
        injcombiner_outs.extend(curr_outs)
        pp_outs.extend(curr_outs)
        pp_nodes.append(injcombiner_node)
        wf.add_node(injcombiner_node)
        for parent_node in injcombiner_parent_nodes:
            dep = dax.Dependency(parent=parent_node._dax_node,
                                 child=injcombiner_node._dax_node)
            wf._adag.addDependency(dep)

    return (wf, injfinder_nodes, injfinder_outs, fm_cache, injcombiner_nodes,
            injcombiner_outs, injcombiner_out_tags,
            inj_sbv_plotter_parent_nodes, pp_nodes, pp_outs)
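
For orientation, a minimal sketch of how this helper is called from the offline post-processing routine (see the next example); the input FileLists, caches, directories and tags are placeholders assumed to have been built by earlier workflow stages:

# Hypothetical call site; every argument is assumed to exist already.
(wf, injfinder_nodes, injfinder_outs, fm_cache,
 injcombiner_nodes, injcombiner_outs, injcombiner_out_tags,
 inj_sbv_plotter_parent_nodes, pp_nodes, pp_outs) = \
        setup_coh_PTF_injections_pp(wf, inj_trig_files, inj_files,
                                    inj_trig_caches, inj_caches, pp_nodes,
                                    pp_outs, inj_tags, output_dir, segment_dir,
                                    ifos, tags=["POSTPROC1"])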
Example #13
0
def setup_postproc_coh_PTF_offline_workflow(workflow, trig_files, trig_cache,
        ts_trig_files, inj_trig_files, inj_files, inj_trig_caches, inj_caches,
        config_file, output_dir, html_dir, segment_dir, segs_plot, ifos,
        inj_tags=None, tags=None):
    """
    This function sets up the post-processing stage in the workflow, using a
    coh_PTF-style setup. This consists of running trig_combiner to find
    coherent triggers, and injfinder to look for injections. It then runs
    a horizon_dist job, trig_cluster to cluster triggers, and injcombiner to
    calculate injection statistics. Finally, efficiency and sbv_plotter jobs
    calculate efficiency and signal-based veto statistics and make plots.
    
    Parameters
    ----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    trig_files : pycbc.workflow.core.FileList
        A FileList of the trigger files from the on/off source analysis jobs.
    trig_cache : pycbc.workflow.core.File
        A cache file pointing to the trigger files.
    ts_trig_files : pycbc.workflow.core.FileList
        A FileList of the trigger files from the timeslide analysis jobs.
    inj_trig_files : pycbc.workflow.core.FileList
        A FileList of the trigger files produced by injection jobs.
    inj_files : pycbc.workflow.core.FileList
        A FileList of the injection set files.
    inj_trig_caches : pycbc.workflow.core.FileList
        A FileList containing the cache files that point to the injection
        trigger files.
    inj_caches : pycbc.workflow.core.FileList
        A FileList containing cache files that point to the injection files.
    config_file : pycbc.workflow.core.File
        The parsed configuration file.
    output_dir : path
        The directory in which output files will be stored.
    html_dir : path
        The directory where the result webpage will be placed.
    segment_dir : path
        The directory in which data segment information is stored.
    segs_plot : pycbc.workflow.core.File
        The plot showing the analysis segments for each IFO around the GRB time.
        This is produced at the time of workflow generation.
    ifos : list
        A list containing the analysis interferometers.
    inj_tags : list
        List containing the strings used to uniquely identify the injection
        sets included in the analysis.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.

    Returns
    --------
    pp_outs : pycbc.workflow.core.FileList
        A list of the output from this stage.
    """
    if inj_tags is None:
        inj_tags = []
    if tags is None:
        tags = []
    cp = workflow.cp
    full_segment = trig_files[0].segment
    trig_name = cp.get("workflow", "trigger-name")
    grb_string = "GRB" + trig_name
    num_trials = int(cp.get("trig_combiner", "num-trials"))
    do_injections = cp.has_section("workflow-injections")

    pp_outs = FileList([])
    pp_nodes = []

    # Set up needed exe classes
    trig_combiner_class = select_generic_executable(workflow, "trig_combiner")

    trig_cluster_class = select_generic_executable(workflow, "trig_cluster")

    sbv_plotter_class = select_generic_executable(workflow, "sbv_plotter")
    
    efficiency_class = select_generic_executable(workflow, "efficiency")

    #horizon_dist_class = select_generic_executable(workflow, "horizon_dist")

    html_summary_class = select_generic_executable(workflow, "html_summary")

    # Set up injection jobs if desired
    if do_injections:
        workflow, injfinder_nodes, injfinder_outs, fm_cache, \
                injcombiner_nodes, injcombiner_outs, injcombiner_out_tags, \
                inj_sbv_plotter_parent_nodes, pp_nodes, pp_outs = \
                setup_coh_PTF_injections_pp(workflow, inj_trig_files,
                        inj_files, inj_trig_caches, inj_caches, pp_nodes,
                        pp_outs, inj_tags, output_dir, segment_dir, ifos,
                        tags=tags)

        # Initialise injection_efficiency class
        inj_efficiency_jobs = efficiency_class(cp, "inj_efficiency", ifo=ifos,
                                               out_dir=output_dir, tags=tags)

    # Set up main trig_combiner class and tags
    trig_combiner_out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]
    slides = all("COHERENT_NO_INJECTIONS" in t.name for t in trig_files) and \
            cp.has_option_tag("inspiral", "do-short-slides",
                              "coherent_no_injections")
    if slides:
        trig_combiner_out_tags.extend(["ZEROLAG_OFF", "ZEROLAG_ALL"])
    
    trig_combiner_jobs = trig_combiner_class(cp, "trig_combiner", ifo=ifos, 
                                             out_dir=output_dir, tags=tags)

    # Do first stage of trig_combiner and trig_cluster jobs if desired
    if workflow.cp.has_option("workflow-postproc", "do-two-stage-clustering"):
        logging.info("Doing two-stage clustering.")
        trig_combiner_s1_jobs = trig_combiner_class(cp, "trig_combiner",
                ifo=ifos, out_dir=output_dir, tags=tags+["INTERMEDIATE"])

        num_stage_one_jobs = int(workflow.cp.get("workflow-postproc",
            "num-stage-one-cluster-jobs"))
        num_inputs_per_job = -(-len(trig_files) // num_stage_one_jobs)
        split_trig_files = (trig_files[p:p + num_inputs_per_job] for p in \
                            range(0, len(trig_files), num_inputs_per_job))
        trig_cluster_s1_jobs = trig_cluster_class(cp, "trig_cluster", ifo=ifos,
                out_dir=output_dir, tags=tags+["INTERMEDIATE"])
        trig_cluster_s1_nodes = []
        trig_cluster_s1_outs = FileList([])
        for j, s1_inputs in zip(range(num_stage_one_jobs), split_trig_files):
            trig_combiner_s1_node, trig_combiner_s1_outs = \
                    trig_combiner_s1_jobs.create_node(s1_inputs,
                            segment_dir, workflow.analysis_time,
                            out_tags=trig_combiner_out_tags, tags=tags+[str(j)])
            pp_nodes.append(trig_combiner_s1_node)
            workflow.add_node(trig_combiner_s1_node)

            unclust_file = [f for f in trig_combiner_s1_outs \
                            if "ALL_TIMES" in f.tag_str][0]
            trig_cluster_s1_node, curr_outs = trig_cluster_s1_jobs.create_node(\
                    unclust_file)
            trig_cluster_s1_outs.extend(curr_outs)
            clust_file = curr_outs[0]
            trig_cluster_s1_nodes.append(trig_cluster_s1_node)
            pp_nodes.append(trig_cluster_s1_node)
            workflow.add_node(trig_cluster_s1_node)
            dep = dax.Dependency(parent=trig_combiner_s1_node._dax_node,
                                 child=trig_cluster_s1_node._dax_node)
            workflow._adag.addDependency(dep)

        trig_combiner_node, trig_combiner_outs = \
                trig_combiner_jobs.create_node(trig_cluster_s1_outs,
                        segment_dir, workflow.analysis_time,
                        out_tags=trig_combiner_out_tags, tags=tags)
        pp_nodes.append(trig_combiner_node)
        workflow.add_node(trig_combiner_node)
        pp_outs.extend(trig_combiner_outs)
        for trig_cluster_s1_node in trig_cluster_s1_nodes:
            dep = dax.Dependency(parent=trig_cluster_s1_node._dax_node,
                                 child=trig_combiner_node._dax_node)
            workflow._adag.addDependency(dep)

    else:
        trig_combiner_node, trig_combiner_outs = \
                trig_combiner_jobs.create_node(trig_files, segment_dir,
                        workflow.analysis_time, out_tags=trig_combiner_out_tags,
                        tags=tags)
        pp_nodes.append(trig_combiner_node)
        workflow.add_node(trig_combiner_node)
        pp_outs.extend(trig_combiner_outs)

    # Initialise trig_cluster class
    trig_cluster_outs = FileList([])
    trig_cluster_jobs = trig_cluster_class(cp, "trig_cluster", ifo=ifos,
                                           out_dir=output_dir, tags=tags)

    # Initialise sbv_plotter class
    sbv_plotter_outs = FileList([])
    sbv_plotter_jobs = sbv_plotter_class(cp, "sbv_plotter", ifo=ifos,
                                         out_dir=output_dir, tags=tags)

    # Initialise efficiency class
    efficiency_outs = FileList([])
    efficiency_jobs = efficiency_class(cp, "efficiency", ifo=ifos,
                                       out_dir=output_dir, tags=tags)

    # Set up trig_cluster jobs
    trig_cluster_nodes = []
    for out_tag in trig_combiner_out_tags:
        unclust_file = [f for f in trig_combiner_outs \
                        if out_tag in f.tag_str][0]
        trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        trig_cluster_outs.extend(curr_outs)
        clust_file = curr_outs[0]
        trig_cluster_nodes.append(trig_cluster_node)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)
        # If we are not doing time slides
        if ts_trig_files is None:
            if out_tag == "OFFSOURCE":
                off_node = trig_cluster_node
                offsource_clustered = clust_file

            # Add sbv_plotter and efficiency jobs
            workflow, pp_nodes = setup_coh_PTF_plotting_jobs(workflow, 
                    unclust_file, clust_file, sbv_plotter_jobs,
                    efficiency_jobs, inj_efficiency_jobs, off_node,
                    trig_cluster_node, offsource_clustered, injfinder_nodes,
                    injcombiner_nodes, injcombiner_outs,
                    inj_sbv_plotter_parent_nodes, inj_tags,
                    injcombiner_out_tags, pp_nodes, output_dir, segment_dir,
                    ifos, out_tag, do_injs=do_injections, tags=tags)

    # If doing time slides
    if ts_trig_files is not None:
        trig_combiner_ts_nodes = []
        trig_cluster_ts_nodes = []
        trig_cluster_all_times_nodes = []
        ts_all_times_outs = FileList([out for out in trig_cluster_outs
                                      if "ALL_TIMES" in out.tag_str])
        trig_combiner_ts_out_tags = ["ALL_TIMES", "OFFSOURCE"]
        ts_tags = list(set([[ts_tag for ts_tag in ts_trig_file.tags
                             if "SLIDE" in ts_tag][0]
                            for ts_trig_file in ts_trig_files]))
        for ts_tag in ts_tags:
            # Do one slide at a time
            ts_trigs = FileList([ts_trig_file for ts_trig_file in ts_trig_files
                                 if ts_tag in ts_trig_file.tags])

            # And do two-stage clustering if desired
            if workflow.cp.has_option("workflow-postproc",
                                      "do-two-stage-clustering"):

                split_trig_files = (ts_trigs[p:p + num_inputs_per_job]
                        for p in range(0, len(ts_trigs), num_inputs_per_job))
                trig_cluster_s1_nodes = []
                trig_cluster_s1_outs = FileList([])
                for j, s1_inputs in zip(range(num_stage_one_jobs),
                                        split_trig_files):
                    trig_combiner_s1_node, trig_combiner_s1_outs = \
                            trig_combiner_s1_jobs.create_node(s1_inputs,
                                     segment_dir, workflow.analysis_time,
                                     out_tags=trig_combiner_ts_out_tags,
                                     slide_tag=ts_tag, tags=tags+[str(j)])
                    pp_nodes.append(trig_combiner_s1_node)
                    workflow.add_node(trig_combiner_s1_node)

                    unclust_file = [f for f in trig_combiner_s1_outs \
                                    if "ALL_TIMES" in f.tag_str][0]
                    trig_cluster_s1_node, curr_outs = \
                            trig_cluster_s1_jobs.create_node(unclust_file)
                    trig_cluster_s1_outs.extend(curr_outs)
                    clust_file = curr_outs[0]
                    trig_cluster_s1_nodes.append(trig_cluster_s1_node)
                    pp_nodes.append(trig_cluster_s1_node)
                    workflow.add_node(trig_cluster_s1_node)
                    dep = dax.Dependency(parent=trig_combiner_s1_node._dax_node,
                                         child=trig_cluster_s1_node._dax_node)
                    workflow._adag.addDependency(dep)

                trig_combiner_ts_node, trig_combiner_ts_outs = \
                        trig_combiner_jobs.create_node(trig_cluster_s1_outs,
                                segment_dir, workflow.analysis_time,
                                slide_tag=ts_tag,
                                out_tags=trig_combiner_ts_out_tags, tags=tags)
                trig_combiner_ts_nodes.append(trig_combiner_ts_node)
                pp_nodes.append(trig_combiner_ts_node)
                workflow.add_node(trig_combiner_ts_node)
                pp_outs.extend(trig_combiner_ts_outs)
                for trig_cluster_s1_node in trig_cluster_s1_nodes:
                    dep = dax.Dependency(parent=trig_cluster_s1_node._dax_node,
                                         child=trig_combiner_ts_node._dax_node)
                    workflow._adag.addDependency(dep)
            else:
                trig_combiner_ts_node, trig_combiner_ts_outs = \
                        trig_combiner_jobs.create_node(ts_trigs, segment_dir,
                                workflow.analysis_time, slide_tag=ts_tag,
                                out_tags=trig_combiner_ts_out_tags, tags=tags)
                trig_combiner_ts_nodes.append(trig_combiner_ts_node)
                pp_nodes.append(trig_combiner_ts_node)
                workflow.add_node(trig_combiner_ts_node)
                pp_outs.extend(trig_combiner_ts_outs)

            # Set up trig cluster jobs for each timeslide
            for ts_out_tag in trig_combiner_ts_out_tags:
                unclust_file = [f for f in trig_combiner_ts_outs \
                                if ts_out_tag in f.tag_str][0]
                trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                        unclust_file)
                trig_cluster_outs.extend(curr_outs)
                clust_file = curr_outs[0]
                trig_cluster_ts_nodes.append(trig_cluster_node)
                pp_nodes.append(trig_cluster_node)
                workflow.add_node(trig_cluster_node)
                dep = dax.Dependency(parent=trig_combiner_ts_node._dax_node,
                                     child=trig_cluster_node._dax_node)
                workflow._adag.addDependency(dep)        
                if ts_out_tag == "ALL_TIMES":
                    trig_cluster_all_times_nodes.append(trig_cluster_node)
                    ts_all_times_outs.extend(FileList([clust_file]))

        # Combine all timeslides
        trig_combiner_all_node, trig_combiner_all_outs = \
                trig_combiner_jobs.create_node(ts_all_times_outs, segment_dir,
                            workflow.analysis_time, slide_tag="ALL_SLIDES",
                            out_tags=trig_combiner_ts_out_tags, tags=tags)
        pp_nodes.append(trig_combiner_all_node)
        workflow.add_node(trig_combiner_all_node)
        for trig_cluster_ts_node in trig_cluster_all_times_nodes:
            dep = dax.Dependency(parent=trig_cluster_ts_node._dax_node,
                                 child=trig_combiner_all_node._dax_node)
            workflow._adag.addDependency(dep)        

        for out_tag in trig_combiner_ts_out_tags:
            trig_cluster_outs = FileList([f for f in trig_cluster_outs
                                          if out_tag not in f.tag_str])
        trig_cluster_outs.extend(trig_combiner_all_outs)
        off_node = trig_combiner_all_node
        offsource_clustered = [f for f in trig_cluster_outs
                               if "OFFSOURCE" in f.tag_str
                               and "ZERO_LAG" not in f.tag_str][0]

        # Add sbv_plotter and efficiency jobs
        for out_tag in trig_combiner_out_tags:
            clust_file = [f for f in trig_cluster_outs \
                          if out_tag in f.tag_str][0]

            workflow, pp_nodes = setup_coh_PTF_plotting_jobs(workflow, 
                    unclust_file, clust_file, sbv_plotter_jobs,
                    efficiency_jobs, inj_efficiency_jobs, off_node, off_node,
                    offsource_clustered, injfinder_nodes, injcombiner_nodes,
                    injcombiner_outs, inj_sbv_plotter_parent_nodes, inj_tags,
                    injcombiner_out_tags, pp_nodes, output_dir, segment_dir,
                    ifos, out_tag, do_injs=do_injections, tags=tags)

    trial = 1
    while trial <= num_trials:
        trial_tag = "OFFTRIAL_%d" % trial
        unclust_file = [f for f in trig_combiner_outs \
                        if trial_tag in f.tag_str][0]
        trig_cluster_node, clust_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        clust_file = clust_outs[0]
        trig_cluster_outs.extend(clust_outs)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)

        # Add efficiency job
        efficiency_node = efficiency_jobs.create_node(clust_file,
                offsource_clustered, segment_dir, tags=[trial_tag])
        pp_nodes.append(efficiency_node)
        workflow.add_node(efficiency_node)
        dep = dax.Dependency(parent=off_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)
        dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)

        # Adding inj_efficiency job
        if do_injections:
            for tag in injcombiner_out_tags:
                if "_FILTERED_" in tag:
                    inj_set_tag = [t for t in inj_tags if \
                                   str(tag).replace("_FILTERED_", "") in t][0]
                else:
                    inj_set_tag = str(tag)

                found_file = [file for file in injcombiner_outs \
                              if tag + "_FOUND" in file.tag_str][0]
                missed_file = [file for file in injcombiner_outs \
                               if tag + "_MISSED" in file.tag_str][0]
                inj_efficiency_node = inj_efficiency_jobs.create_node(\
                        clust_file, offsource_clustered, segment_dir,
                        found_file, missed_file, tags=[trial_tag, tag,
                                                       inj_set_tag])
                pp_nodes.append(inj_efficiency_node)
                workflow.add_node(inj_efficiency_node)
                dep = dax.Dependency(parent=off_node._dax_node,
                                     child=inj_efficiency_node._dax_node)
                workflow._adag.addDependency(dep)
                for injcombiner_node in injcombiner_nodes:
                    dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                for injfinder_node in injfinder_nodes:
                    dep = dax.Dependency(parent=injfinder_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)

        trial += 1

    # Initialise html_summary class and set up job
    #FIXME: We may want this job to run even if some jobs fail
    html_summary_jobs = html_summary_class(cp, "html_summary", ifo=ifos,
                                           out_dir=output_dir, tags=tags)
    if do_injections:
        tuning_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                       if "DETECTION" in inj_tag]
        exclusion_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                          if "DETECTION" not in inj_tag]
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                tuning_tags=tuning_tags, exclusion_tags=exclusion_tags,
                seg_plot=segs_plot, html_dir=html_dir, time_slides=slides)
    else:
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                seg_plot=segs_plot, html_dir=html_dir, time_slides=slides)
    workflow.add_node(html_summary_node)
    for pp_node in pp_nodes:
        dep = dax.Dependency(parent=pp_node._dax_node,
                             child=html_summary_node._dax_node)
        workflow._adag.addDependency(dep)

    # Make the open box shell script
    try:
        open_box_cmd = html_summary_node.executable.get_pfn() + " "
    except:
        exe_path = html_summary_node.executable.get_pfn('nonlocal').replace(\
                "https", "http")
        exe_name = exe_path.rsplit('/', 1)[-1]
        open_box_cmd = "wget %s\n" % exe_path
        open_box_cmd += "chmod 500 ./%s\n./%s " % (exe_name, exe_name)
    open_box_cmd += ' '.join(html_summary_node._args + \
                             html_summary_node._options)
    open_box_cmd += " --open-box"
    open_box_path = "%s/open_the_box.sh" % output_dir
    with open(open_box_path, "w") as f:
        f.write("#!/bin/sh\n%s" % open_box_cmd)
    os.chmod(open_box_path, 0o500)

    pp_outs.extend(trig_cluster_outs)

    return pp_outs
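
A minimal sketch of how this post-processing stage might be invoked from a PyGRB workflow script; all of the inputs (trigger FileLists, caches, the configuration file, the output directories and the segment plot) are placeholders assumed to come from earlier stages of the workflow:

# Hypothetical call; arguments mirror the signature above.
pp_files = setup_postproc_coh_PTF_offline_workflow(
        workflow, trig_files, trig_cache, ts_trig_files, inj_trig_files,
        inj_files, inj_trig_caches, inj_caches, config_file, output_dir,
        html_dir, segment_dir, segs_plot, ifos, inj_tags=inj_tags,
        tags=["POSTPROC1"])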
Example #14
0
def setup_coh_PTF_injections_pp(wf, inj_trigger_files, inj_files,
                                inj_trigger_caches, inj_caches,
                                pp_nodes, pp_outs, inj_tags, out_dir, seg_dir,
                                ifos, tags=None):
    """
    Set up the post-processing jobs for injections.
    """
    injfinder_nodes = []
    injcombiner_parent_nodes = []
    inj_sbv_plotter_parent_nodes = []
    full_segment = inj_trigger_files[0].segment

    injfinder_exe = os.path.basename(wf.cp.get("executables", "injfinder"))
    injfinder_class = select_generic_executable(wf, "injfinder")
    injfinder_jobs = injfinder_class(wf.cp, "injfinder", ifo=ifos,
                                     out_dir=out_dir, tags=tags)

    injcombiner_exe = os.path.basename(wf.cp.get("executables", "injcombiner"))
    injcombiner_class = select_generic_executable(wf, "injcombiner")
    injcombiner_jobs = injcombiner_class(wf.cp, "injcombiner", ifo=ifos,
                                         out_dir=out_dir, tags=tags)

    injfinder_outs = FileList([])
    for inj_tag in inj_tags:
        triggers = FileList([file for file in inj_trigger_files \
                             if inj_tag in file.tag_str])
        injections = FileList([file for file in inj_files \
                               if inj_tag in file.tag_str])
        trig_cache = [file for file in inj_trigger_caches \
                      if inj_tag in file.tag_str][0]
        inj_cache = [file for file in inj_caches \
                     if inj_tag in file.tag_str][0]
        injfinder_node, curr_outs = injfinder_jobs.create_node(\
                triggers, injections, seg_dir, tags=[inj_tag])
        injfinder_nodes.append(injfinder_node)
        pp_nodes.append(injfinder_node)
        wf.add_node(injfinder_node)
        injfinder_outs.extend(curr_outs)
        if "DETECTION" not in curr_outs[0].tagged_description:
            injcombiner_parent_nodes.append(injfinder_node)
        else:
            inj_sbv_plotter_parent_nodes.append(injfinder_node)

    pp_outs.extend(injfinder_outs)

    # Make injfinder output cache
    fm_cache = File(ifos, "foundmissed", full_segment,
                    extension="lcf", directory=out_dir)
    fm_cache.PFN(fm_cache.cache_entry.path, site="local")
    with open(fm_cache.storage_path, "w") as fm_fh:
        injfinder_outs.convert_to_lal_cache().tofile(fm_fh)
    pp_outs.extend(FileList([fm_cache]))

    # Set up injcombiner jobs
    injcombiner_outs = FileList([f for f in injfinder_outs \
                                 if "DETECTION" in f.tag_str])
    injcombiner_tags = [inj_tag for inj_tag in inj_tags \
                        if "DETECTION" not in inj_tag]
    injcombiner_out_tags = [i.tag_str.rsplit('_', 1)[0] for i in \
                            injcombiner_outs if "FOUND" in i.tag_str]
    injcombiner_nodes = []

    for injcombiner_tag in injcombiner_tags:
        max_inc = wf.cp.get_opt_tags("injections", "max-inc",
                                     [injcombiner_tag])
        inj_str = injcombiner_tag.replace("INJ", "")
        inputs = FileList([f for f in injfinder_outs \
                           if injcombiner_tag in f.tagged_description])
        injcombiner_node, curr_outs = injcombiner_jobs.create_node(\
                fm_cache, inputs, inj_str, max_inc, wf.analysis_time)
        injcombiner_nodes.append(injcombiner_node)
        injcombiner_out_tags.append("%s_FILTERED_%s"
                                    % (inj_str.split(max_inc)[0], max_inc))
        injcombiner_outs.extend(curr_outs)
        pp_outs.extend(curr_outs)
        pp_nodes.append(injcombiner_node)
        wf.add_node(injcombiner_node)
        for parent_node in injcombiner_parent_nodes:
            dep = dax.Dependency(parent=parent_node._dax_node,
                                 child=injcombiner_node._dax_node)
            wf._adag.addDependency(dep)

    return (wf, injfinder_nodes, injfinder_outs, fm_cache, injcombiner_nodes,
            injcombiner_outs, injcombiner_out_tags,
            inj_sbv_plotter_parent_nodes, pp_nodes, pp_outs)
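
The two-stage clustering paths above rely on the -(-a // b) ceiling-division idiom to chunk the trigger files across the stage-one jobs. In isolation, with made-up numbers standing in for real FileLists, the splitting behaves like this:

trig_files = list(range(10))        # stand-in for a FileList of 10 trigger files
num_stage_one_jobs = 4
num_inputs_per_job = -(-len(trig_files) // num_stage_one_jobs)   # ceil(10 / 4) == 3
split_trig_files = (trig_files[p:p + num_inputs_per_job]
                    for p in range(0, len(trig_files), num_inputs_per_job))
print(list(split_trig_files))
# [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]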