Example #1
def make_segments_plot(workflow, seg_files, out_dir, tags=[]):
    make_analysis_dir(out_dir)
    node = PlotExecutable(workflow.cp, 'plot_segments', ifos=workflow.ifos,
                         out_dir=out_dir, tags=tags).create_node()
    node.add_input_list_opt('--segment-files', seg_files)
    node.new_output_file_opt(workflow.analysis_time, '.html', '--output-file')
    workflow += node
Example #2
def make_snrifar_plot(workflow, bg_file, out_dir, tags=[]):
    make_analysis_dir(out_dir)
    node = PlotExecutable(workflow.cp, 'plot_snrifar', ifos=workflow.ifos,
                out_dir=out_dir, tags=tags).create_node()
    node.add_input_opt('--trigger-file', bg_file)
    node.new_output_file_opt(bg_file.segment, '.png', '--output-file')
    workflow += node
Example #3
def setup_postprocessing_preparation(workflow, triggerFiles, output_dir,
                                     tags=[], **kwargs):
    """
    This function aims to be the gateway for preparing the output of the
    coincidence and/or matched-filtering stages of the workflow for
    calculation of the significance of triggers and any rate statements that
    are to be made. In practice this normally means combining output files,
    performing any clustering and performing mapping between triggers and
    simulations where needed.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    triggerFiles : pycbc.workflow.core.FileList
        A FileList of the trigger files that are used as
        input at this stage.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.

    Returns
    --------
    postProcPreppedFiles : pycbc.workflow.core.FileList
        A list of files that can be used as input for the post-processing stage.
    """
    logging.info("Entering post-processing preparation module.")
    make_analysis_dir(output_dir)

    # Parse for options in .ini file
    postProcPrepMethod = workflow.cp.get_opt_tags("workflow-postprocprep",
                                        "postprocprep-method", tags)

    # Scope here for adding different options/methods. For now we only have
    # the single_stage ihope method, which consists of converting the
    # ligolw_thinca output xml into one file, clustering, performing injection
    # finding and putting everything into one SQL database.
    if postProcPrepMethod == "PIPEDOWN_WORKFLOW":
        # If you want the intermediate output files, call this directly
        postProcPreppedFiles,_,_,_ = setup_postprocprep_pipedown_workflow(
                                       workflow, triggerFiles, output_dir,
                                       tags=tags, **kwargs)
    elif postProcPrepMethod == "PIPEDOWN_REPOP":
        postProcPreppedFiles,_,_,_ = setup_postprocprep_pipedown_workflow(
                                       workflow, triggerFiles, output_dir,
                                       tags=tags, do_repop=True, **kwargs)
    elif postProcPrepMethod == "GSTLAL_POSTPROCPREP":
        postProcPreppedFiles = setup_postprocprep_gstlal_workflow(workflow,
                                 triggerFiles, output_dir, tags=tags, **kwargs)
    else:
        errMsg = "Post-processing preparation method not recognized. Must be "
        errMsg += "one of PIPEDOWN_WORKFLOW, PIPEDOWN_REPOP or "
        errMsg += "GSTLAL_POSTPROCPREP."
        raise ValueError(errMsg)

    logging.info("Leaving post-processing preparation module.")

    return postProcPreppedFiles
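
The `get_opt_tags` call above is a tag-aware ini lookup: options may be overridden in tagged subsections. A minimal, self-contained sketch of that behaviour (the fallback logic here is an illustrative assumption, not the actual pycbc implementation):

# Sketch: look for the option in a "[section-tag]" subsection first, then
# fall back to the plain "[section]". Section and option names are taken
# from the example above; the override rule is assumed for illustration.
from configparser import ConfigParser

def get_opt_tags(cp, section, option, tags):
    for tag in tags:
        tagged = '%s-%s' % (section, tag.lower())
        if cp.has_option(tagged, option):
            return cp.get(tagged, option)
    return cp.get(section, option)

cp = ConfigParser()
cp.read_string("""\
[workflow-postprocprep]
postprocprep-method = PIPEDOWN_WORKFLOW
""")
print(get_opt_tags(cp, 'workflow-postprocprep',
                   'postprocprep-method', ['POSTPROC1']))
# PIPEDOWN_WORKFLOW (no tagged override present)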
Example #4
def make_average_psd(workflow, psd_files, out_dir, tags=None,
                     gate_files=None,
                     output_fmt='.txt'):
    make_analysis_dir(out_dir)
    tags = [] if tags is None else tags
    node = AvgPSDExecutable(workflow.cp, 'average_psd', ifos=workflow.ifos,
                            out_dir=out_dir, tags=tags).create_node()
    node.add_input_list_opt('--input-files', psd_files)
    node.new_output_file_opt(workflow.analysis_time, output_fmt,
                             '--detector-avg-file')

    # FIXME should Node have a public method for handling
    # multidetector output options of type --option H1:foo L1:bar?
    node.add_opt('--time-avg-file')
    for ifo in workflow.ifos:
        time_avg_file = File(ifo, node.executable.name, workflow.analysis_time,
                             extension=output_fmt, directory=out_dir,
                             tags=tags)
        multi_ifo_string = ifo + ':' + time_avg_file.name
        node.add_opt(multi_ifo_string)
        node._add_output(time_avg_file)
    
        if gate_files is not None:
            ifo_gate = None
            for gate_file in gate_files:
                if gate_file.ifo == ifo:
                    ifo_gate = gate_file
            
            if ifo_gate is not None:
                node.add_input_opt('--gating-file', ifo_gate)

    workflow += node
    return node.output_files
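
The FIXME loop above assembles the multi-detector output option by hand. As a plain illustration of the string it builds (file names made up):

# One "IFO:filename" token is appended per detector after the
# --time-avg-file flag; this is what reaches the command line.
ifos = ['H1', 'L1']
names = {'H1': 'H1-AVG_PSD.txt', 'L1': 'L1-AVG_PSD.txt'}  # made-up names
opt = ['--time-avg-file'] + ['%s:%s' % (ifo, names[ifo]) for ifo in ifos]
print(' '.join(opt))
# --time-avg-file H1:H1-AVG_PSD.txt L1:L1-AVG_PSD.txt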
Example #5
def make_average_psd(workflow,
                     psd_files,
                     out_dir,
                     tags=None,
                     output_fmt='.txt'):
    make_analysis_dir(out_dir)
    tags = [] if tags is None else tags
    node = AvgPSDExecutable(workflow.cp,
                            'average_psd',
                            ifos=workflow.ifos,
                            out_dir=out_dir,
                            tags=tags).create_node()
    node.add_input_list_opt('--input-files', psd_files)

    if len(workflow.ifos) > 1:
        node.new_output_file_opt(workflow.analysis_time, output_fmt,
                                 '--detector-avg-file')

    node.new_multiifo_output_list_opt('--time-avg-file',
                                      workflow.ifos,
                                      workflow.analysis_time,
                                      output_fmt,
                                      tags=tags)

    workflow += node
    return node.output_files
Example #6
def setup_combine_statmap(workflow, final_bg_file_list, bg_file_list,
                          out_dir, tags=None):
    """
    Combine the statmap files into one background file
    """
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    logging.info('Setting up combine statmap')

    cstat_exe_name = os.path.basename(workflow.cp.get("executables",
                                                      "combine_statmap"))
    if cstat_exe_name == 'pycbc_combine_statmap':
        cstat_class = PyCBCCombineStatmap
    elif cstat_exe_name == 'pycbc_add_statmap':
        cstat_class = PyCBCAddStatmap
    else:
        raise NotImplementedError('executable should be '
            'pycbc_combine_statmap or pycbc_add_statmap')

    cstat_exe = cstat_class(workflow.cp, 'combine_statmap', ifos=workflow.ifos,
                            tags=tags, out_dir=out_dir)

    if cstat_exe_name == 'pycbc_combine_statmap':
        combine_statmap_node = cstat_exe.create_node(final_bg_file_list)
    elif cstat_exe_name == 'pycbc_add_statmap':
        combine_statmap_node = cstat_exe.create_node(final_bg_file_list,
                                                     bg_file_list)

    workflow.add_node(combine_statmap_node)
    return combine_statmap_node.output_file
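
The branching above keys the handler class off the basename of the configured executable. A self-contained illustration of that dispatch (the path and the class names stand in for the real objects):

import os

handlers = {'pycbc_combine_statmap': 'PyCBCCombineStatmap',
            'pycbc_add_statmap': 'PyCBCAddStatmap'}
exe_path = '/usr/bin/pycbc_add_statmap'  # made-up path
print(handlers[os.path.basename(exe_path)])  # PyCBCAddStatmap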
Example #7
def setup_exclude_zerolag(workflow,
                          statmap_file,
                          other_statmap_files,
                          out_dir,
                          ifos,
                          tags=None):
    """
    Exclude single triggers close to zerolag triggers from forming any
    background events
    """
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    logging.info('Setting up exclude zerolag')

    exc_zerolag_exe = PyCBCExcludeZerolag(workflow.cp,
                                          'exclude_zerolag',
                                          ifos=ifos,
                                          tags=tags,
                                          out_dir=out_dir)
    exc_zerolag_node = exc_zerolag_exe.create_node(statmap_file,
                                                   other_statmap_files,
                                                   tags=None)
    workflow.add_node(exc_zerolag_node)
    return exc_zerolag_node.output_file
Example #8
def setup_psd_calculate(workflow, frame_files, ifo, segments,
                        segment_name, out_dir,
                        gate_files=None, tags=None):
    make_analysis_dir(out_dir)
    tags = [] if not tags else tags
    if workflow.cp.has_option_tags('workflow-psd', 'parallelization-factor', tags=tags):
        num_parts = int(workflow.cp.get_opt_tags('workflow-psd', 
                                                 'parallelization-factor',
                                                 tags=tags))
    else:
        num_parts = 1
        
    # get rid of duplicate segments which happen when splitting the bank
    segments = segmentlist(frozenset(segments))       
        
    segment_lists = list(chunks(segments, num_parts)) 
    
    psd_files = FileList([])
    for i, segs in enumerate(segment_lists):
        seg_file = segments_to_file(segmentlist(segs), 
                               out_dir + '/%s-INSPIRAL_DATA-%s.xml' % (ifo, i), 
                               'INSPIRAL_DATA', ifo=ifo)

        psd_files += [make_psd_file(workflow, frame_files, seg_file,
                                    segment_name, out_dir, 
                                    gate_files=gate_files, 
                                    tags=tags + ['PART%s' % i])]
    
    if num_parts > 1:
        return merge_psds(workflow, psd_files, ifo, out_dir, tags=tags)
    else:
        return psd_files[0]
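
The `chunks` helper is used here but not shown. A plausible stand-in that splits a list into `num_parts` contiguous, roughly equal pieces (an assumption; the real pycbc helper may differ):

def chunks(seq, n):
    # Split seq into n contiguous, roughly equal-sized slices.
    avg = len(seq) / float(n)
    out, last = [], 0.0
    while last < len(seq):
        out.append(seq[int(last):int(last + avg)])
        last += avg
    return out

print(chunks(list(range(10)), 3))  # [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]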
Example #9
def make_psd_file(workflow, frame_files, segment_file, segment_name, out_dir,
                  gate_files=None, tags=None):
    make_analysis_dir(out_dir)
    tags = [] if not tags else tags
    exe = CalcPSDExecutable(workflow.cp, 'calculate_psd',
                             ifos=segment_file.ifo, out_dir=out_dir,
                             tags=tags)
    node = exe.create_node()
    node.add_input_opt('--analysis-segment-file', segment_file)
    node.add_opt('--segment-name', segment_name)
    
    if gate_files is not None:
        ifo_gate = None
        for gate_file in gate_files:
            if gate_file.ifo == segment_file.ifo:
                ifo_gate = gate_file
        
        if ifo_gate is not None:
            node.add_input_opt('--gating-file', ifo_gate)
    
    if not exe.has_opt('frame-type'):
        node.add_input_list_opt('--frame-files', frame_files)
    node.new_output_file_opt(workflow.analysis_time, '.hdf', '--output-file')
    workflow += node
    return node.output_files[0]
Example #10
def setup_psd_calculate(workflow, frame_files, ifo, segments,
                        segment_name, out_dir, tags=None):
    make_analysis_dir(out_dir)
    tags = [] if not tags else tags
    if workflow.cp.has_option_tags('workflow-psd', 'parallelization-factor', tags=tags):
        num_parts = int(workflow.cp.get_opt_tags('workflow-psd',
                                                 'parallelization-factor',
                                                 tags=tags))
    else:
        num_parts = 1

    # get rid of duplicate segments which happen when splitting the bank
    segments = segmentlist(frozenset(segments))

    segment_lists = list(chunks(segments, num_parts))

    psd_files = FileList([])
    for i, segs in enumerate(segment_lists):
        seg_file = SegFile.from_segment_list('%s_%s' %(segment_name, i),
                         segmentlist(segs), segment_name, ifo,
                         valid_segment=workflow.analysis_time,
                         extension='xml', directory=out_dir)

        psd_files += [make_psd_file(workflow, frame_files, seg_file,
                                    segment_name, out_dir,
                                    tags=tags + ['PART%s' % i])]

    if num_parts > 1:
        return merge_psds(workflow, psd_files, ifo, out_dir, tags=tags)
    else:
        return psd_files[0]
Example #11
def setup_psd_calculate(workflow, frame_files, ifo, segments,
                        segment_name, out_dir,
                        gate_files=None, tags=None):
    make_analysis_dir(out_dir)
    tags = [] if not tags else tags
    if workflow.cp.has_option_tags('workflow-psd', 'parallelization-factor', tags=tags):
        num_parts = int(workflow.cp.get_opt_tags('workflow-psd', 
                                                 'parallelization-factor',
                                                 tags=tags))
    else:
        num_parts = 1
        
    segment_lists = list(chunks(segments, num_parts)) 
    
    psd_files = FileList([])
    for i, segs in enumerate(segment_lists):
        seg_file = segments_to_file(segmentlist(segs), 
                               out_dir + '/%s-INSPIRAL_DATA-%s.xml' % (ifo, i), 
                               'INSPIRAL_DATA', ifo=ifo)

        psd_files += [make_psd_file(workflow, frame_files, seg_file,
                                    segment_name, out_dir, 
                                    gate_files=gate_files, 
                                    tags=tags + ['PART%s' % i])]
    
    if num_parts > 1:
        return merge_psds(workflow, psd_files, ifo, out_dir, tags=tags)
    else:
        return psd_files[0]
Example #12
def make_psd_file(workflow,
                  frame_files,
                  segment_file,
                  segment_name,
                  out_dir,
                  gate_files=None,
                  tags=None):
    make_analysis_dir(out_dir)
    tags = [] if not tags else tags
    exe = CalcPSDExecutable(workflow.cp,
                            'calculate_psd',
                            ifos=segment_file.ifo,
                            out_dir=out_dir,
                            tags=tags)
    node = exe.create_node()
    node.add_input_opt('--analysis-segment-file', segment_file)
    node.add_opt('--segment-name', segment_name)

    if gate_files is not None:
        ifo_gate = None
        for gate_file in gate_files:
            if gate_file.ifo == segment_file.ifo:
                ifo_gate = gate_file

        if ifo_gate is not None:
            node.add_input_opt('--gating-file', ifo_gate)

    if not exe.has_opt('frame-type'):
        node.add_input_list_opt('--frame-files', frame_files)
    node.new_output_file_opt(workflow.analysis_time, '.hdf', '--output-file')
    workflow += node
    return node.output_files[0]
Example #13
def save_veto_definer(cp, out_dir, tags=None):
    """ Retrieve the veto definer file and save it locally

    Parameters
    -----------
    cp : ConfigParser instance
    out_dir : path
    tags : list of strings
        Used to retrieve subsections of the ini file for
        configuration options.
    """
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    veto_def_url = cp.get_opt_tags("workflow-segments",
                                   "segments-veto-definer-url", tags)
    veto_def_base_name = os.path.basename(veto_def_url)
    veto_def_new_path = os.path.abspath(
        os.path.join(out_dir, veto_def_base_name))
    # No need to re-download if this has already been done
    resolve_url(veto_def_url, out_dir)

    # and update location
    cp.set("workflow-segments", "segments-veto-definer-file",
           veto_def_new_path)
    return veto_def_new_path
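
A hedged sketch of what the `resolve_url` call is assumed to do here: fetch the veto-definer file into `out_dir`, skipping work when it is already present (the real pycbc helper handles more protocols and caching):

import os
import shutil
from urllib.parse import urlparse
from urllib.request import urlretrieve

def resolve_url(url, out_dir):
    # Hypothetical stand-in: download http(s) URLs, copy local paths,
    # and do nothing if the destination file already exists.
    dest = os.path.join(out_dir, os.path.basename(url))
    if os.path.exists(dest):
        return dest
    if urlparse(url).scheme in ('http', 'https'):
        urlretrieve(url, dest)
    else:
        shutil.copy(url, dest)
    return dest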
Example #14
def make_inj_table(workflow, inj_file, out_dir, tags=[]):
    make_analysis_dir(out_dir)
    node = PlotExecutable(workflow.cp, 'page_injections', ifos=workflow.ifos,
                    out_dir=out_dir, tags=tags).create_node()
    node.add_input_opt('--injection-file', inj_file)
    node.new_output_file_opt(inj_file.segment, '.html', '--output-file')
    workflow += node   
Example #15
def convert_trig_to_hdf(workflow,
                        hdfbank,
                        xml_trigger_files,
                        out_dir,
                        tags=None):
    """Return the list of hdf5 trigger files outpus
    """
    if tags is None:
        tags = []
    #FIXME, make me not needed
    logging.info('convert single inspiral trigger files to hdf5')
    make_analysis_dir(out_dir)

    ifos, insp_groups = xml_trigger_files.categorize_by_attr('ifo')
    trig_files = FileList()
    for ifo, insp_group in zip(ifos, insp_groups):
        trig2hdf_exe = PyCBCTrig2HDFExecutable(workflow.cp,
                                               'trig2hdf',
                                               ifos=ifo,
                                               out_dir=out_dir,
                                               tags=tags)
        segs, insp_bundles = insp_group.categorize_by_attr('segment')
        for insps in insp_bundles:
            trig2hdf_node = trig2hdf_exe.create_node(insps, hdfbank[0])
            workflow.add_node(trig2hdf_node)
            trig_files += trig2hdf_node.output_files
    return trig_files
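
`categorize_by_attr` returns parallel lists of attribute values and file groups, which is why it is always unpacked into two names. A minimal stand-in consistent with how it is called above (an assumption, not the real FileList method):

from collections import namedtuple

def categorize_by_attr(files, attr):
    # Group files by an attribute; return ([value, ...], [group, ...])
    # so the two lists can be zipped back together.
    groups = {}
    for f in files:
        groups.setdefault(getattr(f, attr), []).append(f)
    return list(groups.keys()), list(groups.values())

F = namedtuple('F', 'ifo name')
files = [F('H1', 'a'), F('H1', 'b'), F('L1', 'c')]
ifos, grouped = categorize_by_attr(files, 'ifo')
print(ifos, [[f.name for f in g] for g in grouped])
# ['H1', 'L1'] [['a', 'b'], ['c']]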
Example #16
def setup_interval_coinc(workflow,
                         hdfbank,
                         trig_files,
                         veto_files,
                         veto_names,
                         out_dir,
                         tags=[]):
    """
    This function sets up exact match coincidence and background estimation
    using a folded interval technique.
    """
    make_analysis_dir(out_dir)
    logging.info('Setting up coincidence')

    if len(hdfbank) > 1:
        raise ValueError('This coincidence method only supports a '
                         'pregenerated template bank')
    hdfbank = hdfbank[0]

    if len(workflow.ifos) > 2:
        raise ValueError(
            'This coincidence method only supports two ifo searches')

    findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp,
                                             'coinc',
                                             ifos=workflow.ifos,
                                             tags=tags,
                                             out_dir=out_dir)

    # Wall time knob and memory knob
    factor = int(
        workflow.cp.get_opt_tags('workflow-coincidence',
                                 'parallelization-factor', tags))

    stat_files = []
    for veto_file, veto_name in zip(veto_files, veto_names):
        bg_files = FileList()
        for i in range(factor):
            group_str = '%s/%s' % (i, factor)
            coinc_node = findcoinc_exe.create_node(trig_files,
                                                   hdfbank,
                                                   veto_file,
                                                   veto_name,
                                                   group_str,
                                                   tags=[veto_name,
                                                         str(i)])
            bg_files += coinc_node.output_files
            workflow.add_node(coinc_node)

        stat_files += [
            setup_statmap(workflow,
                          bg_files,
                          hdfbank,
                          out_dir,
                          tags=tags + [veto_name])
        ]

    logging.info('...leaving coincidence')
    return stat_files
Example #17
def merge_psds(workflow, files, ifo, out_dir, tags=None):
    make_analysis_dir(out_dir)
    tags = [] if not tags else tags
    node = MergePSDFiles(workflow.cp, "merge_psds", ifos=ifo, out_dir=out_dir, tags=tags).create_node()
    node.add_input_list_opt("--psd-files", files)
    node.new_output_file_opt(workflow.analysis_time, ".hdf", "--output-file")
    workflow += node
    return node.output_files[0]
Example #18
def make_sensitivity_plot(workflow, inj_file, out_dir, tags=[]):
    make_analysis_dir(out_dir)   
    for tag in workflow.cp.get_subsections('plot_sensitivity'):
        node = PlotExecutable(workflow.cp, 'plot_sensitivity', ifos=workflow.ifos,
                    out_dir=out_dir, tags=[tag] + tags).create_node()
        node.add_input_opt('--injection-file', inj_file)
        node.new_output_file_opt(inj_file.segment, '.png', '--output-file')
        workflow += node
Example #19
def setup_coh_PTF_post_processing(workflow, trigger_files, trigger_cache,
        output_dir, segment_dir, injection_trigger_files=None,
        injection_files=None, injection_trigger_caches=None,
        injection_caches=None, config_file=None, run_dir=None, ifos=None,
        web_dir=None, inj_tags=[], tags=[], **kwargs):
    """
    This function aims to be the gateway for running postprocessing in CBC
    offline workflows. Post-processing generally consists of calculating the
    significance of triggers and making any statements about trigger rates.
    Dedicated plotting jobs do not belong here.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    trigger_files : pycbc.workflow.core.FileList
        A FileList of the trigger files that are used as
        input at this stage.
    summary_xml_files : pycbc.workflow.core.FileList
        A FileList of the output of the analysislogging_utils module.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.

    Returns
    --------
    post_proc_files : pycbc.workflow.core.FileList
        A list of the output from this stage.

    """
    logging.info("Entering post-processing stage.")
    make_analysis_dir(output_dir)

    # Parse for options in .ini file
    post_proc_method = workflow.cp.get_opt_tags("workflow-postproc",
                                                "postproc-method", tags)

    # Scope here for adding different options/methods in the future. For now
    # only the COH_PTF_WORKFLOW method is supported, which delegates to
    # setup_postproc_coh_PTF_workflow.
    if post_proc_method == "COH_PTF_WORKFLOW":
        post_proc_files = setup_postproc_coh_PTF_workflow(workflow,
                trigger_files, trigger_cache, injection_trigger_files,
                injection_files, injection_trigger_caches, injection_caches,
                config_file, output_dir, web_dir, segment_dir, ifos=ifos,
                inj_tags=inj_tags, tags=tags, **kwargs)
    else:
        errMsg = "Post-processing method not recognized. Must be "
        errMsg += "COH_PTF_WORKFLOW."
        raise ValueError(errMsg)

    logging.info("Leaving post-processing module.")

    return post_proc_files
Example #20
def setup_interval_coinc_inj(workflow, hdfbank, full_data_trig_files,
                             inj_trig_files, stat_files, background_file,
                             veto_file, veto_name, out_dir, tags=None):
    """
    Set up exact match coincidence and background estimation

    This function sets up exact match coincidence and background estimation
    using a folded interval technique.
    """
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    logging.info('Setting up coincidence for injection')

    if len(hdfbank) > 1:
        raise ValueError('This coincidence method only supports a '
                         'pregenerated template bank')
    hdfbank = hdfbank[0]

    if len(workflow.ifos) > 2:
        err_msg = 'This coincidence method only supports two-ifo searches'
        raise ValueError(err_msg)

    # Wall time knob and memory knob
    factor = int(workflow.cp.get_opt_tags('workflow-coincidence',
                                          'parallelization-factor', tags))

    ffiles = {}
    ifiles = {}
    for ifo, ffi in zip(*full_data_trig_files.categorize_by_attr('ifo')):
        ffiles[ifo] = ffi[0]
    ifos, files = inj_trig_files.categorize_by_attr('ifo')  # ifos list is used later
    for ifo, ifi in zip(ifos, files):
        ifiles[ifo] = ifi[0]
    ifo0, ifo1 = ifos[0], ifos[1]
    combo = [(FileList([ifiles[ifo0], ifiles[ifo1]]), "injinj"),
             (FileList([ifiles[ifo0], ffiles[ifo1]]), "injfull"),
             (FileList([ifiles[ifo1], ffiles[ifo0]]), "fullinj"),
            ]
    bg_files = {'injinj':[], 'injfull':[], 'fullinj':[]}

    for trig_files, ctag in combo:
        findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp, 'coinc',
                                                 ifos=workflow.ifos,
                                                 tags=tags + [ctag],
                                                 out_dir=out_dir)
        for i in range(factor):
            group_str = '%s/%s' % (i, factor)
            coinc_node = findcoinc_exe.create_node(trig_files, hdfbank,
                                                   stat_files,
                                                   veto_file, veto_name,
                                                   group_str,
                                                   tags=["JOB"+str(i)])
            bg_files[ctag] += coinc_node.output_files
            workflow.add_node(coinc_node)

    return setup_statmap_inj(workflow, bg_files, background_file, hdfbank,
                             out_dir, tags=tags)
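
For the two-detector case, the `combo` list pairs the single-detector trigger files three ways so that background arising from injection runs can be tracked by origin. A standalone illustration with made-up file names:

ifiles = {'H1': 'H1-INJ.hdf', 'L1': 'L1-INJ.hdf'}    # injection-run triggers
ffiles = {'H1': 'H1-FULL.hdf', 'L1': 'L1-FULL.hdf'}  # full-data triggers
combo = [([ifiles['H1'], ifiles['L1']], 'injinj'),
         ([ifiles['H1'], ffiles['L1']], 'injfull'),
         ([ifiles['L1'], ffiles['H1']], 'fullinj')]
for files, ctag in combo:
    print(ctag, files)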
Example #21
def setup_interval_coinc_inj(workflow, hdfbank, full_data_trig_files, inj_trig_files,
                           background_file, veto_file, veto_name, out_dir, tags=[]):
    """
    This function sets up exact match coincidence and background estimation
    using a folded interval technique.
    """
    make_analysis_dir(out_dir)
    logging.info('Setting up coincidence for injection')

    if len(hdfbank) > 1:
        raise ValueError('This coincidence method only supports a '
                         'pregenerated template bank')
    hdfbank = hdfbank[0]

    if len(workflow.ifos) > 2:
        raise ValueError('This coincidence method only supports two ifo searches')

    combinecoinc_exe = PyCBCStatMapInjExecutable(workflow.cp, 'statmap_inj',
                                              ifos=workflow.ifos,
                                              tags=tags, out_dir=out_dir)

    # Wall time knob and memory knob
    factor = int(workflow.cp.get_opt_tags('workflow-coincidence', 'parallelization-factor', tags))
    
    ffiles = {}
    ifiles = {}
    ifos, files = full_data_trig_files.categorize_by_attr('ifo')
    for ifo, file in zip(ifos, files):
        ffiles[ifo] = file[0]
    ifos, files = inj_trig_files.categorize_by_attr('ifo')
    for ifo, file in zip(ifos, files):
        ifiles[ifo] = file[0]
    ifo0, ifo1 = ifos[0], ifos[1]
    combo = [(FileList([ifiles[ifo0], ifiles[ifo1]]), "injinj"),
             (FileList([ifiles[ifo0], ffiles[ifo1]]), "injfull"),
             (FileList([ifiles[ifo1], ffiles[ifo0]]), "fullinj"),
            ]
    bg_files = {'injinj':[],'injfull':[],'fullinj':[]}

    for trig_files, ctag in combo:
        findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp, 'coinc',
                                              ifos=workflow.ifos,
                                              tags=tags + [ctag], out_dir=out_dir)
        for i in range(factor):
            group_str = '%s/%s' % (i, factor)
            coinc_node = findcoinc_exe.create_node(trig_files, hdfbank, 
                                           veto_file, veto_name,
                                           group_str, tags=([str(i)]))
            bg_files[ctag] += coinc_node.output_files
            workflow.add_node(coinc_node)

    combine_node = combinecoinc_exe.create_node(FileList(bg_files['injinj']), background_file, 
                                     FileList(bg_files['injfull']), FileList(bg_files['fullinj']))
    workflow.add_node(combine_node)

    logging.info('...leaving coincidence ')
    return combine_node.output_files[0]
Example #22
def make_foreground_table(workflow, trig_file, bank_file, ftag, out_dir, tags=[]):
    make_analysis_dir(out_dir)
    node = PlotExecutable(workflow.cp, 'page_foreground', ifos=workflow.ifos,
                    out_dir=out_dir, tags=tags).create_node()
    node.add_input_opt('--bank-file', bank_file)
    node.add_opt('--foreground-tag', ftag)
    node.add_input_opt('--trigger-file', trig_file)
    node.new_output_file_opt(bank_file.segment, '.html', '--output-file')
    workflow += node
Example #23
def find_injections_in_hdf_coinc(workflow, inj_coinc_file, inj_xml_file, 
                                 veto_file, veto_name, out_dir, tags=[]):
    make_analysis_dir(out_dir)
    exe = PyCBCHDFInjFindExecutable(workflow.cp, 'hdfinjfind', 
                                    ifos=workflow.ifos, 
                                    out_dir=out_dir, tags=tags)
    node = exe.create_node(inj_coinc_file, inj_xml_file, veto_file, veto_name, tags)
    workflow += node
    return node.output_files[0]     
Example #24
def merge_psds(workflow, files, ifo, out_dir, tags=None):
    make_analysis_dir(out_dir)
    tags = [] if not tags else tags
    node = MergePSDFiles(workflow.cp, 'merge_psds',
                         ifos=ifo, out_dir=out_dir,
                         tags=tags).create_node()
    node.add_input_list_opt('--psd-files', files)
    node.new_output_file_opt(workflow.analysis_time, '.hdf', '--output-file')
    workflow += node
    return node.output_files[0]
Example #25
def setup_multiifo_interval_coinc(workflow,
                                  hdfbank,
                                  trig_files,
                                  stat_files,
                                  veto_file,
                                  veto_name,
                                  out_dir,
                                  pivot_ifo,
                                  fixed_ifo,
                                  tags=None):
    """
    This function sets up exact match multiifo coincidence
    """
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    logging.info('Setting up coincidence')

    ifos, _ = trig_files.categorize_by_attr('ifo')
    findcoinc_exe = PyCBCFindMultiifoCoincExecutable(workflow.cp,
                                                     'multiifo_coinc',
                                                     ifos=ifos,
                                                     tags=tags,
                                                     out_dir=out_dir)

    # Wall time knob and memory knob
    factor = int(
        workflow.cp.get_opt_tags('workflow-coincidence',
                                 'parallelization-factor',
                                 [findcoinc_exe.ifo_string] + tags))

    bg_files = FileList()
    for i in range(factor):
        group_str = '%s/%s' % (i, factor)
        coinc_node = findcoinc_exe.create_node(trig_files,
                                               hdfbank,
                                               stat_files,
                                               veto_file,
                                               veto_name,
                                               group_str,
                                               pivot_ifo,
                                               fixed_ifo,
                                               tags=['JOB' + str(i)])
        bg_files += coinc_node.output_files
        workflow.add_node(coinc_node)

    statmap_files = setup_multiifo_statmap(workflow,
                                           ifos,
                                           bg_files,
                                           out_dir,
                                           tags=tags)

    logging.info('...leaving coincidence ')
    return statmap_files
Example #26
def find_injections_in_hdf_coinc(workflow, inj_coinc_file, inj_xml_file,
                                 veto_file, veto_name, out_dir, tags=None):
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    exe = PyCBCHDFInjFindExecutable(workflow.cp, 'hdfinjfind',
                                    ifos=workflow.ifos,
                                    out_dir=out_dir, tags=tags)
    node = exe.create_node(inj_coinc_file, inj_xml_file, veto_file, veto_name, tags)
    workflow += node
    return node.output_files[0]
Example #27
def make_psd_file(workflow, frame_files, segment_file, segment_name, out_dir, tags=None):
    make_analysis_dir(out_dir)
    tags = [] if not tags else tags
    node = MergeExecutable(workflow.cp, 'calculate_psd', ifos=segment_file.ifo,
                          out_dir=out_dir, tags=tags).create_node()
    node.add_input_opt('--analysis-segment-file', segment_file)
    node.add_opt('--segment-name', segment_name)
    node.add_input_list_opt('--frame-files', frame_files)
    node.new_output_file_opt(workflow.analysis_time, '.hdf', '--output-file')
    workflow += node
    return node.output_files[0]
Example #28
def setup_multiifo_interval_coinc(workflow,
                                  hdfbank,
                                  trig_files,
                                  stat_files,
                                  veto_files,
                                  veto_names,
                                  out_dir,
                                  pivot_ifo,
                                  fixed_ifo,
                                  tags=None):
    """
    This function sets up exact match multiifo coincidence
    """
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    logging.info('Setting up coincidence')

    if len(hdfbank) != 1:
        raise ValueError('Must use exactly 1 bank file for this coincidence '
                         'method, I got %i !' % len(hdfbank))
    hdfbank = hdfbank[0]

    ifos, _ = trig_files.categorize_by_attr('ifo')
    findcoinc_exe = PyCBCFindMultiifoCoincExecutable(workflow.cp,
                                                     'multiifo_coinc',
                                                     ifos=ifos,
                                                     tags=tags,
                                                     out_dir=out_dir)

    # Wall time knob and memory knob
    factor = int(
        workflow.cp.get_opt_tags('workflow-coincidence',
                                 'parallelization-factor', tags))

    bg_files = []
    for veto_file, veto_name in zip(veto_files, veto_names):
        for i in range(factor):
            group_str = '%s/%s' % (i, factor)
            coinc_node = findcoinc_exe.create_node(trig_files,
                                                   hdfbank,
                                                   stat_files,
                                                   veto_file,
                                                   veto_name,
                                                   group_str,
                                                   pivot_ifo,
                                                   fixed_ifo,
                                                   tags=[veto_name,
                                                         str(i)])
            bg_files += coinc_node.output_files
            workflow.add_node(coinc_node)

    logging.info('...leaving coincidence ')
    return bg_files
Example #29
def make_coinc_snrchi_plot(workflow, inj_file, inj_trig, stat_file, trig_file, out_dir, tags=[]):
    make_analysis_dir(out_dir)    
    for tag in workflow.cp.get_subsections('plot_coinc_snrchi'):
        node = PlotExecutable(workflow.cp, 'plot_coinc_snrchi', ifos=inj_trig.ifo,
                    out_dir=out_dir, tags=[tag] + tags).create_node()
        node.add_input_opt('--found-injection-file', inj_file)
        node.add_input_opt('--single-injection-file', inj_trig)
        node.add_input_opt('--coinc-statistic-file', stat_file)
        node.add_input_opt('--single-trigger-file', trig_file)
        node.new_output_file_opt(inj_file.segment, '.png', '--output-file')
        workflow += node
Example #30
def merge_single_detector_hdf_files(workflow, bank_file, trigger_files, out_dir, tags=[]):
    make_analysis_dir(out_dir)
    out = FileList()
    for ifo in workflow.ifos:
        node = MergeExecutable(workflow.cp, 'hdf_trigger_merge', 
                        ifos=ifo, out_dir=out_dir, tags=tags).create_node()
        node.add_input_opt('--bank-file', bank_file)
        node.add_input_list_opt('--trigger-files', trigger_files.find_output_with_ifo(ifo))
        node.new_output_file_opt(workflow.analysis_time, '.hdf', '--output-file')
        workflow += node
        out += node.output_files
    return out
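
`find_output_with_ifo` is not defined in these examples; a plausible reading, given how it is used, is a simple per-detector filter (hypothetical, not the pycbc implementation):

from collections import namedtuple

def find_output_with_ifo(files, ifo):
    # Hypothetical stand-in: keep only the files belonging to one detector.
    return [f for f in files if f.ifo == ifo]

F = namedtuple('F', 'ifo name')
files = [F('H1', 'H1-TRIGGERS.hdf'), F('L1', 'L1-TRIGGERS.hdf')]
print([f.name for f in find_output_with_ifo(files, 'H1')])
# ['H1-TRIGGERS.hdf']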
Example #31
def setup_gating_workflow(workflow,
                          science_segs,
                          datafind_outs,
                          output_dir=None,
                          tags=None):
    '''
    Setup gating section of CBC workflow. At present this only supports
    pregenerated gating files; in the future these could be created within
    the workflow.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    science_segs : Keyed dictionary of glue.segmentlist objects
        science_segs[ifo] holds the science segments to be analysed for each
        ifo.
    datafind_outs : pycbc.workflow.core.FileList
        The file list containing the datafind files.
    output_dir : path string
        The directory where data products will be placed. 
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    --------
    gate_files : pycbc.workflow.core.FileList
        The FileList holding the gate files, 0 or 1 per ifo
    '''
    if tags is None:
        tags = []
    logging.info("Entering gating module.")
    make_analysis_dir(output_dir)
    cp = workflow.cp

    # Parse for options in ini file.
    try:
        gateMethod = cp.get_opt_tags("workflow-gating", "gating-method", tags)
    except ConfigParser.Error:
        # Gating is optional, just return an empty list if it is not
        # provided.
        return FileList([])

    if gateMethod == "PREGENERATED_FILE":
        logging.info("Setting gating from pre-generated file(s).")
        gate_files = setup_gate_pregenerated(workflow, tags=tags)
    else:
        errMsg = "Gating method not recognized. Only "
        errMsg += "PREGENERATED_FILE is currently supported."
        raise ValueError(errMsg)

    logging.info("Leaving gating module.")
    return gate_files
Example #32
def veto_injections(workflow, inj_file, veto_file, veto_name, out_dir, tags=None):
    tags = [] if tags is None else tags
    make_analysis_dir(out_dir)

    node = Executable(workflow.cp, 'strip_injections', ifos=workflow.ifos,
                          out_dir=out_dir, tags=tags).create_node()
    node.add_opt('--segment-name', veto_name)
    node.add_input_opt('--veto-file', veto_file)
    node.add_input_opt('--injection-file', inj_file)
    node.add_opt('--ifos', ' '.join(workflow.ifos))
    node.new_output_file_opt(workflow.analysis_time, '.xml', '--output-file')
    workflow += node
    return node.output_files[0]
Example #33
def make_psd_file(workflow, frame_files, segment_file, segment_name, out_dir, tags=None):
    make_analysis_dir(out_dir)
    tags = [] if not tags else tags
    exe = CalcPSDExecutable(workflow.cp, "calculate_psd", ifos=segment_file.ifo, out_dir=out_dir, tags=tags)
    node = exe.create_node()
    node.add_input_opt("--analysis-segment-file", segment_file)
    node.add_opt("--segment-name", segment_name)

    if not exe.has_opt("frame-type"):
        node.add_input_list_opt("--frame-files", frame_files)
    node.new_output_file_opt(workflow.analysis_time, ".hdf", "--output-file")
    workflow += node
    return node.output_files[0]
Example #34
def make_average_psd(workflow, psd_files, out_dir, tags=None, output_fmt=".txt"):
    make_analysis_dir(out_dir)
    tags = [] if tags is None else tags
    node = AvgPSDExecutable(workflow.cp, "average_psd", ifos=workflow.ifos, out_dir=out_dir, tags=tags).create_node()
    node.add_input_list_opt("--input-files", psd_files)

    if len(workflow.ifos) > 1:
        node.new_output_file_opt(workflow.analysis_time, output_fmt, "--detector-avg-file")

    node.new_multiifo_output_list_opt("--time-avg-file", workflow.ifos, workflow.analysis_time, output_fmt, tags=tags)

    workflow += node
    return node.output_files
Example #35
def setup_psd_workflow(workflow, science_segs, datafind_outs,
                             output_dir=None, tags=None):
    '''
    Setup static psd section of CBC workflow. At present this only supports
    pregenerated psd files; in the future these could be created within the
    workflow.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    science_segs : Keyed dictionary of glue.segmentlist objects
        science_segs[ifo] holds the science segments to be analysed for each
        ifo.
    datafind_outs : pycbc.workflow.core.FileList
        The file list containing the datafind files.
    output_dir : path string
        The directory where data products will be placed. 
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    --------
    psd_files : pycbc.workflow.core.FileList
        The FileList holding the psd files, 0 or 1 per ifo
    '''
    if tags is None:
        tags = []
    logging.info("Entering static psd module.")
    make_analysis_dir(output_dir)
    cp = workflow.cp
    
    # Parse for options in ini file.  
    try:
        psdMethod = cp.get_opt_tags("workflow-psd", "psd-method",
                                     tags)
    except ConfigParser.Error:
        # Pre-generated PSDs are optional, just return an empty list if they
        # are not provided.
        return FileList([])

    if psdMethod == "PREGENERATED_FILE":
        logging.info("Setting psd from pre-generated file(s).")
        psd_files = setup_psd_pregenerated(workflow, tags=tags)
    else:
        errMsg = "PSD method not recognized. Only "
        errMsg += "PREGENERATED_FILE is currently supported."
        raise ValueError(errMsg)
    
    logging.info("Leaving psd module.")
    return psd_files
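
The try/except around the option lookup makes the whole stage optional: a missing [workflow-psd] method simply short-circuits to an empty FileList. A self-contained illustration of that pattern with the standard library parser:

from configparser import ConfigParser, Error

cp = ConfigParser()  # deliberately empty configuration
try:
    method = cp.get('workflow-psd', 'psd-method')
except Error:
    # A missing section/option raises a configparser.Error subclass,
    # so the stage is skipped instead of failing the workflow.
    method = None
print(method)  # None -> stage skipped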
Example #36
def merge_single_detector_hdf_files(workflow, bank_file, trigger_files, out_dir, tags=None):
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    out = FileList()
    for ifo in workflow.ifos:
        node = MergeExecutable(workflow.cp, 'hdf_trigger_merge',
                        ifos=ifo, out_dir=out_dir, tags=tags).create_node()
        node.add_input_opt('--bank-file', bank_file)
        node.add_input_list_opt('--trigger-files', trigger_files.find_output_with_ifo(ifo))
        node.new_output_file_opt(workflow.analysis_time, '.hdf', '--output-file')
        workflow += node
        out += node.output_files
    return out
Example #37
def make_snrchi_plot(workflow, trig_files, veto_file, out_dir, tags=[]):
    make_analysis_dir(out_dir)    
    for tag in workflow.cp.get_subsections('plot_snrchi'):
        for trig_file in trig_files:
            node = PlotExecutable(workflow.cp, 'plot_snrchi',
                        ifos=trig_file.ifo, 
                        out_dir=out_dir, 
                        tags=[tag] + tags).create_node()

            node.set_memory(15000)
            node.add_input_opt('--trigger-file', trig_file)
            node.add_input_opt('--veto-file', veto_file)
            node.new_output_file_opt(trig_file.segment, '.png', '--output-file')
            workflow += node  
Example #38
def convert_bank_to_hdf(workflow, xmlbank, out_dir, tags=[]):
    """Return the template bank in hdf format
    """
    #FIXME, make me not needed
    if len(xmlbank) > 1:
        raise ValueError('Can only convert a single template bank')

    logging.info('convert template bank to HDF')
    make_analysis_dir(out_dir)
    bank2hdf_exe = PyCBCBank2HDFExecutable(workflow.cp, 'bank2hdf',
                                            ifos=workflow.ifos,
                                            out_dir=out_dir, tags=tags)
    bank2hdf_node = bank2hdf_exe.create_node(xmlbank[0])
    workflow.add_node(bank2hdf_node)
    return bank2hdf_node.output_files
Example #39
def make_average_psd(workflow, psd_files, out_dir, tags=None,
                     output_fmt='.txt'):
    make_analysis_dir(out_dir)
    tags = [] if tags is None else tags
    node = AvgPSDExecutable(workflow.cp, 'average_psd', ifos=workflow.ifos,
                            out_dir=out_dir, tags=tags).create_node()
    node.add_input_list_opt('--input-files', psd_files)
    node.new_output_file_opt(workflow.analysis_time, output_fmt,
                             '--detector-avg-file')

    node.new_multiifo_output_list_opt('--time-avg-file', workflow.ifos,
                                 workflow.analysis_time, output_fmt, tags=tags)

    workflow += node
    return node.output_files
Example #40
def convert_bank_to_hdf(workflow, xmlbank, out_dir, tags=None):
    """Return the template bank in hdf format"""
    if tags is None:
        tags = []
    #FIXME, make me not needed
    if len(xmlbank) > 1:
        raise ValueError('Can only convert a single template bank')

    logging.info('convert template bank to HDF')
    make_analysis_dir(out_dir)
    bank2hdf_exe = PyCBCBank2HDFExecutable(workflow.cp, 'bank2hdf',
                                            ifos=workflow.ifos,
                                            out_dir=out_dir, tags=tags)
    bank2hdf_node = bank2hdf_exe.create_node(xmlbank[0])
    workflow.add_node(bank2hdf_node)
    return bank2hdf_node.output_files
Example #41
def setup_interval_coinc(workflow, hdfbank, trig_files,
                         veto_files, veto_names, out_dir, tags=[]):
    """
    This function sets up exact match coincidence and background estimation
    using a folded interval technique.
    """
    make_analysis_dir(out_dir)
    logging.info('Setting up coincidence')

    if len(hdfbank) > 1:
        raise ValueError('This coincidence method only supports a '
                         'pregenerated template bank')
    hdfbank = hdfbank[0]

    if len(workflow.ifos) > 2:
        raise ValueError('This coincidence method only supports two ifo searches')

    findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp, 'coinc',
                                              ifos=workflow.ifos,
                                              tags=tags, out_dir=out_dir)

    combinecoinc_exe = PyCBCStatMapExecutable(workflow.cp, 'statmap',
                                              ifos=workflow.ifos,
                                              tags=tags, out_dir=out_dir)
                                         
    # Wall time knob and memory knob
    factor = int(workflow.cp.get_opt_tags('workflow-coincidence', 'parallelization-factor', tags))

    stat_files = FileList()
    for veto_file, veto_name in zip(veto_files, veto_names):
        bg_files = FileList()
        for i in range(factor):
            group_str = '%s/%s' % (i, factor)
            coinc_node = findcoinc_exe.create_node(trig_files, hdfbank, 
                                                   veto_file, veto_name,
                                                   group_str,
                                                   tags= [veto_name, str(i)])
            bg_files += coinc_node.output_files
            workflow.add_node(coinc_node)
             
        combine_node = combinecoinc_exe.create_node(bg_files, tags=[veto_name])
        workflow.add_node(combine_node)
        stat_files += combine_node.output_files
        
    logging.info('...leaving coincidence')
    return stat_files
Example #42
def setup_splittable_workflow(workflow, input_tables, out_dir=None, tags=None):
    '''
    This function aims to be the gateway for code that is responsible for
    taking some input file containing some table and splitting it into
    multiple files containing different parts of that table. For now the only
    supported operation is using lalapps_splitbank to split a template bank
    xml file into multiple template bank xml files.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the jobs will be added to.
    input_tables : pycbc.workflow.core.FileList
        The input files to be split up.
    out_dir : path
        The directory in which output will be written.

    Returns
    --------
    split_table_outs : pycbc.workflow.core.FileList
        The list of split up files as output from this job.
    '''
    if tags is None:
        tags = []
    logging.info("Entering split output files module.")
    make_analysis_dir(out_dir)
    # Parse for options in .ini file
    splitMethod = workflow.cp.get_opt_tags("workflow-splittable",
                                           "splittable-method", tags)

    if splitMethod == "IN_WORKFLOW":
        # Scope here for choosing different options
        logging.info("Adding split output file jobs to workflow.")
        split_table_outs = setup_splittable_dax_generated(
            workflow, input_tables, out_dir, tags)
    elif splitMethod == "NOOP":
        # Probably better not to call the module at all, but this option will
        # return the input file list.
        split_table_outs = input_tables
    else:
        errMsg = "Splittable method not recognized. Must be one of "
        errMsg += "IN_WORKFLOW or NOOP."
        raise ValueError(errMsg)

    logging.info("Leaving split output files module.")
    return split_table_outs
Example #43
def setup_gating_workflow(workflow, output_dir=None, tags=None):
    '''
    Setup gating section of CBC workflow. At present this only supports
    pregenerated gating files; in the future these could be created within
    the workflow.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    output_dir : path string
        The directory where data products will be placed.
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    --------
    gate_files : pycbc.workflow.core.FileList
        The FileList holding the gate files, 0 or 1 per ifo
    '''
    if tags is None:
        tags = []
    logging.info("Entering gating module.")
    make_analysis_dir(output_dir)
    cp = workflow.cp

    # Parse for options in ini file.
    try:
        gateMethod = cp.get_opt_tags("workflow-gating", "gating-method",
                                     tags)
    except ConfigParser.Error:
        # Gating is optional, just return an empty list if not
        # provided.
        return FileList([])

    if gateMethod == "PREGENERATED_FILE":
        logging.info("Setting gating from pre-generated file(s).")
        gate_files = setup_gate_pregenerated(workflow,
                                             output_dir=output_dir, tags=tags)
    else:
        errMsg = "Gating method not recognized. Only "
        errMsg += "PREGENERATED_FILE is currently supported."
        raise ValueError(errMsg)
    
    logging.info("Leaving gating module.")
    return gate_files
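Gating is optional: if the [workflow-gating] section is absent the function simply returns an empty FileList. A minimal sketch of a configuration that enables it might be:

[workflow-gating]
; PREGENERATED_FILE is currently the only recognized method; the gate
; files themselves are located by setup_gate_pregenerated.
gating-method = PREGENERATED_FILE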
Example #47
def convert_trig_to_hdf(workflow, hdfbank, xml_trigger_files, out_dir, tags=None):
    """Return the list of hdf5 trigger file outputs
    """
    if tags is None:
        tags = []
    #FIXME, make me not needed
    logging.info('convert single inspiral trigger files to hdf5')
    make_analysis_dir(out_dir)

    ifos, insp_groups = xml_trigger_files.categorize_by_attr('ifo')
    trig_files = FileList()
    for ifo, insp_group in zip(ifos, insp_groups):
        trig2hdf_exe = PyCBCTrig2HDFExecutable(workflow.cp, 'trig2hdf',
                                       ifos=ifo, out_dir=out_dir, tags=tags)
        segs, insp_bundles = insp_group.categorize_by_attr('segment')
        for insps in insp_bundles:
            trig2hdf_node = trig2hdf_exe.create_node(insps, hdfbank[0])
            workflow.add_node(trig2hdf_node)
            trig_files += trig2hdf_node.output_files
    return trig_files
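The conversion above leans on FileList.categorize_by_attr, which returns parallel lists of distinct attribute values and the matching file groups. A minimal self-contained sketch of that grouping pattern, using plain Python objects rather than the real pycbc classes, is:

from collections import defaultdict

def categorize_by_attr(files, attr):
    # Group `files` by the value of `attr` and return two parallel
    # lists: the distinct values and the corresponding groups. This is
    # a sketch of the FileList behaviour, not the real implementation.
    groups = defaultdict(list)
    for f in files:
        groups[getattr(f, attr)].append(f)
    keys = list(groups)
    return keys, [groups[k] for k in keys]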
Example #48
def setup_interval_coinc(workflow, hdfbank, trig_files, stat_files,
                         veto_files, veto_names, out_dir, tags=None):
    """
    This function sets up exact match coincidence and background estimation

    using a folded interval technique.
    """
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    logging.info('Setting up coincidence')

    if len(hdfbank) != 1:
        raise ValueError('Must use exactly 1 bank file for this coincidence '
                         'method, got %i!' % len(hdfbank))
    hdfbank = hdfbank[0]

    if len(workflow.ifos) > 2:
        raise ValueError('This coincidence method only supports two ifo searches')

    findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp, 'coinc',
                                             ifos=workflow.ifos,
                                             tags=tags, out_dir=out_dir)
                                         
    # Wall time knob and memory knob
    factor = int(workflow.cp.get_opt_tags('workflow-coincidence',
                                          'parallelization-factor', tags))

    statmap_files = []
    for veto_file, veto_name in zip(veto_files, veto_names):
        bg_files = FileList()
        for i in range(factor):
            group_str = '%s/%s' % (i, factor)
            coinc_node = findcoinc_exe.create_node(trig_files, hdfbank,
                                                   stat_files,
                                                   veto_file, veto_name,
                                                   group_str,
                                                   tags=[veto_name, str(i)])
            bg_files += coinc_node.output_files
            workflow.add_node(coinc_node)
             
        statmap_files += [setup_statmap(workflow, bg_files, hdfbank,
                                        out_dir, tags=tags + [veto_name])]

    logging.info('...leaving coincidence ')
    return statmap_files
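The parallelization factor sets how many template groups (the '%s/%s' strings above) each background estimation job is split over. A hypothetical configuration, with an illustrative value, might be:

[workflow-coincidence]
; Illustrative value: split each coincidence job over 10 template
; groups, one node per group, later combined by setup_statmap.
parallelization-factor = 10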
Example #49
def save_veto_definer(cp, out_dir, tags=None):
    """ Retrieve the veto definer file and save it locally
    
    Parameters
    -----------
    cp : ConfigParser instance
    out_dir : path
    tags : list of strings
        Used to retrieve subsections of the ini file for
        configuration options.
    """
    make_analysis_dir(out_dir)
    vetoDefUrl = cp.get_opt_tags("workflow-segments",
                                 "segments-veto-definer-url", tags)
    vetoDefBaseName = os.path.basename(vetoDefUrl)
    vetoDefNewPath = os.path.abspath(os.path.join(out_dir, vetoDefBaseName))
    urllib.urlretrieve(vetoDefUrl, vetoDefNewPath)
    # and update location
    cp.set("workflow-segments", "segments-veto-definer-file", vetoDefNewPath)
Example #50
def setup_multiifo_combine_statmap(workflow, final_bg_file_list, out_dir,
                                   tags=None):
    """
    Combine the multiifo statmap files into one background file
    """
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    logging.info('Setting up multiifo combine statmap')

    cstat_exe = PyCBCMultiifoCombineStatmap(workflow.cp,
                                            'combine_statmap',
                                            ifos=workflow.ifos,
                                            tags=tags,
                                            out_dir=out_dir)

    ifolist = ' '.join(workflow.ifos)
    cluster_window = float(
        workflow.cp.get_opt_tags('combine_statmap', 'cluster-window', tags))
    combine_statmap_node = cstat_exe.create_node(final_bg_file_list, ifolist,
                                                 cluster_window, tags)
    workflow.add_node(combine_statmap_node)
    return combine_statmap_node.output_file
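The clustering window is read from the executable's own [combine_statmap] section. A hypothetical configuration, with an illustrative value assumed to be in seconds, might be:

[combine_statmap]
; Illustrative clustering window applied when combining the statmap
; files into a single background file.
cluster-window = 10.0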
Example #51
def setup_coh_PTF_post_processing(workflow,
                                  trigger_files,
                                  trigger_cache,
                                  output_dir,
                                  segment_dir,
                                  injection_trigger_files=None,
                                  injection_files=None,
                                  injection_trigger_caches=None,
                                  injection_caches=None,
                                  config_file=None,
                                  run_dir=None,
                                  ifos=None,
                                  web_dir=None,
                                  inj_tags=[],
                                  tags=[],
                                  **kwargs):
    """
    This function aims to be the gateway for running postprocessing in CBC
    offline workflows. Post-processing generally consists of calculating the
    significance of triggers and making any statements about trigger rates.
    Dedicated plotting jobs do not belong here.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    trigger_files : pycbc.workflow.core.FileList
        A FileList of the trigger files that are used as
        input at this stage.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.

    Returns
    --------
    post_proc_files : pycbc.workflow.core.FileList
        A list of the output from this stage.

    """
    logging.info("Entering post-processing stage.")
    make_analysis_dir(output_dir)

    # Parse for options in .ini file
    post_proc_method = workflow.cp.get_opt_tags("workflow-postproc",
                                                "postproc-method", tags)

    # Scope here for adding different options/methods. For now the only
    # supported method is the dedicated coh_PTF post-processing workflow.
    if post_proc_method == "COH_PTF_WORKFLOW":
        post_proc_files = setup_postproc_coh_PTF_workflow(
            workflow,
            trigger_files,
            trigger_cache,
            injection_trigger_files,
            injection_files,
            injection_trigger_caches,
            injection_caches,
            config_file,
            output_dir,
            web_dir,
            segment_dir,
            ifos=ifos,
            inj_tags=inj_tags,
            tags=tags,
            **kwargs)
    else:
        errMsg = "Post-processing method not recognized. Must be "
        errMsg += "COH_PTF_WORKFLOW."
        raise ValueError(errMsg)

    logging.info("Leaving post-processing module.")

    return post_proc_files
Example #52
def setup_datafind_workflow(workflow, scienceSegs, outputDir, seg_file=None,
                            tags=None):
    """
    Setup datafind section of the workflow. This section is responsible for
    generating, or setting up the workflow to generate, a list of files that
    record the location of the frame files needed to perform the analysis.
    There could be multiple options here: the datafind jobs could be done at
    run time or could be put into a dag. The subsequent jobs will know
    what was done here from the OutFileList containing the datafind jobs
    (and the Dagman nodes if appropriate).
    For now the only implemented option is to generate the datafind files at
    runtime. This module can also check if the frame files actually exist, check
    whether the obtained segments line up with the original ones and update the
    science segments to reflect missing data files.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        The workflow class that stores the jobs that will be run.
    scienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances
        This contains the times that the workflow is expected to analyse.
    outputDir : path
        All output files written by datafind processes will be written to this
        directory.
    seg_file : SegFile, optional (default=None)
        The file returned by get_science_segments containing the science
        segments and the associated segment_summary. This will
        be used for the segment_summary test and is required if, and only if,
        performing that test.
    tags : list of string, optional (default=None)
        Use this to specify tags. This can be used if this module is being
        called more than once to give call specific configuration (by setting
        options in [workflow-datafind-${TAG}] rather than [workflow-datafind]).
        This is also used to tag the Files returned by the class, making both
        the returned Files and the actual filenames unique.
        FIXME: Filenames may not be unique with current codes!

    Returns
    --------
    datafindOuts : OutGroupList
        List of all the datafind output files for use later in the pipeline.
    sci_avlble_file : SegFile
        SegFile containing the analysable time after checks in the datafind
        module are applied to the input segment list. For production runs this
        is expected to be equal to the input segment list.
    scienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances
        This contains the times that the workflow is expected to analyse. If
        the updateSegmentTimes kwarg is given this will be updated to reflect
        any instances of missing data.
    sci_avlble_name : string
        The name with which the analysable time is stored in the
        sci_avlble_file.
    """
    if tags is None:
        tags = []
    logging.info("Entering datafind module")
    make_analysis_dir(outputDir)
    cp = workflow.cp

    # Parse for options in ini file
    datafindMethod = cp.get_opt_tags("workflow-datafind",
                                     "datafind-method", tags)

    if cp.has_option_tags("workflow-datafind",
                          "datafind-check-segment-gaps", tags):
        checkSegmentGaps = cp.get_opt_tags("workflow-datafind",
                                          "datafind-check-segment-gaps", tags)
    else:
        checkSegmentGaps = "no_test"
    if cp.has_option_tags("workflow-datafind",
                          "datafind-check-frames-exist", tags):
        checkFramesExist = cp.get_opt_tags("workflow-datafind",
                                          "datafind-check-frames-exist", tags)
    else:
        checkFramesExist = "no_test"
    if cp.has_option_tags("workflow-datafind",
                          "datafind-check-segment-summary", tags):
        checkSegmentSummary = cp.get_opt_tags("workflow-datafind",
                                       "datafind-check-segment-summary", tags)
    else:
        checkSegmentSummary = "no_test"

    logging.info("Starting datafind with setup_datafind_runtime_generated")
    if datafindMethod == "AT_RUNTIME_MULTIPLE_CACHES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_cache_multi_calls_perifo(cp, scienceSegs,
                                                          outputDir, tags=tags)
    elif datafindMethod == "AT_RUNTIME_SINGLE_CACHES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_cache_single_call_perifo(cp, scienceSegs,
                                                          outputDir, tags=tags)
    elif datafindMethod == "AT_RUNTIME_MULTIPLE_FRAMES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_frames_multi_calls_perifo(cp, scienceSegs,
                                                          outputDir, tags=tags)
    elif datafindMethod == "AT_RUNTIME_SINGLE_FRAMES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_frames_single_call_perifo(cp, scienceSegs,
                                                          outputDir, tags=tags)

    elif datafindMethod == "FROM_PREGENERATED_LCF_FILES":
        ifos = scienceSegs.keys()
        datafindcaches, datafindouts = \
            setup_datafind_from_pregenerated_lcf_files(cp, ifos,
                                                       outputDir, tags=tags)
    else:
        msg = "Entry datafind-method in [workflow-datafind] does not have "
        msg += "expected value. Valid values are "
        msg += "AT_RUNTIME_MULTIPLE_FRAMES, AT_RUNTIME_SINGLE_FRAMES, "
        msg += "AT_RUNTIME_MULTIPLE_CACHES, AT_RUNTIME_SINGLE_CACHES or "
        msg += "FROM_PREGENERATED_LCF_FILES. "
        msg += "Consult the documentation for more info."
        raise ValueError(msg)

    using_backup_server = False
    if datafindMethod == "AT_RUNTIME_MULTIPLE_FRAMES" or \
                                  datafindMethod == "AT_RUNTIME_SINGLE_FRAMES":
        if cp.has_option_tags("workflow-datafind",
                          "datafind-backup-datafind-server", tags):
            using_backup_server = True
            backup_server = cp.get_opt_tags("workflow-datafind",
                                      "datafind-backup-datafind-server", tags)
            cp_new = copy.deepcopy(cp)
            cp_new.set("workflow-datafind",
                                "datafind-ligo-datafind-server", backup_server)
            cp_new.set('datafind', 'urltype', 'gsiftp')
            backup_datafindcaches, backup_datafindouts =\
                setup_datafind_runtime_frames_single_call_perifo(cp_new,
                                             scienceSegs, outputDir, tags=tags)
            backup_datafindouts = datafind_keep_unique_backups(\
                                             backup_datafindouts, datafindouts)
            datafindcaches.extend(backup_datafindcaches)
            datafindouts.extend(backup_datafindouts)

    logging.info("setup_datafind_runtime_generated completed")
    # If we don't have frame files covering all times we can update the science
    # segments.
    if checkSegmentGaps in ['warn','update_times','raise_error']:
        logging.info("Checking science segments against datafind output....")
        newScienceSegs = get_science_segs_from_datafind_outs(datafindcaches)
        logging.info("New segments calculated from data find output.....")
        missingData = False
        for ifo in scienceSegs.keys():
            # If no science segments in input then do nothing
            if not scienceSegs[ifo]:
                msg = "No science segments are present for ifo %s, " %(ifo)
                msg += "the segment metadata indicates there is no analyzable"
                msg += " strain data between the selected GPS start and end "
                msg += "times."
                logging.warning(msg)
                continue
            if ifo not in newScienceSegs:
                msg = "No data frames were found corresponding to the science "
                msg += "segments for ifo %s" %(ifo)
                logging.error(msg)
                missingData = True
                if checkSegmentGaps == 'update_times':
                    scienceSegs[ifo] = segments.segmentlist()
                continue
            missing = scienceSegs[ifo] - newScienceSegs[ifo]
            if abs(missing):
                msg = "From ifo %s we are missing frames covering:" %(ifo)
                msg += "\n%s" % "\n".join(map(str, missing))
                missingData = True
                logging.error(msg)
                if checkSegmentGaps == 'update_times':
                    # Remove missing time, so that we can carry on if desired
                    logging.info("Updating science segments for ifo %s."
                                 %(ifo))
                    scienceSegs[ifo] = scienceSegs[ifo] - missing

        if checkSegmentGaps == 'raise_error' and missingData:
            raise ValueError("Workflow cannot find needed data, exiting.")
        logging.info("Done checking, any discrepancies are reported above.")
    elif checkSegmentGaps == 'no_test':
        pass
    else:
        errMsg = "checkSegmentGaps kwarg must take a value from 'no_test', "
        errMsg += "'warn', 'update_times' or 'raise_error'."
        raise ValueError(errMsg)

    # Do all of the frame files that were returned actually exist?
    if checkFramesExist in ['warn','update_times','raise_error']:
        logging.info("Verifying that all frames exist on disk.")
        missingFrSegs, missingFrames = \
                          get_missing_segs_from_frame_file_cache(datafindcaches)
        missingFlag = False
        for ifo in missingFrames.keys():
            # If no data in the input then do nothing
            if not scienceSegs[ifo]:
                continue
            # If using a backup server, does the frame exist remotely?
            if using_backup_server:
                # WARNING: This will be slow, but hopefully it will not occur
                #          for too many frames. This could be optimized if
                #          it becomes necessary.
                new_list = []
                for frame in missingFrames[ifo]:
                    for dfout in datafindouts:
                        dfout_pfns = list(dfout.pfns)
                        dfout_urls = [a.url for a in dfout_pfns]
                        if frame.url in dfout_urls:
                            pfn = dfout_pfns[dfout_urls.index(frame.url)]
                            dfout.removePFN(pfn)
                            if len(dfout.pfns) == 0:
                                new_list.append(frame)
                            else:
                                msg = "Frame %s not found locally. "\
                                                                  %(frame.url,)
                                msg += "Replacing with remote url(s) %s." \
                                           %(str([a.url for a in dfout.pfns]),)
                                logging.info(msg)
                            break
                    else:
                        new_list.append(frame)
                missingFrames[ifo] = new_list
            if missingFrames[ifo]:
                msg = "From ifo %s we are missing the following frames:" %(ifo)
                msg +='\n'.join([a.url for a in missingFrames[ifo]])
                missingFlag = True
                logging.error(msg)
            if checkFramesExist == 'update_times':
                # Remove missing times, so that we can carry on if desired
                logging.info("Updating science times for ifo %s." %(ifo))
                scienceSegs[ifo] = scienceSegs[ifo] - missingFrSegs[ifo]

        if checkFramesExist == 'raise_error' and missingFlag:
            raise ValueError("Workflow cannot find all frames, exiting.")
        logging.info("Finished checking frames.")
    elif checkFramesExist == 'no_test':
        pass
    else:
        errMsg = "checkFramesExist kwarg must take a value from 'no_test', "
        errMsg += "'warn', 'update_times' or 'raise_error'."
        raise ValueError(errMsg)

    # Check if there are cases where frames exist, but no entry in the segment
    # summary table are present.
    if checkSegmentSummary in ['warn', 'raise_error']:
        logging.info("Checking the segment summary table against frames.")
        dfScienceSegs = get_science_segs_from_datafind_outs(datafindcaches)
        missingFlag = False
        # NOTE: Should this be overrideable in the config file?
        sci_seg_name = "SCIENCE"
        if seg_file is None:
            err_msg = "You must provide the science segments SegFile object "
            err_msg += "if using the datafind-check-segment-summary option."
            raise ValueError(err_msg)
        if seg_file.seg_summ_dict is None:
            err_msg = "The provided science segments SegFile object must "
            err_msg += "contain a valid segment_summary table if using the "
            err_msg += "datafind-check-segment-summary option."
            raise ValueError(err_msg)
        seg_summary_times = seg_file.seg_summ_dict
        for ifo in dfScienceSegs.keys():
            curr_seg_summ_times = seg_summary_times[ifo + ":" + sci_seg_name]
            missing = (dfScienceSegs[ifo] & seg_file.valid_segments)
            missing.coalesce()
            missing = missing - curr_seg_summ_times
            missing.coalesce()
            scienceButNotFrame = scienceSegs[ifo] - dfScienceSegs[ifo]
            scienceButNotFrame.coalesce()
            missing2 = scienceSegs[ifo] - scienceButNotFrame
            missing2.coalesce()
            missing2 = missing2 - curr_seg_summ_times
            missing2.coalesce()
            if abs(missing):
                msg = "From ifo %s the following times have frames, " %(ifo)
                msg += "but are not covered in the segment summary table."
                msg += "\n%s" % "\n".join(map(str, missing))
                logging.error(msg)
                missingFlag = True
            if abs(missing2):
                msg = "From ifo %s the following times have frames, " %(ifo)
                msg += "are science, and are not covered in the segment "
                msg += "summary table."
                msg += "\n%s" % "\n".join(map(str, missing2))
                logging.error(msg)
                missingFlag = True
        if checkSegmentSummary == 'raise_error' and missingFlag:
            errMsg = "Segment_summary discrepancy detected, exiting."
            raise ValueError(errMsg)
    elif checkSegmentSummary == 'no_test':
        pass
    else:
        errMsg = "checkSegmentSummary kwarg must take a value from 'no_test', "
        errMsg += "'warn', or 'raise_error'."
        raise ValueError(errMsg)

    # Now need to create the file for SCIENCE_AVAILABLE
    sci_avlble_dict = segments.segmentlistdict()
    # NOTE: Should this be overrideable in the config file?
    sci_avlble_name = "SCIENCE_AVAILABLE"
    for ifo in scienceSegs.keys():
        sci_avlble_dict[ifo + ':' + sci_avlble_name] = scienceSegs[ifo]

    sci_avlble_file = SegFile.from_segment_list_dict('SCIENCE_AVAILABLE',
                            sci_avlble_dict, ifo_list = scienceSegs.keys(),
                            valid_segment=workflow.analysis_time,
                            extension='.xml', tags=tags, directory=outputDir)

    logging.info("Leaving datafind module")
    return FileList(datafindouts), sci_avlble_file, scienceSegs, sci_avlble_name
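All of this behaviour is driven from the [workflow-datafind] section. A hypothetical configuration exercising the optional checks might be:

[workflow-datafind]
; One of AT_RUNTIME_MULTIPLE_FRAMES, AT_RUNTIME_SINGLE_FRAMES,
; AT_RUNTIME_MULTIPLE_CACHES, AT_RUNTIME_SINGLE_CACHES or
; FROM_PREGENERATED_LCF_FILES.
datafind-method = AT_RUNTIME_SINGLE_FRAMES
; Each check takes no_test, warn, update_times or raise_error (the
; segment-summary check takes no_test, warn or raise_error).
datafind-check-segment-gaps = update_times
datafind-check-frames-exist = update_times
datafind-check-segment-summary = warn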
Example #53
def setup_timeslides_workflow(workflow, output_dir=None, tags=None,
                              timeSlideSectionName='ligolw_tisi'):
    '''
    Setup generation of time_slide input files in the workflow. Currently
    used only with ligolw_tisi to generate files containing the list of
    slides to be performed in each time slide job.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. This will be used in output names.
    timeSlideSectionName : string (optional, default='ligolw_tisi')
        The string that corresponds to the option describing the exe location
        in the [executables] section of the .ini file and that corresponds to
        the section (and sub-sections) giving the options that will be given to
        the code at run time.
    Returns
    --------
    timeSlideOuts : pycbc.workflow.core.FileList
        The list of time slide files created by this call.
    '''
    logging.info("Entering time slides setup module.")
    make_analysis_dir(output_dir)
    # Get ifo list and full analysis segment for output file naming
    ifoList = workflow.ifos
    ifo_string = workflow.ifo_string
    fullSegment = workflow.analysis_time

    # Identify which time-slides to do by presence of sub-sections in the
    # configuration file
    all_sec = workflow.cp.sections()
    timeSlideSections = [sec for sec in all_sec if sec.startswith('tisi-')]
    timeSlideTags = [(sec.split('-')[-1]).upper() for sec in timeSlideSections]

    timeSlideOuts = FileList([])

    # FIXME: Add ability to specify different exes

    # Make the timeSlideFiles
    for timeSlideTag in timeSlideTags:
        currTags = tags + [timeSlideTag]

        timeSlideMethod = workflow.cp.get_opt_tags("workflow-timeslides",
                                                 "timeslides-method", currTags)

        if timeSlideMethod in ["IN_WORKFLOW", "AT_RUNTIME"]:
            timeSlideExeTag = workflow.cp.get_opt_tags("workflow-timeslides",
                                                    "timeslides-exe", currTags)
            timeSlideExe = select_generic_executable(workflow, timeSlideExeTag)
            timeSlideJob = timeSlideExe(workflow.cp, timeSlideExeTag, ifos=ifo_string,
                                             tags=currTags, out_dir=output_dir)
            timeSlideNode = timeSlideJob.create_node(fullSegment)
            if timeSlideMethod == "AT_RUNTIME":
                workflow.execute_node(timeSlideNode)
            else:
                workflow.add_node(timeSlideNode)
            tisiOutFile = timeSlideNode.output_files[0]
        elif timeSlideMethod == "PREGENERATED":
            timeSlideFilePath = workflow.cp.get_opt_tags("workflow-timeslides",
                                      "timeslides-pregenerated-file", currTags)
            file_url = urlparse.urljoin('file:', urllib.pathname2url(\
                                                  timeSlideFilePath))
            tisiOutFile = File(ifoString, 'PREGEN_TIMESLIDES',
                               fullSegment, file_url, tags=currTags)

        timeSlideOuts.append(tisiOutFile)

    return timeSlideOuts
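Time-slide jobs are discovered from [tisi-...] sub-sections, whose suffix (upper-cased) becomes the tag. A hypothetical configuration, where the executable tag is a placeholder resolved by select_generic_executable, might be:

[workflow-timeslides]
timeslides-method = AT_RUNTIME
; Placeholder executable tag.
timeslides-exe = tisi

[tisi-zerolag]
; Options for the time slide job tagged ZEROLAG go here.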
Example #54
def get_segments_file(workflow, name, option_name, out_dir):
    """Get cumulative segments from option name syntax for each ifo.

    Use the syntax of a configparser string to define the resulting
    segment_file, e.g.
    option_name = +up_flag1,+up_flag2,+up_flag3,-down_flag1,-down_flag2
    Each ifo may have a different string and is stored separately in the file.
    Flags which add time must precede flags which subtract time.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
    name: string
        Name of the segment list being created
    option_name: str
        Name of option in the associated config parser to get the flag list
    out_dir: path
        Directory in which the output segment file is written

    Returns
    --------
    seg_file: pycbc.workflow.SegFile
        SegFile instance that points to the segment xml file on disk.
    """
    from pycbc.dq import query_str
    make_analysis_dir(out_dir)
    cp = workflow.cp
    start = workflow.analysis_time[0]
    end = workflow.analysis_time[1]

    # Check for veto definer file
    veto_definer = None
    if cp.has_option("workflow-segments", "segments-veto-definer-url"):
        veto_definer = save_veto_definer(workflow.cp, out_dir, [])

    # Check for provided server
    server = "https://segments.ligo.org"
    if cp.has_option("workflow-segments", "segments-database-url"):
        server = cp.get("workflow-segments", "segments-database-url")

    source = "any"
    if cp.has_option("workflow-segments", "segments-source"):
        source = cp.get("workflow-segments", "segments-source")
    if source == "file":
        local_file_path = \
            resolve_url(cp.get("workflow-segments", option_name+"-file"))
        pfn = os.path.join(out_dir, os.path.basename(local_file_path))
        shutil.move(local_file_path, pfn)
        return SegFile.from_segment_xml(pfn)

    segs = {}
    for ifo in workflow.ifos:
        flag_str = cp.get_opt_tags("workflow-segments", option_name, [ifo])
        key = ifo + ':' + name
        segs[key] = query_str(ifo,
                              flag_str,
                              start,
                              end,
                              source=source,
                              server=server,
                              veto_definer=veto_definer)
        logging.info("%s: got %s flags", ifo, option_name)

    return SegFile.from_segment_list_dict(name,
                                          segs,
                                          extension='.xml',
                                          valid_segment=workflow.analysis_time,
                                          directory=out_dir)
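A hypothetical section using the flag-string syntax described in the docstring; the option name and flag names below are placeholders, and this assumes each ifo's string can be supplied through the usual ifo-tagged section lookup:

[workflow-segments]
segments-database-url = https://segments.ligo.org
; Placeholder option and flags; additions (+) must precede
; subtractions (-).
segments-science-veto = +H1:GOOD_FLAG:1,-H1:BAD_FLAG:1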
Example #55
def setup_tmpltbank_workflow(workflow,
                             science_segs,
                             datafind_outs,
                             output_dir=None,
                             psd_files=None,
                             tags=None,
                             return_format=None):
    '''
    Setup template bank section of CBC workflow. This function is responsible
    for deciding which of the various template bank workflow generation
    utilities should be used.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    science_segs : Keyed dictionary of glue.segmentlist objects
        scienceSegs[ifo] holds the science segments to be analysed for each
        ifo.
    datafind_outs : pycbc.workflow.core.FileList
        The file list containing the datafind files.
    output_dir : path string
        The directory where data products will be placed.
    psd_files : pycbc.workflow.core.FileList
        The file list containing predefined PSDs, if provided.
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    --------
    tmplt_banks : pycbc.workflow.core.FileList
        The FileList holding the details of all the template bank jobs.
    '''
    if tags is None:
        tags = []
    logging.info("Entering template bank generation module.")
    make_analysis_dir(output_dir)
    cp = workflow.cp

    # Parse for options in ini file
    tmpltbankMethod = cp.get_opt_tags("workflow-tmpltbank", "tmpltbank-method",
                                      tags)

    # There can be a large number of different options here, for e.g. to set
    # up fixed bank, or maybe something else
    if tmpltbankMethod == "PREGENERATED_BANK":
        logging.info("Setting template bank from pre-generated bank(s).")
        tmplt_banks = setup_tmpltbank_pregenerated(workflow, tags=tags)
    # Else we assume template banks will be generated in the workflow
    elif tmpltbankMethod == "WORKFLOW_INDEPENDENT_IFOS":
        logging.info("Adding template bank jobs to workflow.")
        if cp.has_option_tags("workflow-tmpltbank",
                              "tmpltbank-link-to-matchedfilter", tags):
            if not cp.has_option_tags("workflow-matchedfilter",
                                      "matchedfilter-link-to-tmpltbank", tags):
                errMsg = "If using tmpltbank-link-to-matchedfilter, you should "
                errMsg = "also use matchedfilter-link-to-tmpltbank."
                logging.warn(errMsg)
            linkToMatchedfltr = True
        else:
            linkToMatchedfltr = False
        if cp.has_option_tags("workflow-tmpltbank",
                              "tmpltbank-compatibility-mode", tags):
            if not linkToMatchedfltr:
                errMsg = "Compatibility mode requires that the "
                errMsg += "tmpltbank-link-to-matchedfilter option is also set."
                raise ValueError(errMsg)
            if not cp.has_option_tags("workflow-matchedfilter",
                                      "matchedfilter-compatibility-mode",
                                      tags):
                errMsg = "If using compatibility mode it must be set both in "
                errMsg += "the template bank and matched-filtering stages."
                raise ValueError(errMsg)
            compatibility_mode = True
        else:
            compatibility_mode = False
        tmplt_banks = setup_tmpltbank_dax_generated(
            workflow,
            science_segs,
            datafind_outs,
            output_dir,
            tags=tags,
            link_to_matchedfltr=linkToMatchedfltr,
            compatibility_mode=compatibility_mode,
            psd_files=psd_files)
    elif tmpltbankMethod == "WORKFLOW_INDEPENDENT_IFOS_NODATA":
        logging.info("Adding template bank jobs to workflow.")
        tmplt_banks = setup_tmpltbank_without_frames(workflow,
                                                     output_dir,
                                                     tags=tags,
                                                     independent_ifos=True,
                                                     psd_files=psd_files)
    elif tmpltbankMethod == "WORKFLOW_NO_IFO_VARIATION_NODATA":
        logging.info("Adding template bank jobs to workflow.")
        tmplt_banks = setup_tmpltbank_without_frames(workflow,
                                                     output_dir,
                                                     tags=tags,
                                                     independent_ifos=False,
                                                     psd_files=psd_files)
    else:
        errMsg = "Template bank method not recognized. Must be either "
        errMsg += "PREGENERATED_BANK, WORKFLOW_INDEPENDENT_IFOS, "
        errMsg += "WORKFLOW_INDEPENDENT_IFOS_NODATA or "
        errMsg += "WORKFLOW_NO_IFO_VARIATION_NODATA."
        raise ValueError(errMsg)

    # Check the format of the input template bank file and return it in
    # the format requested as per return_format, provided a conversion
    # between the two specific formats has been implemented. Currently,
    # a conversion from xml.gz or xml to hdf is supported, but not vice
    # versa. If a return_format is not specified the function returns
    # the bank in the format as it was inputted.
    tmplt_bank_filename = tmplt_banks[0].name
    ext = tmplt_bank_filename.split('.', 1)[1]
    logging.info("Input bank is a %s file", ext)
    if return_format is None:
        tmplt_banks_return = tmplt_banks
    elif return_format in ('hdf', 'h5', 'hdf5'):
        if ext in ('hdf', 'h5', 'hdf5') or ext in ('xml.gz', 'xml'):
            tmplt_banks_return = pycbc.workflow.convert_bank_to_hdf(
                workflow, tmplt_banks, "bank")
        else:
            raise NotImplementedError("{0} to {1} conversion is not "
                                      "supported.".format(ext, return_format))
    else:
        if ext == return_format:
            tmplt_banks_return = tmplt_banks
        else:
            raise NotImplementedError("{0} to {1} conversion is not "
                                      "supported.".format(ext, return_format))
    logging.info("Leaving template bank generation module.")
    return tmplt_banks_return
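A minimal sketch selecting one of the recognized methods:

[workflow-tmpltbank]
; One of PREGENERATED_BANK, WORKFLOW_INDEPENDENT_IFOS,
; WORKFLOW_INDEPENDENT_IFOS_NODATA or WORKFLOW_NO_IFO_VARIATION_NODATA.
tmpltbank-method = PREGENERATED_BANK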
Example #56
def setup_coincidence_workflow(workflow,
                               segsList,
                               timeSlideFiles,
                               inspiral_outs,
                               output_dir,
                               veto_cats=[2, 3, 4],
                               tags=[],
                               timeSlideTags=None):
    '''
    This function aims to be the gateway for setting up a set of coincidence
    jobs in a workflow. The goal is that this function can support a
    number of different ways/codes that could be used for doing this.
    For now it only supports ligolw_sstinca.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    segsList : pycbc.workflow.core.FileList
        The list of files returned by workflow's segment module that contains
        pointers to all the segment files generated in the workflow. If the
        coincidence code will be applying the data quality vetoes, then this
        will be used to ensure that the codes get the necessary input to do
        this.
    timeSlideFiles : pycbc.workflow.core.FileList
        A FileList of the timeSlide input files that are needed to
        determine what time sliding needs to be done if the coincidence code
        will be running time slides to facilitate background computations later
        in the workflow.
    inspiral_outs : pycbc.workflow.core.FileList
        A FileList of the matched-filter module output that is used as
        input to the coincidence codes running at this stage.
    output_dir : path
        The directory in which coincidence output will be stored.
    veto_cats : list of ints (optional, default = [2,3,4])
        Veto categories that will be applied in the coincidence jobs. If this
        takes the default value the code will run data quality at cumulative 
        categories 2, 3 and 4. Note that if we change the flag definitions to
        be non-cumulative then this option will need to be revisited.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['BNSINJECTIONS'] or
        ['NOINJECTIONANALYSIS']. This will be used in output names.
    timeSlideTags : list of strings (optional, default = None)
        A list of the tags corresponding to the timeSlideFiles that are to be
        used in this call to the module. This can be used to ensure that the
        injection runs do no time sliding, but the no-injection runs do perform
        time slides (or vice-versa if you prefer!)
    Returns
    --------
    coinc_outs : pycbc.workflow.core.FileList
        A list of the *final* outputs of the coincident stage. This *does not*
        include any intermediate products produced within the workflow. If you
        require access to intermediate products call the various sub-functions
        in this module directly.
    '''
    logging.info('Entering coincidence setup module.')
    make_analysis_dir(output_dir)

    # Parse for options in .ini file
    coincidenceMethod = workflow.cp.get_opt_tags("workflow-coincidence",
                                                 "coincidence-method", tags)

    # Scope here for adding different options/methods here. For now we only
    # have the single_stage ihope method which consists of using ligolw_add
    # to create a large job for coincidence and then running ligolw_thinca
    # on that output.
    if coincidenceMethod == "WORKFLOW_DISCRETE_SLIDES":
        # If I am doing exact match I can parallelize these jobs and reduce
        # memory footprint. This will require all input inspiral jobs to have
        # a JOB%d tag to distinguish between them.
        if workflow.cp.has_option_tags("workflow-coincidence",
                                       "coincidence-exact-match-parallelize",
                                       tags):
            parallelize_split_input = True
        else:
            parallelize_split_input = False

        # If you want the ligolw_add outputs, call this function directly
        coinc_outs, other_outs = setup_coincidence_workflow_ligolw_thinca(
            workflow,
            segsList,
            timeSlideFiles,
            inspiral_outs,
            output_dir,
            veto_cats=veto_cats,
            tags=tags,
            timeSlideTags=timeSlideTags,
            parallelize_split_input=parallelize_split_input)
    else:
        errMsg = "Coincidence method not recognized. Must be one of "
        errMsg += "WORKFLOW_DISCRETE_SLIDES (currently only one option)."
        raise ValueError(errMsg)

    logging.info('Leaving coincidence setup module.')

    return coinc_outs, other_outs
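A sketch of the corresponding configuration; the parallelize option is a valueless flag whose mere presence enables the parallel exact-match path:

[workflow-coincidence]
coincidence-method = WORKFLOW_DISCRETE_SLIDES
; Requires the input inspiral jobs to carry JOB%d tags.
coincidence-exact-match-parallelize =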
Example #57
def setup_analysislogging(workflow, segs_list, insps, args, output_dir,
                          program_name="workflow", tags=[]):
    """
    This module sets up the analysis logging xml file that contains the
    following information:

    * Command line arguments that the code was run with
    * Segment list of times marked as SCIENCE
    * Segment list of times marked as SCIENCE and "OK" i.e. not CAT_1 vetoed
    * Segment list of times marked as SCIENCE_OK and present on the cluster
    * The times that will be analysed by the matched-filter jobs

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance.
    segs_list : pycbc.workflow.core.FileList
        A list of Files containing the information needed to generate the
        segments above. For segments generated at run time the associated
        segmentlist is a property of this object.
    insps : pycbc.workflow.core.FileList
        The output files from the matched-filtering module. Used to identify
        what times have been analysed in this workflow.
    args : argparse.Namespace
        The parsed command line arguments, recorded in the process and
        process_params tables.
    output_dir : path
        Directory to output any files to.
    program_name : string (optional, default = "workflow")
        The program name to stick in the process/process_params tables.
    tags : list (optional, default = [])
        If given restrict to considering inspiral and segment files that
        are tagged with all tags in this list.
    """
    logging.info("Entering analysis logging module.")
    make_analysis_dir(output_dir)

    # Construct the summary XML file
    outdoc = ligolw.Document()
    outdoc.appendChild(ligolw.LIGO_LW())

    # Add process and process_params tables
    proc_id = process.register_to_xmldoc(outdoc, program_name,
                                         vars(args)).process_id

    # Now add the various segment lists to this file
    summ_segs = segmentlist([workflow.analysis_time])
    
    # If tags is given filter by tags
    if tags:
        for tag in tags:
            segs_list = segs_list.find_output_with_tag(tag)
            insps = insps.find_output_with_tag(tag)

    for ifo in workflow.ifos:
        # Lets get the segment lists we need
        seg_ifo_files = segs_list.find_output_with_ifo(ifo)
        # SCIENCE
        sci_seg_file = seg_ifo_files.find_output_with_tag('SCIENCE')
        if len(sci_seg_file) == 1:
            sci_seg_file = sci_seg_file[0]
            sci_segs = sci_seg_file.segmentList
            sci_def_id = segmentdb_utils.add_to_segment_definer(outdoc, proc_id,
                                                   ifo, "CBC_WORKFLOW_SCIENCE", 0)
            segmentdb_utils.add_to_segment(outdoc, proc_id, sci_def_id,
                                                                      sci_segs)
            segmentdb_utils.add_to_segment_summary(outdoc, proc_id, sci_def_id,
                                                         summ_segs, comment='')
        elif sci_seg_file:
            # FIXME: While the segment module is still fractured (#127) this
            #        may not work. Please update when #127 is resolved
            pass
            #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \
            #          %(len(sci_seg_file), ifo, 'SCIENCE')
            #raise ValueError(err_msg)

        # SCIENCE_OK
        sci_ok_seg_file = seg_ifo_files.find_output_with_tag('SCIENCE_OK')
        if len(sci_ok_seg_file) == 1:
            sci_ok_seg_file = sci_ok_seg_file[0]
            sci_ok_segs = sci_ok_seg_file.segmentList
            sci_ok_def_id = segmentdb_utils.add_to_segment_definer(outdoc,
                                       proc_id, ifo, "CBC_WORKFLOW_SCIENCE_OK", 0)
            segmentdb_utils.add_to_segment(outdoc, proc_id, sci_ok_def_id,
                                                                   sci_ok_segs)
            segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                                          sci_ok_def_id, summ_segs, comment='')
        elif sci_ok_seg_file:
            # FIXME: While the segment module is still fractured (#127) this
            #        may not work. Please update when #127 is resolved
            pass
            #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \
            #          %(len(sci_ok_seg_file), ifo, 'SCIENCE_OK')
            #raise ValueError(err_msg)


        # SCIENCE_AVAILABLE
        sci_available_seg_file = seg_ifo_files.find_output_with_tag(\
                                                           'SCIENCE_AVAILABLE')
        if len(sci_available_seg_file) == 1:
            sci_available_seg_file = sci_available_seg_file[0]
            sci_available_segs = sci_available_seg_file.segmentList
            sci_available_def_id = segmentdb_utils.add_to_segment_definer(\
                        outdoc, proc_id, ifo, "CBC_WORKFLOW_SCIENCE_AVAILABLE", 0)
            segmentdb_utils.add_to_segment(outdoc, proc_id,
                                      sci_available_def_id, sci_available_segs)
            segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                                   sci_available_def_id, summ_segs, comment='')
        elif sci_available_seg_file:
            # FIXME: While the segment module is still fractured (#127) this
            #        may not work. Please update when #127 is resolved
            pass
            #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \
            #          %(len(sci_available_seg_file), ifo, 'SCIENCE_AVAILABLE')
            #raise ValueError(err_msg)

        # ANALYSABLE - This one needs to come from inspiral outs
        ifo_insps = insps.find_output_with_ifo(ifo)
        analysable_segs = ifo_insps.get_times_covered_by_files()

        analysable_def_id = segmentdb_utils.add_to_segment_definer(outdoc,
                                     proc_id, ifo, "CBC_WORKFLOW_ANALYSABLE", 0)
        segmentdb_utils.add_to_segment(outdoc, proc_id, analysable_def_id,
                                                               analysable_segs)
        segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                                      analysable_def_id, summ_segs, comment='')

    summ_file = File(workflow.ifos, "WORKFLOW_SUMMARY",
                     workflow.analysis_time, extension=".xml",
                     directory=output_dir)
    summ_file.PFN(summ_file.storage_path, site='local')
    utils.write_filename(outdoc, summ_file.storage_path)

    return FileList([summ_file])
Example #58
def setup_matchedfltr_workflow(workflow,
                               science_segs,
                               datafind_outs,
                               tmplt_banks,
                               output_dir=None,
                               injection_file=None,
                               tags=None):
    '''
    This function aims to be the gateway for setting up a set of matched-filter
    jobs in a workflow. This function is intended to support multiple
    different ways/codes that could be used for doing this. For now the only
    supported sub-module is one that runs the matched-filtering by setting up
    a series of matched-filtering jobs, from one executable, to create
    matched-filter triggers covering the full range of science times for which
    there is data and a template bank file.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The workflow instance that the coincidence jobs will be added to.
    science_segs : ifo-keyed dictionary of glue.segments.segmentlist instances
        The list of times that are being analysed in this workflow. 
    datafind_outs : pycbc.workflow.core.FileList
        A FileList of the datafind files that are needed to obtain the
        data used in the analysis.
    tmplt_banks : pycbc.workflow.core.FileList
        A FileList of the template bank files that will serve as input
        in this stage.
    output_dir : path
        The directory in which output will be stored.
    injection_file : pycbc.workflow.core.File, optional (default=None)
        If given the file containing the simulation file to be sent to these
        jobs on the command line. If not given no file will be sent.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['BNSINJECTIONS'] or
        ['NOINJECTIONANALYSIS']. This will be used in output names.
        
    Returns
    -------
    inspiral_outs : pycbc.workflow.core.FileList
        A list of output files written by this stage. This *will not* contain
        any intermediate products produced within this stage of the workflow.
        If you require access to any intermediate products produced at this
        stage you can call the various sub-functions directly.
    '''
    if tags is None:
        tags = []
    logging.info("Entering matched-filtering setup module.")
    make_analysis_dir(output_dir)
    cp = workflow.cp

    # Parse for options in .ini file
    mfltrMethod = cp.get_opt_tags("workflow-matchedfilter",
                                  "matchedfilter-method", tags)

    # Could have a number of choices here
    if mfltrMethod == "WORKFLOW_INDEPENDENT_IFOS":
        logging.info("Adding matched-filter jobs to workflow.")
        if cp.has_option_tags("workflow-matchedfilter",
                              "matchedfilter-link-to-tmpltbank", tags):
            if not cp.has_option_tags("workflow-tmpltbank",
                                      "tmpltbank-link-to-matchedfilter", tags):
                errMsg = "If using matchedfilter-link-to-tmpltbank, you should "
                errMsg += "also use tmpltbank-link-to-matchedfilter."
                logging.warning(errMsg)
            linkToTmpltbank = True
        else:
            linkToTmpltbank = False
        if cp.has_option_tags("workflow-matchedfilter",
                              "matchedfilter-compatibility-mode", tags):
            if not linkToTmpltbank:
                errMsg = "Compatibility mode requires that the "
                errMsg += "matchedfilter-link-to-tmpltbank option is also set."
                raise ValueError(errMsg)
            if not cp.has_option_tags("workflow-tmpltbank",
                                      "tmpltbank-compatibility-mode", tags):
                errMsg = "If using compatibility mode it must be set both in "
                errMsg += "the template bank and matched-filtering stages."
                raise ValueError(errMsg)
            compatibility_mode = True
        else:
            compatibility_mode = False

        inspiral_outs = setup_matchedfltr_dax_generated(
            workflow,
            science_segs,
            datafind_outs,
            tmplt_banks,
            output_dir,
            injection_file=injection_file,
            tags=tags,
            link_to_tmpltbank=linkToTmpltbank,
            compatibility_mode=compatibility_mode)
    elif mfltrMethod == "WORKFLOW_MULTIPLE_IFOS":
        logging.info("Adding matched-filter jobs to workflow.")
        inspiral_outs = setup_matchedfltr_dax_generated_multi(
            workflow,
            science_segs,
            datafind_outs,
            tmplt_banks,
            output_dir,
            injection_file=injection_file,
            tags=tags)
    else:
        errMsg = "Matched filter method not recognized. Must be one of "
        errMsg += "WORKFLOW_INDEPENDENT_IFOS or WORKFLOW_MULTIPLE_IFOS."
        raise ValueError(errMsg)

    logging.info("Leaving matched-filtering setup module.")
    return inspiral_outs
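The linking options are meant to be set in matching pairs across the two stages (the code warns when only one side is present). A hypothetical configuration:

[workflow-matchedfilter]
matchedfilter-method = WORKFLOW_INDEPENDENT_IFOS
; Pair with tmpltbank-link-to-matchedfilter in [workflow-tmpltbank].
matchedfilter-link-to-tmpltbank =

[workflow-tmpltbank]
tmpltbank-link-to-matchedfilter =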
Example #59
def setup_injection_workflow(workflow,
                             output_dir=None,
                             inj_section_name='injections',
                             exttrig_file=None,
                             tags=None):
    """
    This function is the gateway for setting up injection-generation jobs in a
    workflow. It should be possible for this function to support a number
    of different ways/codes that could be used for doing this; however, as this
    will presumably stay as a single call to a single code (which need not be
    inspinj) there are currently no subfunctions in this module.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    output_dir : path
        The directory in which injection files will be stored.
    inj_section_name : string (optional, default='injections')
        The string that corresponds to the option describing the exe location
        in the [executables] section of the .ini file and that corresponds to
        the section (and sub-sections) giving the options that will be given to
        the code at run time.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. This will be used in output names.

    Returns
    --------
    inj_files : pycbc.workflow.core.FileList
        The list of injection files created by this call.
    inj_tags : list of strings
        The tag corresponding to each injection file and used to uniquely
        identify them. The FileList class contains functions to search
        based on tags.
    """
    if tags is None:
        tags = []
    logging.info("Entering injection module.")
    make_analysis_dir(output_dir)

    # Get full analysis segment for output file naming
    full_segment = workflow.analysis_time
    ifos = workflow.ifos

    # Identify which injections to do by presence of sub-sections in
    # the configuration file
    inj_tags = []
    inj_files = FileList([])

    for section in workflow.cp.get_subsections(inj_section_name):
        inj_tag = section.upper()
        curr_tags = tags + [inj_tag]

        # Parse for options in ini file
        injection_method = workflow.cp.get_opt_tags("workflow-injections",
                                                    "injections-method",
                                                    curr_tags)

        if injection_method in ["IN_WORKFLOW", "AT_RUNTIME"]:
            # FIXME: Add ability to specify different exes
            inj_job = LalappsInspinjExecutable(workflow.cp,
                                               inj_section_name,
                                               out_dir=output_dir,
                                               ifos='HL',
                                               tags=curr_tags)
            node = inj_job.create_node(full_segment)
            if injection_method == "AT_RUNTIME":
                workflow.execute_node(node)
            else:
                workflow.add_node(node)
            inj_file = node.output_files[0]
            inj_files.append(inj_file)
        elif injection_method == "PREGENERATED":
            file_attrs = {
                'ifos': ['HL'],
                'segs': full_segment,
                'tags': curr_tags
            }
            injection_path = workflow.cp.get_opt_tags(
                "workflow-injections", "injections-pregenerated-file",
                curr_tags)
            curr_file = resolve_url_to_file(injection_path, attrs=file_attrs)
            inj_files.append(curr_file)
        elif injection_method in ["IN_COH_PTF_WORKFLOW", "AT_COH_PTF_RUNTIME"]:
            inj_job = LalappsInspinjExecutable(workflow.cp,
                                               inj_section_name,
                                               out_dir=output_dir,
                                               ifos=ifos,
                                               tags=curr_tags)
            node = inj_job.create_node(full_segment, exttrig_file)
            if injection_method == "AT_COH_PTF_RUNTIME":
                workflow.execute_node(node)
            else:
                workflow.add_node(node)
            inj_file = node.output_files[0]

            if workflow.cp.has_option("workflow-injections", "em-bright-only"):
                em_filter_job = PycbcDarkVsBrightInjectionsExecutable(
                    workflow.cp,
                    'em_bright_filter',
                    tags=curr_tags,
                    out_dir=output_dir,
                    ifos=ifos)
                node = em_filter_job.create_node(inj_file, full_segment,
                                                 curr_tags)
                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]

            if workflow.cp.has_option("workflow-injections",
                                      "do-jitter-skyloc"):
                jitter_job = LigolwCBCJitterSkylocExecutable(
                    workflow.cp,
                    'jitter_skyloc',
                    tags=curr_tags,
                    out_dir=output_dir,
                    ifos=ifos)
                node = jitter_job.create_node(inj_file, full_segment,
                                              curr_tags)
                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]

            if workflow.cp.has_option("workflow-injections",
                                      "do-align-total-spin"):
                align_job = LigolwCBCAlignTotalSpinExecutable(
                    workflow.cp,
                    'align_total_spin',
                    tags=curr_tags,
                    out_dir=output_dir,
                    ifos=ifos)
                node = align_job.create_node(inj_file, full_segment, curr_tags)

                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]

            inj_files.append(inj_file)
        else:
            err = "Injection method must be one of IN_WORKFLOW, AT_RUNTIME, "
            err += "PREGENERATED, IN_COH_PTF_WORKFLOW or AT_COH_PTF_RUNTIME. "
            err += "Got %s." % (injection_method)
            raise ValueError(err)

        inj_tags.append(inj_tag)

    logging.info("Leaving injection module.")
    return inj_files, inj_tags
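Injection sets are discovered from sub-sections of the [injections] section; each suffix, upper-cased, becomes the tag used in output names. A hypothetical configuration:

[workflow-injections]
injections-method = IN_WORKFLOW

[injections-bnsinj]
; Options for the injection job tagged BNSINJ; the sub-section suffix
; supplies the tag.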