Example #1
    def create_node(self, parent, inj_trigs, inj_string, max_inc, segment):
        node = Node(self)

        trig_name = self.cp.get("workflow", "trigger-name")
        node.add_opt("--inj-string", inj_string)
        node.add_opt("--max-inclination", max_inc)
        node.add_opt("--inj-cache", "%s" % parent.storage_path)

        out_files = FileList([])
        for inj_trig in inj_trigs:
            out_string = inj_string.split(max_inc)[0]
            out_file_tag = [out_string, "FILTERED", max_inc, inj_trig.tag_str.rsplit("_", 1)[-1]]
            out_file = File(
                self.ifos,
                inj_trig.description,
                inj_trig.segment,
                extension="xml",
                directory=self.out_dir,
                tags=out_file_tag,
            )
            out_file.PFN(out_file.cache_entry.path, site="local")
            out_files.append(out_file)

        node.add_opt("--output-dir", self.out_dir)

        return node, out_files
Example #2
    def create_node(self, parent, inj_trigs, inj_string, max_inc, segment):
        node = Node(self)

        trig_name = self.cp.get('workflow', 'trigger-name')
        node.add_opt('--inj-string', inj_string)
        node.add_opt('--max-inclination', max_inc)
        node.add_opt('--inj-cache', '%s' % parent.storage_path)

        out_files = FileList([])
        for inj_trig in inj_trigs:
            out_file_tag = [
                inj_string, "FILTERED", max_inc,
                inj_trig.tag_str.rsplit('_', 1)[-1]
            ]
            out_file = File(
                self.ifos,
                inj_trig.description,
                inj_trig.segment,
                extension="xml",
                directory=self.out_dir,
                tags=out_file_tag)
            out_file.PFN(out_file.cache_entry.path, site="local")
            out_files.append(out_file)

        node.add_opt('--output-dir', self.out_dir)

        return node, out_files
Example #3
def setup_background_bins_inj(workflow, coinc_files, background_file, bank_file, out_dir, tags=None):
    tags = [] if tags is None else tags
    
    bins_exe = PyCBCDistributeBackgroundBins(workflow.cp, 'distribute_background_bins', 
                                       ifos=workflow.ifos, tags=tags, out_dir=out_dir)
                                       
    statmap_exe = PyCBCStatMapInjExecutable(workflow.cp, 'statmap_inj',
                                              ifos=workflow.ifos,
                                              tags=tags, out_dir=out_dir)   
    
    cstat_exe = PyCBCCombineStatmap(workflow.cp, 'combine_statmap', ifos=workflow.ifos,
                                    tags=tags, out_dir=out_dir)                       
           
    background_bins = workflow.cp.get_opt_tags('workflow-coincidence', 'background-bins', tags).split(' ')   
    background_bins = [x for x in background_bins if x != '']
    
    for inj_type in ['injinj', 'injfull', 'fullinj']:          
        bins_node = bins_exe.create_node(FileList(coinc_files[inj_type]), bank_file, background_bins, tags=tags + [inj_type])
        workflow += bins_node
        coinc_files[inj_type] = bins_node.output_files
    
    stat_files = FileList([])
    for i in range(len(background_bins)):
        statnode = statmap_exe.create_node(FileList([coinc_files['injinj'][i]]), FileList([background_file[i]]), 
                                     FileList([coinc_files['injfull'][i]]), FileList([coinc_files['fullinj'][i]]), 
                                     tags=tags + ['BIN_%s' % i])
        workflow += statnode
        stat_files.append(statnode.output_files[0])
        
    cstat_node = cstat_exe.create_node(stat_files, tags=tags)
    workflow += cstat_node
    
    return cstat_node.output_files[0]
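The background-bins option above is read as one space-separated string and split into individual bin definitions; the empty-string filter guards against stray double spaces. A tiny standalone sketch of that parsing (the definition strings are placeholders only, not necessarily valid pycbc bin syntax):

raw_opt = 'bin0:def0 bin1:def1  bin2:def2'  # note the accidental double space
background_bins = [x for x in raw_opt.split(' ') if x != '']
print(background_bins)  # ['bin0:def0', 'bin1:def1', 'bin2:def2']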
Example #4
def setup_background_bins(workflow, coinc_files, bank_file, out_dir, tags=None):
    tags = [] if tags is None else tags
    
    bins_exe = PyCBCDistributeBackgroundBins(workflow.cp, 'distribute_background_bins', 
                                       ifos=workflow.ifos, tags=tags, out_dir=out_dir)
                                       
    statmap_exe = PyCBCStatMapExecutable(workflow.cp, 'statmap',
                                              ifos=workflow.ifos,
                                              tags=tags, out_dir=out_dir)       

    cstat_exe = PyCBCCombineStatmap(workflow.cp, 'combine_statmap', ifos=workflow.ifos,
                                    tags=tags, out_dir=out_dir)                       
           
    background_bins = workflow.cp.get_opt_tags('workflow-coincidence', 'background-bins', tags).split(' ')             
    background_bins = [x for x in background_bins if x != '']
    bins_node = bins_exe.create_node(coinc_files, bank_file, background_bins)
    workflow += bins_node
    
    stat_files = FileList([])
    for i, coinc_file in enumerate(bins_node.output_files):
        statnode = statmap_exe.create_node(FileList([coinc_file]), tags=tags + ['BIN_%s' % i])
        workflow += statnode
        stat_files.append(statnode.output_files[0])
        stat_files[i].bin_name = bins_node.names[i]
    
    cstat_node = cstat_exe.create_node(stat_files, tags=tags)
    workflow += cstat_node
    
    return cstat_node.output_files[0], stat_files
Example #5
def convert_cachelist_to_filelist(datafindcache_list):
    """
    Take as input a list of glue.lal.Cache objects and return a pycbc FileList
    containing all frames within those caches.
   
    Parameters
    -----------
    datafindcache_list : list of glue.lal.Cache objects
        The list of cache files to convert.
  
    Returns
    --------
    datafind_filelist : FileList of frame File objects
        The list of frame files.
    """ 
    datafind_filelist = FileList([])
    prev_file = None
    for cache in datafindcache_list:
        curr_ifo = cache.ifo
        for frame in cache:
            # Don't add a new workflow file entry for this frame if it is a
            # duplicate. These are assumed to be returned in time order.
            if prev_file and prev_file.cache_entry.url == frame.url:
                continue

            currFile = File(curr_ifo, frame.description,
                    frame.segment, file_url=frame.url, use_tmp_subdirs=True)
            currFile.PFN(frame.path, site='local')
            datafind_filelist.append(currFile)
            prev_file = currFile
    return datafind_filelist
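A minimal usage sketch for the conversion above, assuming pycbc and glue are installed; the cache file paths and the H1/L1 pairing are placeholders. pycbc's datafind module normally attaches an 'ifo' attribute to each Cache before this conversion, so the sketch sets it by hand.

from glue.lal import Cache

cache_list = []
for ifo, path in [('H1', 'H1_frames.lcf'), ('L1', 'L1_frames.lcf')]:
    with open(path) as cache_fp:
        frame_cache = Cache.fromfile(cache_fp)
    # convert_cachelist_to_filelist reads this attribute from each cache
    frame_cache.ifo = ifo
    cache_list.append(frame_cache)

datafind_filelist = convert_cachelist_to_filelist(cache_list)
print(len(datafind_filelist), 'frame File objects')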
Example #6
def setup_psd_pregenerated(workflow, tags=[]):
    '''
    Setup CBC workflow to use pregenerated psd files.
    The file given in cp.get('workflow','pregenerated-psd-file-(ifo)') will 
    be used as the --psd-file argument to geom_nonspinbank, geom_aligned_bank
    and pycbc_plot_psd_file.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    --------
    psd_files : pycbc.workflow.core.FileList
        The FileList holding the gating files
    '''
    psd_files = FileList([])

    cp = workflow.cp
    global_seg = workflow.analysis_time
    user_tag = "PREGEN_PSD"

    # Check for one psd for all ifos
    try:
        pre_gen_file = cp.get_opt_tags('workflow-psd',
                        'psd-pregenerated-file', tags)
        pre_gen_file = resolve_url(pre_gen_file)
        file_url = urlparse.urljoin('file:',
                                     urllib.pathname2url(pre_gen_file))
        curr_file = File(workflow.ifos, user_tag, global_seg, file_url,
                                                    tags=tags)
        curr_file.PFN(file_url, site='local')
        psd_files.append(curr_file)
    except ConfigParser.Error:
        # Check for one psd per ifo
        for ifo in workflow.ifos:
            try:
                pre_gen_file = cp.get_opt_tags('workflow-psd',
                                'psd-pregenerated-file-%s' % ifo.lower(),
                                tags)
                pre_gen_file = resolve_url(pre_gen_file)
                file_url = urlparse.urljoin('file:',
                                             urllib.pathname2url(pre_gen_file))
                curr_file = File(ifo, user_tag, global_seg, file_url,
                                                            tags=tags)
                curr_file.PFN(file_url, site='local')
                psd_files.append(curr_file)

            except ConfigParser.Error:
                # It's unlikely, but not impossible, that only some ifos
                # will have pregenerated PSDs
                logging.warn("No psd file specified for IFO %s." % (ifo,))
                pass
            
    return psd_files
Example #7
def convert_cachelist_to_filelist(datafindcache_list):
    """
    Take as input a list of glue.lal.Cache objects and return a pycbc FileList
    containing all frames within those caches.

    Parameters
    -----------
    datafindcache_list : list of glue.lal.Cache objects
        The list of cache files to convert.

    Returns
    --------
    datafind_filelist : FileList of frame File objects
        The list of frame files.
    """
    datafind_filelist = FileList([])
    prev_file = None
    for cache in datafindcache_list:
        curr_ifo = cache.ifo
        for frame in cache:
            # Don't add a new workflow file entry for this frame if it is a
            # duplicate. These are assumed to be returned in time order.
            if prev_file:
                prev_name = prev_file.cache_entry.url.split('/')[-1]
                this_name = frame.url.split('/')[-1]
                if prev_name == this_name:
                    continue

            # Pegasus doesn't like "localhost" in URLs.
            frame.url = frame.url.replace('file://localhost','file://')

            currFile = File(curr_ifo, frame.description,
                    frame.segment, file_url=frame.url, use_tmp_subdirs=True)
            if frame.url.startswith('file://'):
                currFile.PFN(frame.url, site='local')
                if frame.url.startswith(
                    'file:///cvmfs/oasis.opensciencegrid.org/'):
                    # Datafind returned a URL valid on the osg as well
                    # so add the additional PFNs to allow OSG access.
                    currFile.PFN(frame.url, site='osg')
                    currFile.PFN(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'root://xrootd-local.unl.edu/user/'), site='osg')
                    currFile.PFN(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'gsiftp://red-gridftp.unl.edu/user/'), site='osg')
                    currFile.PFN(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'gsiftp://ldas-grid.ligo.caltech.edu/hdfs/'), site='osg')
            else:
                currFile.PFN(frame.url, site='notlocal')
            datafind_filelist.append(currFile)
            prev_file = currFile
    return datafind_filelist
Example #8
    def create_node(self, trig_files=None, segment_dir=None, out_tags=[],
                    tags=[]):
        node = Node(self)

        if not trig_files:
            raise ValueError("%s must be supplied with trigger files"
                              % self.name)

        # Data options
        pad_data = self.cp.get('inspiral', 'pad-data')
        if pad_data is None:
            raise ValueError("The option pad-data is a required option of "
                             "%s. Please check the ini file." % self.name)

        num_trials = int(self.cp.get("trig_combiner", "num-trials"))
        trig_name = self.cp.get('workflow', 'trigger-name')
        if all("COHERENT_NO_INJECTIONS" in t.name for t in trig_files) and \
                self.cp.has_option_tag('inspiral', 'do-short-slides',
                                       'coherent_no_injections'):
            node.add_opt('--short-slides')
        
        node.add_opt('--grb-name', trig_name)
        
        node.add_opt('--pad-data', pad_data)
        node.add_opt('--segment-length', self.cp.get('inspiral',
                                                     'segment-duration'))
        node.add_opt('--ifo-tag', self.ifos)
        node.add_opt('--user-tag', 'INSPIRAL')

        # Set input / output options
        node.add_input_list_opt('--input-files', trig_files)

        node.add_opt('--segment-dir', segment_dir)
        node.add_opt('--output-dir', self.out_dir)

        out_files = FileList([])
        for out_tag in out_tags:
            out_file = File(self.ifos, 'INSPIRAL', trig_files[0].segment,
                            directory=self.out_dir, extension='xml.gz',
                            tags=["GRB%s" % trig_name, out_tag],
                            store_file=self.retain_files)
            out_files.append(out_file)

        for trial in range(1, num_trials + 1):
            out_file = File(self.ifos, 'INSPIRAL', trig_files[0].segment,
                            directory=self.out_dir, extension='xml.gz',
                            tags=["GRB%s" % trig_name, "OFFTRIAL_%d" % trial],
                            store_file=self.retain_files)
            out_files.append(out_file)

        node.add_profile('condor', 'request_cpus', self.num_threads)

        return node, out_files
Example #9
def setup_gate_pregenerated(workflow, output_dir=None, tags=None):
    '''
    Setup CBC workflow to use pregenerated gating files.
    The file given in cp.get('workflow','gating-file-(ifo)') will
    be used as the --gating-file for all jobs for that ifo.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    output_dir : path string
       The directory where data products will be placed. 
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    --------
    gate_files : pycbc.workflow.core.FileList
        The FileList holding the gating files
    '''
    if tags is None:
        tags = []
    gate_files = FileList([])

    cp = workflow.cp
    global_seg = workflow.analysis_time
    user_tag = "PREGEN_GATE"

    for ifo in workflow.ifos:
        try:
            pre_gen_file = cp.get_opt_tags('workflow-gating',
                            'gating-file-%s' % ifo.lower(),
                            tags)
            pre_gen_file = resolve_url(pre_gen_file,
                                       os.path.join(os.getcwd(),output_dir))
            file_url = urlparse.urljoin('file:',
                                         urllib.pathname2url(pre_gen_file))
            curr_file = File(ifo, user_tag, global_seg, file_url,
                                                                 tags=tags)
            curr_file.PFN(file_url, site='local')
            gate_files.append(curr_file)

            logging.info("Using gating file %s for %s", file_url, ifo)

        except ConfigParser.Error:
            logging.info("No gating file specified for %s", ifo)
        
    return gate_files
Example #10
def setup_gate_pregenerated(workflow, tags=None):
    '''
    Setup CBC workflow to use pregenerated gating files.
    The file given in cp.get('workflow','pregenerated-gating-file-(ifo)') will 
    be used as the --gating-file for all matched-filtering jobs for that ifo.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    --------
    gate_files : pycbc.workflow.core.FileList
        The FileList holding the gating files
    '''
    if tags is None:
        tags = []
    gate_files = FileList([])

    cp = workflow.cp
    global_seg = workflow.analysis_time
    user_tag = "PREGEN_GATE"

    for ifo in workflow.ifos:
        try:
            pre_gen_file = cp.get_opt_tags(
                'workflow-gating', 'gating-pregenerated-file-%s' % ifo.lower(),
                tags)
            pre_gen_file = resolve_url(pre_gen_file)
            file_url = urlparse.urljoin('file:',
                                        urllib.pathname2url(pre_gen_file))
            curr_file = File(ifo, user_tag, global_seg, file_url, tags=tags)
            curr_file.PFN(file_url, site='local')
            gate_files.append(curr_file)

        except ConfigParser.Error:
            # It's unlikely, but not impossible, that only some ifos
            # will be gated
            logging.warn("No gating file specified for IFO %s." % (ifo, ))
            pass

    return gate_files
Example #11
def setup_statmap_inj(workflow, ifos, coinc_files, background_file,
                      out_dir, tags=None):
    tags = [] if tags is None else tags

    statmap_exe = PyCBCStatMapInjExecutable(workflow.cp,
                                            'statmap_inj',
                                            ifos=ifos,
                                            tags=tags, out_dir=out_dir)

    ifolist = ' '.join(ifos)
    stat_node = statmap_exe.create_node(FileList(coinc_files['injinj']),
                                        background_file,
                                        FileList(coinc_files['injfull']),
                                        FileList(coinc_files['fullinj']),
                                        ifolist)
    workflow.add_node(stat_node)
    return stat_node.output_files[0]
Example #12
def make_single_template_plots(workflow, segs, seg_name, params,
                                   out_dir, inj_file=None, exclude=None,
                                   require=None, tags=None, params_str=None,
                                   use_exact_inj_params=False):
    tags = [] if tags is None else tags
    makedir(out_dir)
    name = 'single_template_plot'
    secs = requirestr(workflow.cp.get_subsections(name), require)
    secs = excludestr(secs, exclude)
    files = FileList([])
    for tag in secs:
        for ifo in workflow.ifos:
            # Reanalyze the time around the trigger in each detector
            node = PlotExecutable(workflow.cp, 'single_template', ifos=[ifo],
                              out_dir=out_dir, tags=[tag] + tags).create_node()
            if use_exact_inj_params:
                node.add_opt('--use-params-of-closest-injection')
            else:
                node.add_opt('--mass1', "%.6f" % params['mass1'])
                node.add_opt('--mass2', "%.6f" % params['mass2'])
                node.add_opt('--spin1z',"%.6f" % params['spin1z'])
                node.add_opt('--spin2z',"%.6f" % params['spin2z'])
            # str(numpy.float64) restricts to 2d.p. BE CAREFUL WITH THIS!!!
            str_trig_time = '%.6f' %(params[ifo + '_end_time'])
            node.add_opt('--trigger-time', str_trig_time)
            node.add_input_opt('--inspiral-segments', segs)
            if inj_file is not None:
                node.add_input_opt('--injection-file', inj_file)
            node.add_opt('--segment-name', seg_name)
            node.new_output_file_opt(workflow.analysis_time, '.hdf',
                                     '--output-file', store_file=False)
            data = node.output_files[0]
            workflow += node
            # Make the plot for this trigger and detector
            node = PlotExecutable(workflow.cp, name, ifos=[ifo],
                              out_dir=out_dir, tags=[tag] + tags).create_node()
            node.add_input_opt('--single-template-file', data)
            node.new_output_file_opt(workflow.analysis_time, '.png',
                                     '--output-file')
            title="'%s SNR and chi^2 timeseries" %(ifo) 
            if params_str is not None:
                title+= " using %s" %(params_str)
            title+="'"
            node.add_opt('--plot-title', title)
            caption = "'The SNR and chi^2 timeseries around the injection"
            if params_str is not None:
                caption += " using %s" %(params_str)
            if use_exact_inj_params:
                caption += ". The injection itself was used as the template.'"
            else:
                caption += ". The template used has the following parameters: "
                caption += "mass1=%s, mass2=%s, spin1z=%s, spin2z=%s'"\
                       %(params['mass1'], params['mass2'], params['spin1z'],
                         params['spin2z'])
            node.add_opt('--plot-caption', caption)
            workflow += node
            files += node.output_files
    return files
Example #13
def setup_splittable_dax_generated(workflow, input_tables, out_dir, tags):
    '''
    Function for setting up the splitting jobs as part of the workflow.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the jobs will be added to.
    input_tables : pycbc.workflow.core.FileList
        The input files to be split up.
    out_dir : path
        The directory in which output will be written.

    Returns
    --------
    split_table_outs : pycbc.workflow.core.FileList
        The list of split up files as output from this job.
    '''
    cp = workflow.cp

    # Get values from ini file
    try:
        num_splits = cp.get_opt_tags("workflow-splittable",
                                     "splittable-num-banks", tags)
    except BaseException:
        inj_interval = int(
            cp.get_opt_tags("workflow-splittable", "splitinjtable-interval",
                            tags))
        if cp.has_option_tags("em_bright_filter", "max-keep", tags) and \
                cp.has_option("workflow-injections", "em-bright-only"):
            num_injs = int(
                cp.get_opt_tags("em_bright_filter", "max-keep", tags))
        else:
            num_injs = int(
                cp.get_opt_tags("workflow-injections", "num-injs", tags))
        inj_tspace = float(abs(workflow.analysis_time)) / num_injs
        num_splits = int(inj_interval // inj_tspace) + 1

    split_exe_tag = cp.get_opt_tags("workflow-splittable",
                                    "splittable-exe-tag", tags)
    split_exe = os.path.basename(cp.get("executables", split_exe_tag))
    # Select the appropriate class
    exe_class = select_splitfilejob_instance(split_exe)

    # Set up output structure
    out_file_groups = FileList([])

    # Set up the condorJob class for the current executable
    curr_exe_job = exe_class(workflow.cp,
                             split_exe_tag,
                             num_splits,
                             out_dir=out_dir)

    for input in input_tables:
        node = curr_exe_job.create_node(input, tags=tags)
        workflow.add_node(node)
        out_file_groups += node.output_files
    return out_file_groups
Example #14
def make_inference_inj_plots(workflow,
                             inference_files,
                             output_dir,
                             parameters,
                             name="inference_recovery",
                             analysis_seg=None,
                             tags=None):
    """ Sets up the recovered versus injected parameter plot in the workflow.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    inference_files: pycbc.workflow.FileList
        The files with posterior samples.
    output_dir: str
        The directory to store result plots and files.
    parameters : list
        A ``list`` of parameters. Each parameter gets its own plot.
    name: str
        The name in the [executables] section of the configuration file
        to use.
    analysis_segs: {None, ligo.segments.Segment}
       The segment this job encompasses. If None then use the total analysis
       time from the workflow.
    tags: {None, optional}
        Tags to add to the inference executables.

    Returns
    -------
    pycbc.workflow.FileList
        A list of result and output files.
    """

    # default values
    tags = [] if tags is None else tags
    analysis_seg = workflow.analysis_time \
                       if analysis_seg is None else analysis_seg
    output_files = FileList([])

    # make the directory that will contain the output files
    makedir(output_dir)

    # add command line options
    for (ii, param) in enumerate(parameters):
        plot_exe = PlotExecutable(workflow.cp,
                                  name,
                                  ifos=workflow.ifos,
                                  out_dir=output_dir,
                                  tags=tags + ['param{}'.format(ii)])
        node = plot_exe.create_node()
        node.add_input_list_opt("--input-file", inference_files)
        node.new_output_file_opt(analysis_seg, ".png", "--output-file")
        node.add_opt("--parameters", param)
        workflow += node
        output_files += node.output_files

    return output_files
Example #15
def make_gating_node(workflow, datafind_files, outdir=None, tags=None):
    '''
    Generate jobs for autogating the data for PyGRB runs.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    datafind_files : pycbc.workflow.core.FileList
        A FileList containing the frame files to be gated.
    outdir : string
        Path of the output directory
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    --------
    condition_strain_nodes : list
        List containing the pycbc.workflow.core.Node objects representing the
        autogating jobs.
    condition_strain_outs : pycbc.workflow.core.FileList
        FileList containing the pycbc.workflow.core.File objects representing
        the gated frame files.
    '''

    cp = workflow.cp
    if tags is None:
        tags = []
    
    condition_strain_class = select_generic_executable(workflow,
                                                       "condition_strain")
    condition_strain_nodes = []
    condition_strain_outs = FileList([])
    for ifo in workflow.ifos:
        input_files = FileList([datafind_file for datafind_file in \
                                datafind_files if datafind_file.ifo == ifo])
        condition_strain_jobs = condition_strain_class(cp, "condition_strain",
                ifo=ifo, out_dir=outdir, tags=tags)
        condition_strain_node, condition_strain_out = \
                condition_strain_jobs.create_node(input_files, tags=tags)
        condition_strain_nodes.append(condition_strain_node)
        condition_strain_outs.extend(FileList([condition_strain_out]))

    return condition_strain_nodes, condition_strain_outs
Example #16
def setup_gate_pregenerated(workflow, tags=[]):
    '''
    Setup CBC workflow to use pregenerated gating files.
    The file given in cp.get('workflow','pregenerated-gating-file-(ifo)') will 
    be used as the --gating-file for all matched-filtering jobs for that ifo.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    --------
    gate_files : pycbc.workflow.core.FileList
        The FileList holding the gating files
    '''
    gate_files = FileList([])

    cp = workflow.cp
    global_seg = workflow.analysis_time
    user_tag = "PREGEN_GATE"

    for ifo in workflow.ifos:
        try:
            pre_gen_file = cp.get_opt_tags('workflow-gating',
                            'gating-pregenerated-file-%s' % ifo.lower(),
                            tags)
            pre_gen_file = resolve_url(pre_gen_file)
            file_url = urlparse.urljoin('file:',
                                         urllib.pathname2url(pre_gen_file))
            curr_file = File(ifo, user_tag, global_seg, file_url,
                                                                 tags=tags)
            curr_file.PFN(file_url, site='local')
            gate_files.append(curr_file)

        except ConfigParser.Error:
            # It's unlikely, but not impossible, that only some ifos
            # will be gated
            logging.warn("No gating file specified for IFO %s." % (ifo,))
            pass
        
    return gate_files
Example #17
def setup_background_bins(workflow,
                          coinc_files,
                          bank_file,
                          out_dir,
                          tags=None):
    tags = [] if tags is None else tags

    bins_exe = PyCBCDistributeBackgroundBins(workflow.cp,
                                             'distribute_background_bins',
                                             ifos=workflow.ifos,
                                             tags=tags,
                                             out_dir=out_dir)

    statmap_exe = PyCBCStatMapExecutable(workflow.cp,
                                         'statmap',
                                         ifos=workflow.ifos,
                                         tags=tags,
                                         out_dir=out_dir)

    cstat_exe = PyCBCCombineStatmap(workflow.cp,
                                    'combine_statmap',
                                    ifos=workflow.ifos,
                                    tags=tags,
                                    out_dir=out_dir)

    background_bins = workflow.cp.get_opt_tags('workflow-coincidence',
                                               'background-bins',
                                               tags).split(' ')
    background_bins = [x for x in background_bins if x != '']
    bins_node = bins_exe.create_node(coinc_files, bank_file, background_bins)
    workflow += bins_node

    stat_files = FileList([])
    for i, coinc_file in enumerate(bins_node.output_files):
        statnode = statmap_exe.create_node(FileList([coinc_file]),
                                           tags=tags + ['BIN_%s' % i])
        workflow += statnode
        stat_files.append(statnode.output_files[0])
        stat_files[i].bin_name = bins_node.names[i]

    cstat_node = cstat_exe.create_node(stat_files, tags=tags)
    workflow += cstat_node

    return cstat_node.output_files[0], stat_files
Example #18
def convert_cachelist_to_filelist(datafindcache_list):
    """
    Take as input a list of glue.lal.Cache objects and return a pycbc FileList
    containing all frames within those caches.
   
    Parameters
    -----------
    datafindcache_list : list of glue.lal.Cache objects
        The list of cache files to convert.
  
    Returns
    --------
    datafind_filelist : FileList of frame File objects
        The list of frame files.
    """
    datafind_filelist = FileList([])
    prev_file = None
    for cache in datafindcache_list:
        curr_ifo = cache.ifo
        for frame in cache:
            # Don't add a new workflow file entry for this frame if it is a
            # duplicate. These are assumed to be returned in time order.
            if prev_file:
                prev_name = prev_file.cache_entry.url.split('/')[-1]
                this_name = frame.url.split('/')[-1]
                if prev_name == this_name:
                    continue

            # Pegasus doesn't like "localhost" in URLs.
            frame.url = frame.url.replace('file://localhost', 'file://')

            currFile = File(curr_ifo,
                            frame.description,
                            frame.segment,
                            file_url=frame.url,
                            use_tmp_subdirs=True)
            if frame.url.startswith('file://'):
                currFile.PFN(frame.url, site='local')
            else:
                currFile.PFN(frame.url, site='notlocal')
            datafind_filelist.append(currFile)
            prev_file = currFile
    return datafind_filelist
Example #19
def select_files_by_ifo_combination(ifocomb, insps):
    """
    This function selects single-detector files ('insps') for a given ifo combination
    """
    inspcomb = FileList()
    for ifo, ifile in zip(*insps.categorize_by_attr('ifo')):
        if ifo in ifocomb:
            inspcomb += ifile

    return inspcomb
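An illustrative call, assuming a pycbc version where File and FileList live in pycbc.workflow.core and segments come from ligo.segments; the detector names, URLs and GPS times are made up.

from ligo.segments import segment
from pycbc.workflow.core import File, FileList

seg = segment(1000000000, 1000004096)
insps = FileList([
    File('H1', 'INSPIRAL', seg, file_url='file:///tmp/H1-INSPIRAL.hdf'),
    File('L1', 'INSPIRAL', seg, file_url='file:///tmp/L1-INSPIRAL.hdf'),
    File('V1', 'INSPIRAL', seg, file_url='file:///tmp/V1-INSPIRAL.hdf'),
])

# Keep only the single-detector files whose ifo is in the requested combination
hl_files = select_files_by_ifo_combination(('H1', 'L1'), insps)
print([f.ifo for f in hl_files])  # the H1 and L1 entries only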
Example #20
def setup_multiifo_interval_coinc(workflow,
                                  hdfbank,
                                  trig_files,
                                  stat_files,
                                  veto_file,
                                  veto_name,
                                  out_dir,
                                  pivot_ifo,
                                  fixed_ifo,
                                  tags=None):
    """
    This function sets up exact match multiifo coincidence
    """
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    logging.info('Setting up coincidence')

    ifos, _ = trig_files.categorize_by_attr('ifo')
    findcoinc_exe = PyCBCFindMultiifoCoincExecutable(workflow.cp,
                                                     'multiifo_coinc',
                                                     ifos=ifos,
                                                     tags=tags,
                                                     out_dir=out_dir)

    # Wall time knob and memory knob
    factor = int(
        workflow.cp.get_opt_tags('workflow-coincidence',
                                 'parallelization-factor',
                                 [findcoinc_exe.ifo_string] + tags))

    statmap_files = []
    bg_files = FileList()
    for i in range(factor):
        group_str = '%s/%s' % (i, factor)
        coinc_node = findcoinc_exe.create_node(trig_files,
                                               hdfbank,
                                               stat_files,
                                               veto_file,
                                               veto_name,
                                               group_str,
                                               pivot_ifo,
                                               fixed_ifo,
                                               tags=['JOB' + str(i)])
        bg_files += coinc_node.output_files
        workflow.add_node(coinc_node)

    statmap_files = setup_multiifo_statmap(workflow,
                                           ifos,
                                           bg_files,
                                           out_dir,
                                           tags=tags)

    logging.info('...leaving coincidence ')
    return statmap_files
Example #21
def setup_minifollowups(workflow, out_dir, frame_files,
                             coinc_file, tmpltbank_file, data_type, tags=None):
    ''' This performs a series of followup jobs on the num_events-th loudest
    events.
    '''

    logging.info('Entering minifollowups module')

    if tags is None:
        tags = []

    # create a FileList that will contain all output files
    output_filelist = FileList([])

    # check if minifollowups section exists
    # if not then do not do add minifollowup jobs to the workflow
    if not workflow.cp.has_section('workflow-minifollowups'):
        logging.info('There is no [workflow-minifollowups] section in '
                     'configuration file')
        logging.info('Leaving minifollowups')
        return output_filelist

    # loop over number of loudest events to be followed up
    num_events = int(workflow.cp.get_opt_tags('workflow-minifollowups', 'num-events', ''))
    for num_event in range(num_events):

        # increment by 1 for human readability
        num_event += 1

        # get output directory for this event
        tag_str = '_'.join(tags)
        output_dir = out_dir['result/loudest_event_%d_of_%d_%s'%(num_event, num_events, tag_str)]

        # make a pycbc_mf_table node for this event
        table_exe = MinifollowupsTableExecutable(workflow.cp, 'mf_table',
                        workflow.ifo_string, output_dir, tags=tags)
        table_node = table_exe.create_node(workflow.analysis_time, coinc_file,
                        tmpltbank_file, data_type, num_event)
        workflow.add_node(table_node)
        output_filelist.extend(table_node.output_files)

    logging.info('Leaving minifollowups module')

    return output_filelist
Example #22
def make_inference_1d_posterior_plots(
                    workflow, inference_file, output_dir, parameters=None,
                    analysis_seg=None, tags=None):
    parameters = [] if parameters is None else parameters
    tags = [] if tags is None else tags
    files = FileList([])
    for (ii, parameter) in enumerate(parameters):
        files += make_inference_posterior_plot(
                    workflow, inference_file, output_dir,
                    parameters=[parameter], analysis_seg=analysis_seg,
                    tags=tags + ['param{}'.format(ii)])
    return files
Example #23
def make_inference_1d_posterior_plots(
                    workflow, inference_file, output_dir, parameters=None,
                    analysis_seg=None, tags=None):
    parameters = [] if parameters is None else parameters
    tags = [] if tags is None else tags
    files = FileList([])
    for parameter in parameters:
        files += make_inference_posterior_plot(
                    workflow, inference_file, output_dir,
                    parameters=[parameter], analysis_seg=analysis_seg,
                    tags=tags + [parameter])
    return files
Example #24
def convert_cachelist_to_filelist(datafindcache_list):
    """
    Take as input a list of glue.lal.Cache objects and return a pycbc FileList
    containing all frames within those caches.
   
    Parameters
    -----------
    datafindcache_list : list of glue.lal.Cache objects
        The list of cache files to convert.
  
    Returns
    --------
    datafind_filelist : FileList of frame File objects
        The list of frame files.
    """ 
    datafind_filelist = FileList([])
    prev_file = None
    for cache in datafindcache_list:
        curr_ifo = cache.ifo
        for frame in cache:
            # Don't add a new workflow file entry for this frame if it is a
            # duplicate. These are assumed to be returned in time order.
            if prev_file:
                prev_name = prev_file.cache_entry.url.split('/')[-1]
                this_name = frame.url.split('/')[-1]
                if prev_name == this_name:
                    continue

            # Pegasus doesn't like "localhost" in URLs.
            frame.url = frame.url.replace('file://localhost','file://')

            currFile = File(curr_ifo, frame.description,
                    frame.segment, file_url=frame.url, use_tmp_subdirs=True)
            if frame.url.startswith('file://'):
                currFile.PFN(frame.url, site='local')
            else:
                currFile.PFN(frame.url, site='notlocal')
            datafind_filelist.append(currFile)
            prev_file = currFile
    return datafind_filelist
Example #25
def setup_trigger_fitting(workflow, insps, hdfbank, veto_file, veto_name):
    if not workflow.cp.has_option('workflow-coincidence', 'do-trigger-fitting'):
        return FileList()
    else:
        assert len(hdfbank) == 1  # must be a list with exactly 1 bank file
        assert len(veto_file) == 1
        assert len(veto_name) == 1
        smoothed_fit_files = FileList()
        for i in workflow.ifos:
            ifo_insp = [insp for insp in insps if (insp.ifo == i)]
            assert len(ifo_insp)==1
            raw_node = PyCBCFitByTemplateExecutable(workflow.cp,
                'fit_by_template', ifos=i).create_node(ifo_insp[0], hdfbank[0],
                                                    veto_file[0], veto_name[0])
            workflow += raw_node
            smooth_node = PyCBCFitOverParamExecutable(workflow.cp,
                'fit_over_param', ifos=i).create_node(raw_node.output_files[0],
                                                                    hdfbank[0])
            workflow += smooth_node
            smoothed_fit_files += smooth_node.output_files
        return smoothed_fit_files
Example #26
def setup_gating_workflow(workflow,
                          science_segs,
                          datafind_outs,
                          output_dir=None,
                          tags=None):
    '''
    Setup gating section of CBC workflow. At present this only supports pregenerated
    gating files, in the future these could be created within the workflow.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    science_segs : Keyed dictionary of glue.segmentlist objects
        scienceSegs[ifo] holds the science segments to be analysed for each
        ifo. 
    datafind_outs : pycbc.workflow.core.FileList
        The file list containing the datafind files.
    output_dir : path string
        The directory where data products will be placed. 
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    --------
    gate_files : pycbc.workflow.core.FileList
        The FileList holding the gate files, 0 or 1 per ifo
    '''
    if tags is None:
        tags = []
    logging.info("Entering gating module.")
    make_analysis_dir(output_dir)
    cp = workflow.cp

    # Parse for options in ini file.
    try:
        gateMethod = cp.get_opt_tags("workflow-gating", "gating-method", tags)
    except:
        # Gating is optional, just return an empty list if not
        # provided.
        return FileList([])

    if gateMethod == "PREGENERATED_FILE":
        logging.info("Setting gating from pre-generated file(s).")
        gate_files = setup_gate_pregenerated(workflow, tags=tags)
    else:
        errMsg = "Gating method not recognized. Only "
        errMsg += "PREGENERATED_FILE is currently supported."
        raise ValueError(errMsg)

    logging.info("Leaving gating module.")
    return gate_files
Example #27
    def create_node(self,
                    trig_files=None,
                    segment_dir=None,
                    out_tags=[],
                    tags=[]):
        node = Node(self)

        if not trig_files:
            raise ValueError("%s must be supplied with trigger files" %
                             self.name)

        # Data options
        pad_data = self.cp.get('inspiral', 'pad-data')
        if pad_data is None:
            raise ValueError("The option pad-data is a required option of "
                             "%s. Please check the ini file." % self.name)

        num_trials = int(self.cp.get("trig_combiner", "num-trials"))
        trig_name = self.cp.get('workflow', 'trigger-name')
        node.add_opt('--grb-name', trig_name)

        node.add_opt('--pad-data', pad_data)
        node.add_opt('--segment-length',
                     self.cp.get('inspiral', 'segment-duration'))
        node.add_opt('--ifo-tag', self.ifos)
        node.add_opt('--user-tag', 'INSPIRAL')

        # Set input / output options
        node.add_input_list_opt('--input-files', trig_files)

        node.add_opt('--segment-dir', segment_dir)
        node.add_opt('--output-dir', self.out_dir)

        out_files = FileList([])
        for out_tag in out_tags:
            out_file = File(self.ifos,
                            'INSPIRAL',
                            trig_files[0].segment,
                            directory=self.out_dir,
                            extension='xml.gz',
                            tags=["GRB%s" % trig_name, out_tag],
                            store_file=self.retain_files)
            #out_file.PFN(out_file.cache_entry.path, site="local")
            out_files.append(out_file)

        for trial in range(1, num_trials + 1):
            out_file = File(self.ifos,
                            'INSPIRAL',
                            trig_files[0].segment,
                            directory=self.out_dir,
                            extension='xml.gz',
                            tags=["GRB%s" % trig_name,
                                  "OFFTRIAL_%d" % trial],
                            store_file=self.retain_files)
            #out_file.PFN(out_file.cache_entry.path, site="local")
            out_files.append(out_file)

        node.add_profile('condor', 'request_cpus', self.num_threads)

        return node, out_files
Example #28
def setup_background_bins_inj(workflow, coinc_files, background_file,
                              bank_file, out_dir, tags=None):
    tags = [] if tags is None else tags

    bins_exe = PyCBCDistributeBackgroundBins(workflow.cp,
                                             'distribute_background_bins',
                                             ifos=workflow.ifos, tags=tags,
                                             out_dir=out_dir)

    statmap_exe = PyCBCStatMapInjExecutable(workflow.cp, 'statmap_inj',
                                            ifos=workflow.ifos, tags=tags,
                                            out_dir=out_dir)

    cstat_exe = PyCBCCombineStatmap(workflow.cp, 'combine_statmap',
                                    ifos=workflow.ifos, tags=tags,
                                    out_dir=out_dir)

    background_bins = workflow.cp.get_opt_tags('workflow-coincidence',
                                               'background-bins',
                                               tags).split(' ')
    background_bins = [x for x in background_bins if x != '']

    for inj_type in ['injinj', 'injfull', 'fullinj']:
        bins_node = bins_exe.create_node(FileList(coinc_files[inj_type]),
                                         bank_file, background_bins,
                                         tags=[inj_type])
        workflow += bins_node
        coinc_files[inj_type] = bins_node.output_files

    statmap_files = FileList([])
    for i in range(len(background_bins)):
        statnode = statmap_exe.create_node(
            FileList([coinc_files['injinj'][i]]),
            FileList([background_file[i]]),
            FileList([coinc_files['injfull'][i]]),
            FileList([coinc_files['fullinj'][i]]),
            tags=['BIN_%s' % i]
        )
        workflow += statnode
        statmap_files.append(statnode.output_files[0])

    cstat_node = cstat_exe.create_node(statmap_files)
    workflow += cstat_node

    return cstat_node.output_files[0]
Example #29
def datafind_keep_unique_backups(backup_outs, orig_outs):
    """This function will take a list of backup datafind files, presumably
    obtained by querying a remote datafind server, e.g. CIT, and compares
    these against a list of original datafind files, presumably obtained by
    querying the local datafind server. Only the datafind files in the backup
    list that do not appear in the original list are returned. This allows us
    to use only files that are missing from the local cluster.

    Parameters
    -----------
    backup_outs : FileList
        List of datafind files from the remote datafind server.
    orig_outs : FileList
        List of datafind files from the local datafind server.

    Returns
    --------
    FileList
        List of datafind files in backup_outs and not in orig_outs.
    """
    # NOTE: This function is not optimized and could be made considerably
    #       quicker if speed becomes in issue. With 4s frame files this might
    #       be slow, but for >1000s files I don't foresee any issue, so I keep
    #       this simple.
    return_list = FileList([])
    # We compare the LFNs to determine uniqueness
    # Is there a way to associate two paths with one LFN??
    orig_names = [f.name for f in orig_outs]
    for file in backup_outs:
        if file.name not in orig_names:
            return_list.append(file)
        else:
            index_num = orig_names.index(file.name)
            orig_out = orig_outs[index_num]
            pfns = list(file.pfns)
            # This shouldn't happen, but catch if it does
            assert(len(pfns) == 1)
            orig_out.PFN(pfns[0].url, site='notlocal')

    return return_list
Example #30
def datafind_keep_unique_backups(backup_outs, orig_outs):
    """This function will take a list of backup datafind files, presumably
    obtained by querying a remote datafind server, e.g. CIT, and compares
    these against a list of original datafind files, presumably obtained by
    querying the local datafind server. Only the datafind files in the backup
    list that do not appear in the original list are returned. This allows us
    to use only files that are missing from the local cluster.

    Parameters
    -----------
    backup_outs : FileList
        List of datafind files from the remote datafind server.
    orig_outs : FileList
        List of datafind files from the local datafind server.

    Returns
    --------
    FileList
        List of datafind files in backup_outs and not in orig_outs.
    """
    # NOTE: This function is not optimized and could be made considerably
    #       quicker if speed becomes in issue. With 4s frame files this might
    #       be slow, but for >1000s files I don't foresee any issue, so I keep
    #       this simple.
    return_list = FileList([])
    # We compare the LFNs to determine uniqueness
    # Is there a way to associate two paths with one LFN??
    orig_names = [f.name for f in orig_outs]
    for file in backup_outs:
        if file.name not in orig_names:
            return_list.append(file)
        else:
            index_num = orig_names.index(file.name)
            orig_out = orig_outs[index_num]
            pfns = list(file.pfns)
            # This shouldn't happen, but catch if it does
            assert len(pfns) == 1
            orig_out.PFN(pfns[0].url, site="notlocal")

    return return_list
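The filtering above is purely by logical file name. The following self-contained sketch reproduces that idea with simple stand-ins (the MockFile namedtuple and the frame names are hypothetical, not part of pycbc):

from collections import namedtuple

MockFile = namedtuple('MockFile', ['name'])

orig_outs = [MockFile('H-H1_FRAME-1000000000-4096.gwf')]
backup_outs = [
    MockFile('H-H1_FRAME-1000000000-4096.gwf'),  # also present locally: dropped
    MockFile('H-H1_FRAME-1000004096-4096.gwf'),  # missing locally: kept
]

orig_names = [f.name for f in orig_outs]
unique_backups = [f for f in backup_outs if f.name not in orig_names]
print([f.name for f in unique_backups])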
Example #31
def make_sensitivity_plot(workflow, inj_file, out_dir, exclude=None, require=None, tags=[]):
    makedir(out_dir)   
    secs = requirestr(workflow.cp.get_subsections('plot_sensitivity'), require)  
    secs = excludestr(secs, exclude)
    files = FileList([])
    for tag in secs:
        node = PlotExecutable(workflow.cp, 'plot_sensitivity', ifos=workflow.ifos,
                    out_dir=out_dir, tags=[tag] + tags).create_node()
        node.add_input_opt('--injection-file', inj_file)
        node.new_output_file_opt(inj_file.segment, '.png', '--output-file')
        workflow += node
        files += node.output_files
    return files
Example #32
def setup_psd_pregenerated(workflow, tags=None):
    '''
    Setup CBC workflow to use pregenerated psd files.
    The file given in cp.get('workflow','pregenerated-psd-file-(ifo)') will
    be used as the --psd-file argument to geom_nonspinbank, geom_aligned_bank
    and pycbc_plot_psd_file.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    --------
    psd_files : pycbc.workflow.core.FileList
        The FileList holding the gating files
    '''
    if tags is None:
        tags = []
    psd_files = FileList([])

    cp = workflow.cp
    global_seg = workflow.analysis_time
    user_tag = "PREGEN_PSD"

    # Check for one psd for all ifos
    try:
        pre_gen_file = cp.get_opt_tags('workflow-psd',
                        'psd-pregenerated-file', tags)
        pre_gen_file = resolve_url(pre_gen_file)
        file_url = urljoin('file:', pathname2url(pre_gen_file))
        curr_file = File(workflow.ifos, user_tag, global_seg, file_url,
                                                    tags=tags)
        curr_file.PFN(file_url, site='local')
        psd_files.append(curr_file)
    except ConfigParser.Error:
        # Check for one psd per ifo
        for ifo in workflow.ifos:
            try:
                pre_gen_file = cp.get_opt_tags('workflow-psd',
                                'psd-pregenerated-file-%s' % ifo.lower(),
                                tags)
                pre_gen_file = resolve_url(pre_gen_file)
                file_url = urljoin('file:', pathname2url(pre_gen_file))
                curr_file = File(ifo, user_tag, global_seg, file_url,
                                                            tags=tags)
                curr_file.PFN(file_url, site='local')
                psd_files.append(curr_file)

            except ConfigParser.Error:
                # It's unlikely, but not impossible, that only some ifos
                # will have pregenerated PSDs
                logging.warn("No psd file specified for IFO %s." % (ifo,))
                pass

    return psd_files
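This variant imports urljoin and pathname2url directly (Python 3 names). A quick standalone illustration of that file-URL construction, with a placeholder path:

from urllib.parse import urljoin
from urllib.request import pathname2url

pre_gen_file = '/data/psds/H1-PREGEN_PSD.txt'
file_url = urljoin('file:', pathname2url(pre_gen_file))
print(file_url)  # file:///data/psds/H1-PREGEN_PSD.txt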
Example #33
    def create_node(self, parent, inj_trigs, inj_string, max_inc, segment):
        node = Node(self)

        trig_name = self.cp.get('workflow', 'trigger-name')
        node.add_opt('--inj-string', inj_string)
        node.add_opt('--max-inclination', max_inc)
        node.add_opt('--inj-cache', '%s' % parent.storage_path)

        out_files = FileList([])
        for inj_trig in inj_trigs:
            out_string = inj_string.split(max_inc)[0]
            out_file_tag = [out_string, "FILTERED", max_inc,
                            inj_trig.tag_str.rsplit('_', 1)[-1]]
            out_file = File(self.ifos, inj_trig.description,
                            inj_trig.segment, extension="xml",
                            directory=self.out_dir, tags=out_file_tag)
            out_file.PFN(out_file.cache_entry.path, site="local")
            out_files.append(out_file)

        node.add_opt('--output-dir', self.out_dir)

        return node, out_files
Example #34
def setup_psd_calculate(workflow,
                        frame_files,
                        ifo,
                        segments,
                        segment_name,
                        out_dir,
                        gate_files=None,
                        tags=None):
    make_analysis_dir(out_dir)
    tags = [] if not tags else tags
    if workflow.cp.has_option_tags('workflow-psd',
                                   'parallelization-factor',
                                   tags=tags):
        num_parts = int(
            workflow.cp.get_opt_tags('workflow-psd',
                                     'parallelization-factor',
                                     tags=tags))
    else:
        num_parts = 1

    # get rid of duplicate segments which happen when splitting the bank
    segments = segmentlist(frozenset(segments))

    segment_lists = list(chunks(segments, num_parts))

    psd_files = FileList([])
    for i, segs in enumerate(segment_lists):
        seg_file = SegFile.from_segment_list(
            '%s_%s' % (segment_name, i),
            segmentlist(segs),
            segment_name,
            ifo,
            valid_segment=workflow.analysis_time,
            extension='xml',
            directory=out_dir)

        psd_files += [
            make_psd_file(workflow,
                          frame_files,
                          seg_file,
                          segment_name,
                          out_dir,
                          gate_files=gate_files,
                          tags=tags + ['PART%s' % i])
        ]

    if num_parts > 1:
        return merge_psds(workflow, psd_files, ifo, out_dir, tags=tags)
    else:
        return psd_files[0]
Example #35
def merge_single_detector_hdf_files(workflow, bank_file, trigger_files, out_dir, tags=None):
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    out = FileList()
    for ifo in workflow.ifos:
        node = MergeExecutable(workflow.cp, 'hdf_trigger_merge',
                        ifos=ifo, out_dir=out_dir, tags=tags).create_node()
        node.add_input_opt('--bank-file', bank_file)
        node.add_input_list_opt('--trigger-files', trigger_files.find_output_with_ifo(ifo))
        node.new_output_file_opt(workflow.analysis_time, '.hdf', '--output-file')
        workflow += node
        out += node.output_files
    return out
Example #36
0
def make_range_plot(workflow, psd_files, out_dir, exclude=None, require=None, tags=None):
    tags = [] if tags is None else tags
    makedir(out_dir)
    secs = requirestr(workflow.cp.get_subsections('plot_range'), require)  
    secs = excludestr(secs, exclude)
    files = FileList([])
    for tag in secs:
        node = PlotExecutable(workflow.cp, 'plot_range', ifos=workflow.ifos,
                              out_dir=out_dir, tags=[tag] + tags).create_node()
        node.add_input_list_opt('--psd-files', psd_files)
        node.new_output_file_opt(workflow.analysis_time, '.png', '--output-file')
        workflow += node
        files += node.output_files
    return files
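
The plot sections to run are chosen by requirestr and excludestr, which are not shown in this snippet. A minimal sketch of the filtering they could perform, assuming they simply keep or drop subsection names containing the given substring (the real helpers may accept more general arguments):

def requirestr(sections, require=None):
    """Keep only subsection names containing the required substring (sketch)."""
    if require is None:
        return sections
    return [sec for sec in sections if require in sec]

def excludestr(sections, exclude=None):
    """Drop subsection names containing the excluded substring (sketch)."""
    if exclude is None:
        return sections
    return [sec for sec in sections if exclude not in sec]

subsections = ['summ', 'summ_highmass', 'detailed']          # hypothetical names
print(excludestr(requirestr(subsections, 'summ'), 'high'))   # ['summ']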
Example #37
0
    def create_node(self,
                    trig_files=None,
                    segment_dir=None,
                    analysis_seg=None,
                    out_tags=None,
                    tags=None):
        out_tags = [] if out_tags is None else out_tags
        tags = [] if tags is None else tags
        node = Node(self)

        if not trig_files:
            raise ValueError("%s must be supplied with trigger files" %
                             self.name)

        # Data options
        num_trials = int(self.cp.get("trig_combiner", "num-trials"))
        trig_name = self.cp.get('workflow', 'trigger-name')
        if all("COHERENT_NO_INJECTIONS" in t.name for t in trig_files) and \
                self.cp.has_option_tag('inspiral', 'do-short-slides',
                                       'coherent_no_injections'):
            node.add_opt('--short-slides')

        node.add_opt('--grb-name', trig_name)

        node.add_opt('--trig-start-time', analysis_seg[0])
        node.add_opt('--ifo-tag', self.ifos)
        node.add_opt('--user-tag', 'INSPIRAL')

        # Set input / output options
        node.add_input_list_opt('--input-files', trig_files)

        node.add_opt('--segment-dir', segment_dir)
        node.add_opt('--output-dir', self.out_dir)

        out_files = FileList([])
        for out_tag in out_tags:
            out_file = File(self.ifos,
                            'INSPIRAL',
                            trig_files[0].segment,
                            directory=self.out_dir,
                            extension='xml.gz',
                            tags=["GRB%s" % trig_name, out_tag],
                            store_file=self.retain_files)
            out_files.append(out_file)

        for trial in range(1, num_trials + 1):
            out_file = File(self.ifos,
                            'INSPIRAL',
                            trig_files[0].segment,
                            directory=self.out_dir,
                            extension='xml.gz',
                            tags=["GRB%s" % trig_name,
                                  "OFFTRIAL_%d" % trial],
                            store_file=self.retain_files)
            out_files.append(out_file)

        node.add_profile('condor', 'request_cpus', self.num_threads)

        return node, out_files
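
The tags attached to the node's output files follow a fixed pattern: one file per requested out_tag plus one per off-source trial. A standalone illustration, using hypothetical values for the GRB name, out_tags and num-trials:

trig_name = "100316D"                              # hypothetical GRB name
out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]  # hypothetical out_tags
num_trials = 6                                     # hypothetical num-trials

all_tags = [["GRB%s" % trig_name, tag] for tag in out_tags]
all_tags += [["GRB%s" % trig_name, "OFFTRIAL_%d" % trial]
             for trial in range(1, num_trials + 1)]
for tags in all_tags:
    print(tags)   # ['GRB100316D', 'OFFSOURCE'], ..., ['GRB100316D', 'OFFTRIAL_6']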
Example #38
0
def setup_trigger_fitting(workflow, insps, hdfbank, veto_file, veto_name,
                          tags=None):
    if not workflow.cp.has_option('workflow-coincidence', 'do-trigger-fitting'):
        return FileList()
    else:
        smoothed_fit_files = FileList()
        for i in workflow.ifos:
            ifo_insp = [insp for insp in insps if (insp.ifo == i)]
            assert len(ifo_insp)==1
            ifo_insp = ifo_insp[0]
            raw_exe = PyCBCFitByTemplateExecutable(workflow.cp,
                                                   'fit_by_template', ifos=i,
                                                   tags=tags)
            raw_node = raw_exe.create_node(ifo_insp, hdfbank,
                                           veto_file, veto_name)
            workflow += raw_node
            smooth_exe = PyCBCFitOverParamExecutable(workflow.cp,
                                                     'fit_over_param', ifos=i,
                                                     tags=tags)
            smooth_node = smooth_exe.create_node(raw_node.output_file,
                                                 hdfbank)
            workflow += smooth_node
            smoothed_fit_files += smooth_node.output_files
        return smoothed_fit_files
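
Whether trigger fitting runs at all is controlled purely by the presence of the do-trigger-fitting option in the [workflow-coincidence] section; its value is ignored. A minimal stand-in for the workflow's configuration object, using the standard-library parser (ConfigParser under Python 2, as in the snippets above), just to show the gate:

import ConfigParser   # configparser in Python 3

cp = ConfigParser.RawConfigParser(allow_no_value=True)
cp.add_section('workflow-coincidence')
cp.set('workflow-coincidence', 'do-trigger-fitting')   # present, value not needed
print(cp.has_option('workflow-coincidence', 'do-trigger-fitting'))   # True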
Example #39
0
def make_coinc_info(workflow, singles, bank, coinc, num, out_dir, tags=None):
    tags = [] if tags is None else tags
    makedir(out_dir)
    name = 'page_coincinfo'
    files = FileList([])
    node = PlotExecutable(workflow.cp, name, ifos=workflow.ifos,
                              out_dir=out_dir, tags=tags).create_node()
    node.add_input_list_opt('--single-trigger-files', singles)
    node.add_input_opt('--statmap-file', coinc)
    node.add_input_opt('--bank-file', bank)
    node.add_opt('--n-loudest', str(num))
    node.new_output_file_opt(workflow.analysis_time, '.html', '--output-file')
    workflow += node
    files += node.output_files
    return files
Example #40
0
def make_inj_info(workflow, injection_file, injection_index, num, out_dir,
                  tags=None):
    tags = [] if tags is None else tags
    makedir(out_dir)
    name = 'page_injinfo'
    files = FileList([])
    node = PlotExecutable(workflow.cp, name, ifos=workflow.ifos,
                              out_dir=out_dir, tags=tags).create_node()
    node.add_input_opt('--injection-file', injection_file)
    node.add_opt('--injection-index', str(injection_index))
    node.add_opt('--n-nearest', str(num))
    node.new_output_file_opt(workflow.analysis_time, '.html', '--output-file')
    workflow += node
    files += node.output_files
    return files
Example #41
0
def make_foundmissed_plot(workflow, inj_file, out_dir, exclude=None, require=None, tags=None):
    tags = [] if tags is None else tags
    makedir(out_dir)
    secs = requirestr(workflow.cp.get_subsections('plot_foundmissed'), require)  
    secs = excludestr(secs, exclude)
    files = FileList([])
    for tag in secs:
        exe = PlotExecutable(workflow.cp, 'plot_foundmissed', ifos=workflow.ifos,
                    out_dir=out_dir, tags=[tag] + tags)
        node = exe.create_node()        
        ext = '.html' if exe.has_opt('dynamic') else '.png'
        node.add_input_opt('--injection-file', inj_file)
        node.new_output_file_opt(inj_file.segment, ext, '--output-file')
        workflow += node   
        files += node.output_files
    return files
Example #42
0
def setup_interval_coinc(workflow, hdfbank, trig_files, stat_files,
                         veto_files, veto_names, out_dir, tags=None):
    """
    This function sets up exact match coincidence and background estimation

    using a folded interval technique.
    """
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    logging.info('Setting up coincidence')

    if len(hdfbank) != 1:
        raise ValueError('Must use exactly 1 bank file for this coincidence '
                         'method, I got %i !' % len(hdfbank))
    hdfbank = hdfbank[0]

    if len(workflow.ifos) > 2:
        raise ValueError('This coincidence method only supports two-ifo '
                         'searches')

    findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp, 'coinc',
                                             ifos=workflow.ifos,
                                             tags=tags, out_dir=out_dir)

    # Wall time knob and memory knob
    factor = int(workflow.cp.get_opt_tags('workflow-coincidence',
                                          'parallelization-factor', tags))

    statmap_files = []
    for veto_file, veto_name in zip(veto_files, veto_names):
        bg_files = FileList()
        for i in range(factor):
            group_str = '%s/%s' % (i, factor)
            coinc_node = findcoinc_exe.create_node(trig_files, hdfbank,
                                                   stat_files,
                                                   veto_file, veto_name,
                                                   group_str,
                                                   tags=[veto_name, str(i)])
            bg_files += coinc_node.output_files
            workflow.add_node(coinc_node)

        statmap_files += [setup_statmap(workflow, bg_files, hdfbank, out_dir,
                                        tags=tags + [veto_name])]

    logging.info('...leaving coincidence ')
    return statmap_files
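
The parallelization-factor splits the coincidence work into factor independent jobs, each identified by a group string of the form 'i/factor'. A small sketch of those strings and of one way such a split could divide templates between jobs (the actual split performed inside the coinc executable may differ):

factor = 8   # hypothetical parallelization-factor
group_strs = ['%s/%s' % (i, factor) for i in range(factor)]
print(group_strs)   # ['0/8', '1/8', ..., '7/8']

def templates_for_group(group_str, n_templates):
    """Sketch of a round-robin split consistent with 'i/factor' strings."""
    i, n_groups = map(int, group_str.split('/'))
    return list(range(i, n_templates, n_groups))

print(templates_for_group('2/8', 20))   # [2, 10, 18]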
Example #43
0
def make_single_template_plots(workflow,
                               segs,
                               seg_name,
                               coinc,
                               bank,
                               num,
                               out_dir,
                               exclude=None,
                               require=None,
                               tags=None):
    tags = [] if tags is None else tags
    makedir(out_dir)
    name = 'single_template_plot'
    secs = requirestr(workflow.cp.get_subsections(name), require)
    secs = excludestr(secs, exclude)
    files = FileList([])
    for tag in secs:
        for ifo in workflow.ifos:
            # Reanalyze the time around the trigger in each detector
            node = PlotExecutable(workflow.cp,
                                  'single_template',
                                  ifos=[ifo],
                                  out_dir=out_dir,
                                  tags=[tag] + tags).create_node()
            node.add_input_opt('--statmap-file', coinc)
            node.add_opt('--n-loudest', str(num))
            node.add_input_opt('--inspiral-segments', segs[ifo])
            node.add_opt('--segment-name', seg_name)
            node.add_input_opt('--bank-file', bank)
            node.new_output_file_opt(workflow.analysis_time, '.hdf',
                                     '--output-file')
            data = node.output_files[0]
            workflow += node

            # Make the plot for this trigger and detector
            node = PlotExecutable(workflow.cp,
                                  name,
                                  ifos=[ifo],
                                  out_dir=out_dir,
                                  tags=[tag] + tags).create_node()
            node.add_input_opt('--single-template-file', data)
            node.new_output_file_opt(workflow.analysis_time, '.png',
                                     '--output-file')
            workflow += node
            files += node.output_files
    return files
Example #44
0
    def create_node(self, parent, tags=None):
        node = Node(self)

        # Set input / output options
        node.add_opt('--trig-file', '%s' % parent.storage_path)
        node.add_opt('--output-dir', self.out_dir)

        node.add_profile('condor', 'request_cpus', self.num_threads)

        # Adding output files as pycbc.workflow.core.File objects
        out_file = File(self.ifos, 'INSPIRAL', parent.segment,
                        directory=self.out_dir, extension='xml.gz',
                        tags=[parent.tag_str, 'CLUSTERED'],
                        store_file=self.retain_files)
        #out_file.PFN(out_file.cache_entry.path, site="local")

        return node, FileList([out_file])
Example #45
0
def setup_postprocprep_pipedown_workflow(workflow, coincFiles, output_dir,
                                      tags=[], do_repop=False, 
                                      injectionFiles=None,
                                      vetoFiles=None, injLessTag=None,
                                      injectionTags=[], veto_cats=[]):
    """
    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    coincFiles : pycbc.workflow.core.FileList
        An FileList of the coincident trigger files that are used as
        input at this stage.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.
    do_repop : Boolean
        If False, use the 'coinc_inspiral.snr' column from the coincident 
        trigger files as clustering and ranking statistic; if True, use
        a repop_coinc job before clustering to calculate a different ranking
        statistic and store in the coinc_inspiral table for later use.
    injectionFiles : pycbc.workflow.core.FileList (optional, default=None)
        The injection files to be used in this stage. An empty list (or any
        other input that evaluates as false) is valid and will imply that no
        injections are being done.
    vetoFiles : pycbc.workflow.core.FileList (required)
        The data quality files to be used in this stage. This is required and
        will be used to determine the analysed times when doing post-processing.
    injLessTag : string (required)
        The tag that identifies files that do not have simulations in them.
        Ie. the primary search results.
    injectionTags : list of strings (optional, default = [])
        Each injection file has a unique tag. If used in the method, this
        tells the post-processing preparation code which injection tags it
        should include when creating the combined output.
    veto_cats : list of integers (optional, default = [])
        Decide which set of veto files should be used in the post-processing
        preparation. For example tell the workflow to only generate results
        at cumulative categories 2, 3 and 4 by supplying [2,3,4] here.

    Returns
    --------
    finalFiles : pycbc.workflow.core.FileList
        A list of the single SQL database storing the clustered, injection
        found, triggers for all injections, time slid and zero lag analyses.
    initialSqlFiles : pycbc.workflow.core.FileList
        The SQL files before clustering is applied and injection finding
        performed.
    clusteredSqlFiles : pycbc.workflow.core.FileList
        The clustered SQL files before injection finding performed.
    combinedSqlFiles : pycbc.workflow.core.FileList
        A combined file containing all triggers after clustering, including
        the injection and veto tables, but before injection finding performed.
        Probably there is no need to ever keep this file and it will be a
        temporary file in most cases.
    """
    if not veto_cats:
        raise ValueError("A non-empty list of veto categories is required.")

    # Setup needed exe classes
    sqliteCombine1ExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-combiner1-exe", tags)
    sqliteCombine1Exe = select_generic_executable(workflow, 
                                                  sqliteCombine1ExeTag)
    sqliteCombine2ExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-combiner2-exe", tags)
    sqliteCombine2Exe = select_generic_executable(workflow, 
                                                  sqliteCombine2ExeTag)
    clusterCoincsExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-cluster-exe", tags)
    clusterCoincsExe = select_generic_executable(workflow, clusterCoincsExeTag)
    injFindExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-injfind-exe", tags)
    injFindExe = select_generic_executable(workflow, injFindExeTag)

    sqliteCombine1Outs = FileList([])
    clusterCoincsOuts = FileList([])
    injFindOuts = FileList([])
    sqliteCombine2Outs = FileList([])

    if do_repop:
        repopCoincExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                                "postprocprep-repop-exe", tags)
        repopCoincExe = select_generic_executable(workflow, repopCoincExeTag)
        repopCoincOuts = FileList([])

    for cat in veto_cats:
        # FIXME: Some hacking is still needed while we support pipedown
        # FIXME: There are currently 3 names to say cumulative cat_3
        vetoTag = 'CUMULATIVE_CAT_%d' %(cat)
        dqSegFile = vetoFiles.find_output_with_tag(vetoTag)
        if not len(dqSegFile) == 1:
            errMsg = "Did not find exactly 1 data quality file."
            raise ValueError(errMsg)
        # Don't think this is used here, this is the tag *in* the file
        dqVetoName = 'VETO_CAT%d_CUMULATIVE' %(cat)
        # FIXME: Here we set the dqVetoName to be compatible with pipedown
        pipedownDQVetoName = 'CAT_%d_VETO' %(cat)

        sqliteCombine2Inputs = FileList([])
        # Do injection-less jobs first.

        # Choose a label for clustering the jobs
        job_label = get_random_label()

        # Combine trig files first
        currTags = tags + [injLessTag, vetoTag]
        trigVetoInpFiles = coincFiles.find_output_with_tag(pipedownDQVetoName)
        trigInpFiles = trigVetoInpFiles.find_output_with_tag(injLessTag)
        if len(trigInpFiles) == 0:
            err_msg = "No input files found. Workflow would fail."
            raise ValueError(err_msg)
        trigInpFiles.append(dqSegFile[0])
        sqliteCombine1Job = sqliteCombine1Exe(workflow.cp,
                                              sqliteCombine1ExeTag,
                                              ifo=workflow.ifo_string,
                                              out_dir=output_dir,
                                              tags=currTags)
        sqliteCombine1Node = sqliteCombine1Job.create_node(
                                          workflow.analysis_time, trigInpFiles, 
                                          workflow=workflow)
        sqliteCombine1Node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(sqliteCombine1Node)
        # Node has only one output file
        sqliteCombine1Out = sqliteCombine1Node.output_files[0]
        sqliteCombine1Outs.append(sqliteCombine1Out)

        if do_repop:
            repopCoincJob = repopCoincExe(workflow.cp,
                                          repopCoincExeTag,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=currTags)
            repopCoincNode = repopCoincJob.create_node(workflow.analysis_time,
                                                       sqliteCombine1Out)
            repopCoincNode.add_profile('pegasus', 'label', job_label)
            workflow.add_node(repopCoincNode)
            # Node has only one output file
            repopCoincOut = repopCoincNode.output_files[0]
            repopCoincOuts.append(repopCoincOut)

        # Input file plumbing allowing for possible repop_coinc job
        clusterCoincsIn = repopCoincOut if do_repop else sqliteCombine1Out
        # Cluster coincidences
        clusterCoincsJob = clusterCoincsExe(workflow.cp,
                                            clusterCoincsExeTag,
                                            ifo=workflow.ifo_string, 
                                            out_dir=output_dir, 
                                            tags=currTags)
        clusterCoincsNode = clusterCoincsJob.create_node(
                                       workflow.analysis_time, clusterCoincsIn)
        clusterCoincsNode.add_profile('pegasus', 'label', job_label)
        workflow.add_node(clusterCoincsNode)
        # Node has only one output file
        clusterCoincsOut = clusterCoincsNode.output_files[0]
        clusterCoincsOuts.append(clusterCoincsOut)
        sqliteCombine2Inputs.append(clusterCoincsOut)

        # Do injection jobs
        for injTag in injectionTags:
            # Choose a label for clustering the jobs
            job_label = get_random_label()
            # Combine trig files first
            currTags = tags + [injTag, vetoTag]
            trigInpFiles = trigVetoInpFiles.find_output_with_tag(injTag)
            trigInpFiles.append(dqSegFile[0])
            injFile = injectionFiles.find_output_with_tag(injTag)
            assert (len(injFile) == 1)
            sqliteCombine1Job = sqliteCombine1Exe(workflow.cp,
                                                  sqliteCombine1ExeTag,
                                                  ifo=workflow.ifo_string,
                                                  out_dir=output_dir,
                                                  tags=currTags)
            sqliteCombine1Node = sqliteCombine1Job.create_node(
                                          workflow.analysis_time, trigInpFiles,
                                          injFile=injFile[0], injString=injTag,
                                          workflow=workflow)
            sqliteCombine1Node.add_profile('pegasus', 'label', job_label)
            workflow.add_node(sqliteCombine1Node)
            # Node has only one output file
            sqliteCombine1Out = sqliteCombine1Node.output_files[0]
            sqliteCombine1Outs.append(sqliteCombine1Out)

            if do_repop:
                repopCoincJob = repopCoincExe(workflow.cp,
                                          repopCoincExeTag,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=currTags)
                repopCoincNode = repopCoincJob.create_node(
                                     workflow.analysis_time, sqliteCombine1Out)
                repopCoincNode.add_profile('pegasus', 'label', job_label)
                workflow.add_node(repopCoincNode)
                # Node has only one output file
                repopCoincOut = repopCoincNode.output_files[0]
                repopCoincOuts.append(repopCoincOut)

            # Input file plumbing allowing for possible repop_coinc job
            clusterCoincsIn = repopCoincOut if do_repop else sqliteCombine1Out
            # Cluster coincidences
            clusterCoincsJob = clusterCoincsExe(workflow.cp,
                                                clusterCoincsExeTag,
                                                ifo=workflow.ifo_string,
                                                out_dir=output_dir,
                                                tags=currTags)
            clusterCoincsNode = clusterCoincsJob.create_node(
                                       workflow.analysis_time, clusterCoincsIn)
            clusterCoincsNode.add_profile('pegasus', 'label', job_label)
            workflow.add_node(clusterCoincsNode)
            # Node has only one output file
            clusterCoincsOut = clusterCoincsNode.output_files[0]
            clusterCoincsOuts.append(clusterCoincsOut)
            sqliteCombine2Inputs.append(clusterCoincsOut)

        # Choose a new label for pegasus-clustering the jobs
        job_label = get_random_label()

        # Combine everything together and add veto file
        currTags = tags + [vetoTag]
        sqliteCombine2Job = sqliteCombine2Exe(workflow.cp, 
                                              sqliteCombine2ExeTag,
                                              ifo=workflow.ifo_string, 
                                              out_dir=output_dir,
                                              tags=currTags)
        sqliteCombine2Node = sqliteCombine2Job.create_node(
                                  workflow.analysis_time, sqliteCombine2Inputs)
        sqliteCombine2Node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(sqliteCombine2Node)
        sqliteCombine2Out = sqliteCombine2Node.output_files[0]
        sqliteCombine2Outs.append(sqliteCombine2Out)

        # Inj finding
        injFindJob = injFindExe(workflow.cp, injFindExeTag,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,tags=currTags)
        injFindNode = injFindJob.create_node(workflow.analysis_time,
                                                         sqliteCombine2Out)
        injFindNode.add_profile('pegasus', 'label', job_label)
        workflow.add_node(injFindNode)
        injFindOut = injFindNode.output_files[0]
        injFindOuts.append(injFindOut)


    return injFindOuts, sqliteCombine1Outs, clusterCoincsOuts,\
           sqliteCombine2Outs
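
As the FIXME above notes, three different naming conventions for the same cumulative veto category are juggled throughout this function. Side by side, using the format strings from the code:

for cat in [2, 3, 4]:                              # e.g. veto_cats=[2, 3, 4]
    veto_tag = 'CUMULATIVE_CAT_%d' % cat           # tag on the segment files
    dq_veto_name = 'VETO_CAT%d_CUMULATIVE' % cat   # name used inside the file
    pipedown_name = 'CAT_%d_VETO' % cat            # pipedown-compatible name
    print('%s  %s  %s' % (veto_tag, dq_veto_name, pipedown_name))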
Example #46
0
def convert_cachelist_to_filelist(datafindcache_list):
    """
    Take as input a list of glue.lal.Cache objects and return a pycbc FileList
    containing all frames within those caches.

    Parameters
    -----------
    datafindcache_list : list of glue.lal.Cache objects
        The list of cache files to convert.

    Returns
    --------
    datafind_filelist : FileList of frame File objects
        The list of frame files.
    """
    prev_file = None
    prev_name = None
    this_name = None

    datafind_filelist = FileList([])

    for cache in datafindcache_list:
        # sort the cache into time sequential order
        cache.sort()
        curr_ifo = cache.ifo
        for frame in cache:
            # Pegasus doesn't like "localhost" in URLs.
            frame.url = frame.url.replace('file://localhost','file://')

            # Create one File() object for each unique frame file that we
            # get back in the cache.
            if prev_file:
                prev_name = os.path.basename(prev_file.cache_entry.url)
                this_name = os.path.basename(frame.url)

            if (prev_file is None) or (prev_name != this_name):
                currFile = File(curr_ifo, frame.description,
                    frame.segment, file_url=frame.url, use_tmp_subdirs=True)
                datafind_filelist.append(currFile)
                prev_file = currFile

            # Populate the PFNs for the File() we just created
            if frame.url.startswith('file://'):
                currFile.PFN(frame.url, site='local')
                if frame.url.startswith(
                    'file:///cvmfs/oasis.opensciencegrid.org/ligo/frames'):
                    # Datafind returned a URL valid on the osg as well
                    # so add the additional PFNs to allow OSG access.
                    currFile.PFN(frame.url, site='osg')
                    currFile.PFN(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'root://xrootd-local.unl.edu/user/'), site='osg')
                    currFile.PFN(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'gsiftp://red-gridftp.unl.edu/user/'), site='osg')
                    currFile.PFN(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'gsiftp://ldas-grid.ligo.caltech.edu/hdfs/'), site='osg')
                elif frame.url.startswith(
                    'file:///cvmfs/gwosc.osgstorage.org/'):
                    # Datafind returned a URL valid on the osg as well
                    # so add the additional PFNs to allow OSG access.
                    for s in ['osg', 'orangegrid', 'osgconnect']:
                        currFile.PFN(frame.url, site=s)
                        currFile.PFN(frame.url, site="{}-scratch".format(s))
            else:
                currFile.PFN(frame.url, site='notlocal')

    return datafind_filelist
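
For frames that datafind reports under CVMFS/OASIS, the function registers extra PFNs so the same file is reachable from OSG sites. The rewrites are simple prefix substitutions; with a hypothetical frame path:

url = ('file:///cvmfs/oasis.opensciencegrid.org/ligo/frames/O1/'
       'H1_HOFT_C02/H-H1_HOFT_C02-1126256640-4096.gwf')   # hypothetical frame
print(url.replace('file:///cvmfs/oasis.opensciencegrid.org/',
                  'root://xrootd-local.unl.edu/user/'))
print(url.replace('file:///cvmfs/oasis.opensciencegrid.org/',
                  'gsiftp://red-gridftp.unl.edu/user/'))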
Example #47
0
def setup_postproc_coh_PTF_offline_workflow(workflow, trig_files, trig_cache,
        ts_trig_files, inj_trig_files, inj_files, inj_trig_caches, inj_caches,
        config_file, output_dir, html_dir, segment_dir, segs_plot, ifos,
        inj_tags=None, tags=None):
    """
    This module sets up the post-processing stage in the workflow, using a
    coh_PTF style set up. This consists of running trig_combiner to find
    coherent triggers, and injfinder to look for injections. It then runs
    a horizon_dist job, trig_cluster to cluster triggers, and injcombiner to
    calculate injection statistics. Finally, efficiency and sbv_plotter jobs
    calculate efficiency and signal based veto statistics and make plots.
    
    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    trig_files : pycbc.workflow.core.FileList
        A FileList of the trigger files from the on/off source analysis jobs.
    trig_cache : pycbc.workflow.core.File
        A cache file pointing to the trigger files.
    ts_trig_files : pycbc.workflow.core.FileList
        A FileList of the trigger files from the timeslide analysis jobs.
    inj_trig_files : pycbc.workflow.core.FileList
        A FileList of the trigger files produced by injection jobs.
    inj_files : pycbc.workflow.core.FileList
        A FileList of the injection set files.
    inj_trig_caches : pycbc.workflow.core.FileList
        A FileList containing the cache files that point to the injection
        trigger files.
    inj_caches : pycbc.workflow.core.FileList
        A FileList containing cache files that point to the injection files.
    config_file : pycbc.workflow.core.File
        The parsed configuration file.
    output_dir : path
        The directory in which output files will be stored.
    html_dir : path
        The directory where the result webpage will be placed.
    segment_dir : path
        The directory in which data segment information is stored.
    segs_plot : pycbc.workflow.core.File
        The plot showing the analysis segments for each IFO around the GRB time.
        This is produced at the time of workflow generation.
    ifos : list
        A list containing the analysis interferometers.
    inj_tags : list
        List containing the strings used to uniquely identify the injection
        sets included in the analysis.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.

    Returns
    --------
    pp_outs : pycbc.workflow.core.FileList
        A list of the output from this stage.
    """
    if inj_tags is None:
        inj_tags = []
    if tags is None:
        tags = []
    cp = workflow.cp
    full_segment = trig_files[0].segment
    trig_name = cp.get("workflow", "trigger-name")
    grb_string = "GRB" + trig_name
    num_trials = int(cp.get("trig_combiner", "num-trials"))
    do_injections = cp.has_section("workflow-injections")

    pp_outs = FileList([])
    pp_nodes = []

    # Set up needed exe classes
    trig_combiner_class = select_generic_executable(workflow, "trig_combiner")

    trig_cluster_class = select_generic_executable(workflow, "trig_cluster")

    sbv_plotter_class = select_generic_executable(workflow, "sbv_plotter")
    
    efficiency_class = select_generic_executable(workflow, "efficiency")

    #horizon_dist_class = select_generic_executable(workflow, "horizon_dist")

    html_summary_class = select_generic_executable(workflow, "html_summary")

    # Set up injection jobs if desired
    if do_injections:
        workflow, injfinder_nodes, injfinder_outs, fm_cache, \
                injcombiner_nodes, injcombiner_outs, injcombiner_out_tags, \
                inj_sbv_plotter_parent_nodes, pp_nodes, pp_outs = \
                setup_coh_PTF_injections_pp(workflow, inj_trig_files,
                        inj_files, inj_trig_caches, inj_caches, pp_nodes,
                        pp_outs, inj_tags, output_dir, segment_dir, ifos,
                        tags=tags)

        # Initialise injection_efficiency class
        inj_efficiency_jobs = efficiency_class(cp, "inj_efficiency", ifo=ifos,
                                               out_dir=output_dir, tags=tags)

    # Set up main trig_combiner class and tags
    trig_combiner_out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]
    slides = all("COHERENT_NO_INJECTIONS" in t.name for t in trig_files) and \
            cp.has_option_tag("inspiral", "do-short-slides",
                              "coherent_no_injections")
    if slides:
        trig_combiner_out_tags.extend(["ZEROLAG_OFF", "ZEROLAG_ALL"])
    
    trig_combiner_jobs = trig_combiner_class(cp, "trig_combiner", ifo=ifos, 
                                             out_dir=output_dir, tags=tags)

    # Do first stage of trig_combiner and trig_cluster jobs if desired
    if workflow.cp.has_option("workflow-postproc", "do-two-stage-clustering"):
        logging.info("Doing two-stage clustering.")
        trig_combiner_s1_jobs = trig_combiner_class(cp, "trig_combiner",
                ifo=ifos, out_dir=output_dir, tags=tags+["INTERMEDIATE"])

        num_stage_one_jobs = int(workflow.cp.get("workflow-postproc",
            "num-stage-one-cluster-jobs"))
        num_inputs_per_job = -(-len(trig_files) // num_stage_one_jobs)
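        # (-(-a // b) is integer ceiling division, so every stage-one job gets
        # at most num_inputs_per_job inputs and all trig_files are covered.)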
        split_trig_files = (trig_files[p:p + num_inputs_per_job] for p in \
                            xrange(0, len(trig_files), num_inputs_per_job))
        trig_cluster_s1_jobs = trig_cluster_class(cp, "trig_cluster", ifo=ifos,
                out_dir=output_dir, tags=tags+["INTERMEDIATE"])
        trig_cluster_s1_nodes = []
        trig_cluster_s1_outs = FileList([])
        for j, s1_inputs in zip(range(num_stage_one_jobs), split_trig_files):
            trig_combiner_s1_node, trig_combiner_s1_outs = \
                    trig_combiner_s1_jobs.create_node(s1_inputs,
                            segment_dir, workflow.analysis_time,
                            out_tags=trig_combiner_out_tags, tags=tags+[str(j)])
            pp_nodes.append(trig_combiner_s1_node)
            workflow.add_node(trig_combiner_s1_node)

            unclust_file = [f for f in trig_combiner_s1_outs \
                            if "ALL_TIMES" in f.tag_str][0]
            trig_cluster_s1_node, curr_outs = trig_cluster_s1_jobs.create_node(\
                    unclust_file)
            trig_cluster_s1_outs.extend(curr_outs)
            clust_file = curr_outs[0]
            trig_cluster_s1_nodes.append(trig_cluster_s1_node)
            pp_nodes.append(trig_cluster_s1_node)
            workflow.add_node(trig_cluster_s1_node)
            dep = dax.Dependency(parent=trig_combiner_s1_node._dax_node,
                                 child=trig_cluster_s1_node._dax_node)
            workflow._adag.addDependency(dep)

        trig_combiner_node, trig_combiner_outs = \
                trig_combiner_jobs.create_node(trig_cluster_s1_outs,
                        segment_dir, workflow.analysis_time,
                        out_tags=trig_combiner_out_tags, tags=tags)
        pp_nodes.append(trig_combiner_node)
        workflow.add_node(trig_combiner_node)
        pp_outs.extend(trig_combiner_outs)
        for trig_cluster_s1_node in trig_cluster_s1_nodes:
            dep = dax.Dependency(parent=trig_cluster_s1_node._dax_node,
                                 child=trig_combiner_node._dax_node)
            workflow._adag.addDependency(dep)

    else:
        trig_combiner_node, trig_combiner_outs = \
                trig_combiner_jobs.create_node(trig_files, segment_dir,
                        workflow.analysis_time, out_tags=trig_combiner_out_tags,
                        tags=tags)
        pp_nodes.append(trig_combiner_node)
        workflow.add_node(trig_combiner_node)
        pp_outs.extend(trig_combiner_outs)

    # Initialise trig_cluster class
    trig_cluster_outs = FileList([])
    trig_cluster_jobs = trig_cluster_class(cp, "trig_cluster", ifo=ifos,
                                           out_dir=output_dir, tags=tags)

    # Initialise sbv_plotter class
    sbv_plotter_outs = FileList([])
    sbv_plotter_jobs = sbv_plotter_class(cp, "sbv_plotter", ifo=ifos,
                                         out_dir=output_dir, tags=tags)

    # Initialise efficiency class
    efficiency_outs = FileList([])
    efficiency_jobs = efficiency_class(cp, "efficiency", ifo=ifos,
                                       out_dir=output_dir, tags=tags)

    # Set up trig_cluster jobs
    trig_cluster_nodes = []
    for out_tag in trig_combiner_out_tags:
        unclust_file = [f for f in trig_combiner_outs \
                        if out_tag in f.tag_str][0]
        trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        trig_cluster_outs.extend(curr_outs)
        clust_file = curr_outs[0]
        trig_cluster_nodes.append(trig_cluster_node)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)
        # Are we not doing time slides?
        if ts_trig_files is None:
            if out_tag == "OFFSOURCE":
                off_node = trig_cluster_node
                offsource_clustered = clust_file

            # Add sbv_plotter and efficiency jobs
            workflow, pp_nodes = setup_coh_PTF_plotting_jobs(workflow, 
                    unclust_file, clust_file, sbv_plotter_jobs,
                    efficiency_jobs, inj_efficiency_jobs, off_node,
                    trig_cluster_node, offsource_clustered, injfinder_nodes,
                    injcombiner_nodes, injcombiner_outs,
                    inj_sbv_plotter_parent_nodes, inj_tags,
                    injcombiner_out_tags, pp_nodes, output_dir, segment_dir,
                    ifos, out_tag, do_injs=do_injections, tags=tags)

    # If doing time slides
    if ts_trig_files is not None:
        trig_combiner_ts_nodes = []
        trig_cluster_ts_nodes = []
        trig_cluster_all_times_nodes = []
        ts_all_times_outs = FileList([out for out in trig_cluster_outs
                                      if "ALL_TIMES" in out.tag_str])
        trig_combiner_ts_out_tags = ["ALL_TIMES", "OFFSOURCE"]
        ts_tags = list(set([[ts_tag for ts_tag in ts_trig_file.tags
                             if "SLIDE" in ts_tag][0]
                            for ts_trig_file in ts_trig_files]))
        for ts_tag in ts_tags:
            # Do one slide at a time
            ts_trigs = FileList([ts_trig_file for ts_trig_file in ts_trig_files
                                 if ts_tag in ts_trig_file.tags])

            # And do two-stage clustering if desired
            if workflow.cp.has_option("workflow-postproc",
                                      "do-two-stage-clustering"):

                split_trig_files = (ts_trigs[p:p + num_inputs_per_job]
                        for p in xrange(0, len(ts_trigs), num_inputs_per_job))
                trig_cluster_s1_nodes = []
                trig_cluster_s1_outs = FileList([])
                for j, s1_inputs in zip(range(num_stage_one_jobs),
                                        split_trig_files):
                    trig_combiner_s1_node, trig_combiner_s1_outs = \
                            trig_combiner_s1_jobs.create_node(s1_inputs,
                                     segment_dir, workflow.analysis_time,
                                     out_tags=trig_combiner_ts_out_tags,
                                     slide_tag=ts_tag, tags=tags+[str(j)])
                    pp_nodes.append(trig_combiner_s1_node)
                    workflow.add_node(trig_combiner_s1_node)

                    unclust_file = [f for f in trig_combiner_s1_outs \
                                    if "ALL_TIMES" in f.tag_str][0]
                    trig_cluster_s1_node, curr_outs = \
                            trig_cluster_s1_jobs.create_node(unclust_file)
                    trig_cluster_s1_outs.extend(curr_outs)
                    clust_file = curr_outs[0]
                    trig_cluster_s1_nodes.append(trig_cluster_s1_node)
                    pp_nodes.append(trig_cluster_s1_node)
                    workflow.add_node(trig_cluster_s1_node)
                    dep = dax.Dependency(parent=trig_combiner_s1_node._dax_node,
                                         child=trig_cluster_s1_node._dax_node)
                    workflow._adag.addDependency(dep)

                trig_combiner_ts_node, trig_combiner_ts_outs = \
                        trig_combiner_jobs.create_node(trig_cluster_s1_outs,
                                segment_dir, workflow.analysis_time,
                                slide_tag=ts_tag,
                                out_tags=trig_combiner_ts_out_tags, tags=tags)
                trig_combiner_ts_nodes.append(trig_combiner_ts_node)
                pp_nodes.append(trig_combiner_ts_node)
                workflow.add_node(trig_combiner_ts_node)
                pp_outs.extend(trig_combiner_ts_outs)
                for trig_cluster_s1_node in trig_cluster_s1_nodes:
                    dep = dax.Dependency(parent=trig_cluster_s1_node._dax_node,
                                         child=trig_combiner_ts_node._dax_node)
                    workflow._adag.addDependency(dep)
            else:
                trig_combiner_ts_node, trig_combiner_ts_outs = \
                        trig_combiner_jobs.create_node(ts_trigs, segment_dir,
                                workflow.analysis_time, slide_tag=ts_tag,
                                out_tags=trig_combiner_ts_out_tags, tags=tags)
                trig_combiner_ts_nodes.append(trig_combiner_ts_node)
                pp_nodes.append(trig_combiner_ts_node)
                workflow.add_node(trig_combiner_ts_node)
                pp_outs.extend(trig_combiner_ts_outs)

            # Set up trig cluster jobs for each timeslide
            for ts_out_tag in trig_combiner_ts_out_tags:
                unclust_file = [f for f in trig_combiner_ts_outs \
                                if ts_out_tag in f.tag_str][0]
                trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                        unclust_file)
                trig_cluster_outs.extend(curr_outs)
                clust_file = curr_outs[0]
                trig_cluster_ts_nodes.append(trig_cluster_node)
                pp_nodes.append(trig_cluster_node)
                workflow.add_node(trig_cluster_node)
                dep = dax.Dependency(parent=trig_combiner_ts_node._dax_node,
                                     child=trig_cluster_node._dax_node)
                workflow._adag.addDependency(dep)        
                if ts_out_tag == "ALL_TIMES":
                    trig_cluster_all_times_nodes.append(trig_cluster_node)
                    ts_all_times_outs.extend(FileList([clust_file]))

        # Combine all timeslides
        trig_combiner_all_node, trig_combiner_all_outs = \
                trig_combiner_jobs.create_node(ts_all_times_outs, segment_dir,
                            workflow.analysis_time, slide_tag="ALL_SLIDES",
                            out_tags=trig_combiner_ts_out_tags, tags=tags)
        pp_nodes.append(trig_combiner_all_node)
        workflow.add_node(trig_combiner_all_node)
        for trig_cluster_ts_node in trig_cluster_all_times_nodes:
            dep = dax.Dependency(parent=trig_cluster_ts_node._dax_node,
                                 child=trig_combiner_all_node._dax_node)
            workflow._adag.addDependency(dep)        

        for out_tag in trig_combiner_ts_out_tags:
            trig_cluster_outs = FileList([f for f in trig_cluster_outs
                                          if out_tag not in f.tag_str])
        trig_cluster_outs.extend(trig_combiner_all_outs)
        off_node = trig_combiner_all_node
        offsource_clustered = [f for f in trig_cluster_outs
                               if "OFFSOURCE" in f.tag_str
                               and "ZERO_LAG" not in f.tag_str][0]

        # Add sbv_plotter and efficiency jobs
        for out_tag in trig_combiner_out_tags:
            clust_file = [f for f in trig_cluster_outs \
                          if out_tag in f.tag_str][0]

            workflow, pp_nodes = setup_coh_PTF_plotting_jobs(workflow, 
                    unclust_file, clust_file, sbv_plotter_jobs,
                    efficiency_jobs, inj_efficiency_jobs, off_node, off_node,
                    offsource_clustered, injfinder_nodes, injcombiner_nodes,
                    injcombiner_outs, inj_sbv_plotter_parent_nodes, inj_tags,
                    injcombiner_out_tags, pp_nodes, output_dir, segment_dir,
                    ifos, out_tag, do_injs=do_injections, tags=tags)

    trial = 1
    while trial <= num_trials:
        trial_tag = "OFFTRIAL_%d" % trial
        unclust_file = [f for f in trig_combiner_outs \
                        if trial_tag in f.tag_str][0]
        trig_cluster_node, clust_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        clust_file = clust_outs[0]
        trig_cluster_outs.extend(clust_outs)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)

        # Add efficiency job
        efficiency_node = efficiency_jobs.create_node(clust_file,
                offsource_clustered, segment_dir, tags=[trial_tag])
        pp_nodes.append(efficiency_node)
        workflow.add_node(efficiency_node)
        dep = dax.Dependency(parent=off_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)
        dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)

        # Adding inj_efficiency job
        if do_injections:
            for tag in injcombiner_out_tags:
                if "_FILTERED_" in tag:
                    inj_set_tag = [t for t in inj_tags if \
                                   str(tag).replace("_FILTERED_", "") in t][0]
                else:
                    inj_set_tag = str(tag)

                found_file = [file for file in injcombiner_outs \
                              if tag + "_FOUND" in file.tag_str][0]
                missed_file = [file for file in injcombiner_outs \
                               if tag + "_MISSED" in file.tag_str][0]
                inj_efficiency_node = inj_efficiency_jobs.create_node(\
                        clust_file, offsource_clustered, segment_dir,
                        found_file, missed_file, tags=[trial_tag, tag,
                                                       inj_set_tag])
                pp_nodes.append(inj_efficiency_node)
                workflow.add_node(inj_efficiency_node)
                dep = dax.Dependency(parent=off_node._dax_node,
                                     child=inj_efficiency_node._dax_node)
                workflow._adag.addDependency(dep)
                for injcombiner_node in injcombiner_nodes:
                    dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                for injfinder_node in injfinder_nodes:
                    dep = dax.Dependency(parent=injfinder_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)

        trial += 1

    # Initialise html_summary class and set up job
    #FIXME: We may want this job to run even if some jobs fail
    html_summary_jobs = html_summary_class(cp, "html_summary", ifo=ifos,
                                           out_dir=output_dir, tags=tags)
    if do_injections:
        tuning_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                       if "DETECTION" in inj_tag]
        exclusion_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                          if "DETECTION" not in inj_tag]
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                tuning_tags=tuning_tags, exclusion_tags=exclusion_tags,
                seg_plot=segs_plot, html_dir=html_dir, time_slides=slides)
    else:
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                seg_plot=segs_plot, html_dir=html_dir, time_slides=slides)
    workflow.add_node(html_summary_node)
    for pp_node in pp_nodes:
        dep = dax.Dependency(parent=pp_node._dax_node,
                             child=html_summary_node._dax_node)
        workflow._adag.addDependency(dep)

    # Make the open box shell script
    try:
        open_box_cmd = html_summary_node.executable.get_pfn() + " "
    except:
        exe_path = html_summary_node.executable.get_pfn('nonlocal').replace(\
                "https", "http")
        exe_name = exe_path.rsplit('/', 1)[-1]
        open_box_cmd = "wget %s\n" % exe_path
        open_box_cmd += "chmod 500 ./%s\n./%s " % (exe_name, exe_name)
    open_box_cmd += ' '.join(html_summary_node._args + \
                             html_summary_node._options)
    open_box_cmd += " --open-box"
    open_box_path = "%s/open_the_box.sh" % output_dir
    f = open(open_box_path, "w")
    f.write("#!/bin/sh\n%s" % open_box_cmd)
    f.close()
    os.chmod(open_box_path, 0500)

    pp_outs.extend(trig_cluster_outs)

    return pp_outs
Example #48
0
def setup_multiifo_interval_coinc_inj(workflow, hdfbank, full_data_trig_files, inj_trig_files,
                                      stat_files, veto_file, veto_name,
                                      out_dir, pivot_ifo, fixed_ifo, tags=None):
    """
    This function sets up exact match multiifo coincidence for injections
    """
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    logging.info('Setting up coincidence for injections')

    if len(hdfbank) != 1:
        raise ValueError('Must use exactly 1 bank file for this coincidence '
                         'method, I got %i !' % len(hdfbank))
    hdfbank = hdfbank[0]

    # Wall time knob and memory knob
    factor = int(workflow.cp.get_opt_tags('workflow-coincidence', 'parallelization-factor', tags))

    ffiles = {}
    ifiles = {}
    for ifo, ffi in zip(*full_data_trig_files.categorize_by_attr('ifo')):
        ffiles[ifo] = ffi[0]
    for ifo, ifi in zip(*inj_trig_files.categorize_by_attr('ifo')):
        ifiles[ifo] = ifi[0]

    injinj_files = FileList()
    injfull_files = FileList()
    fullinj_files = FileList()
    # For the injfull and fullinj separation we take the pivot_ifo on one side,
    # and the rest that are attached to the fixed_ifo on the other side
    for ifo in ifiles:  # ifiles is keyed on ifo
        if ifo == pivot_ifo:
            injinj_files.append(ifiles[ifo])
            injfull_files.append(ifiles[ifo])
            fullinj_files.append(ffiles[ifo])
        else:
            injinj_files.append(ifiles[ifo])
            injfull_files.append(ffiles[ifo])
            fullinj_files.append(ifiles[ifo])

    combo = [(injinj_files, "injinj"),
             (injfull_files, "injfull"),
             (fullinj_files, "fullinj"),
            ]
    bg_files = {'injinj':[], 'injfull':[], 'fullinj':[]}

    for trig_files, ctag in combo:
        findcoinc_exe = PyCBCFindMultiifoCoincExecutable(workflow.cp,
                                                         'multiifo_coinc',
                                                         ifos=ifiles.keys(),
                                                         tags=tags + [ctag],
                                                         out_dir=out_dir)
        for i in range(factor):
            group_str = '%s/%s' % (i, factor)
            coinc_node = findcoinc_exe.create_node(trig_files, hdfbank,
                                                   stat_files,
                                                   veto_file, veto_name,
                                                   group_str,
                                                   pivot_ifo,
                                                   fixed_ifo,
                                                   tags=[veto_name, str(i)])

            bg_files[ctag] += coinc_node.output_files
            workflow.add_node(coinc_node)

    logging.info('...leaving coincidence for injections')
    return bg_files
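
What the three combinations contain is easiest to see with two detectors. A standalone sketch mirroring the loop above, with H1 as the (hypothetical) pivot detector and L1 as the fixed one:

pivot_ifo = 'H1'                                                  # hypothetical
ifiles = {'H1': 'H1-INJ_TRIGS.hdf', 'L1': 'L1-INJ_TRIGS.hdf'}     # injection run
ffiles = {'H1': 'H1-FULL_TRIGS.hdf', 'L1': 'L1-FULL_TRIGS.hdf'}   # full-data run

injinj, injfull, fullinj = [], [], []
for ifo in ifiles:
    if ifo == pivot_ifo:
        injinj.append(ifiles[ifo])
        injfull.append(ifiles[ifo])
        fullinj.append(ffiles[ifo])
    else:
        injinj.append(ifiles[ifo])
        injfull.append(ffiles[ifo])
        fullinj.append(ifiles[ifo])

print(injinj)    # injection triggers from both detectors
print(injfull)   # injection triggers from the pivot, full-data from the rest
print(fullinj)   # full-data triggers from the pivot, injections from the rest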
Example #49
0
    def create_node(self, trig_files=None, segment_dir=None, analysis_seg=None,
                    slide_tag=None, out_tags=None, tags=None):
        import Pegasus.DAX3 as dax
        if out_tags is None:
            out_tags = []
        if tags is None:
            tags = []
        node = Node(self)

        if not trig_files:
            raise ValueError("%s must be supplied with trigger files"
                              % self.name)

        # Data options
        num_trials = int(self.cp.get("trig_combiner", "num-trials"))
        trig_name = self.cp.get('workflow', 'trigger-name')
        if all("COHERENT_NO_INJECTIONS" in t.name for t in trig_files) and \
                self.cp.has_option_tag('inspiral', 'do-short-slides',
                                       'coherent_no_injections'):
            node.add_opt('--short-slides')
        
        node.add_opt('--grb-name', trig_name)
        
        node.add_opt('--trig-start-time', analysis_seg[0])
        node.add_opt('--ifo-tag', self.ifos)
        node.add_opt('--user-tag', 'INSPIRAL')
        if tags:
            node.add_opt('--job-tag', '_'.join(tags))

        if slide_tag is not None:
            node.add_opt('--slide-tag', slide_tag)
            node.add_opt('--long-slides')
            tag_start=["TIMESLIDES_GRB%s_%s" % (trig_name, slide_tag)]+tags
        else:
            tag_start=["GRB%s" % trig_name]+tags

        # Set input / output options
        if all(hasattr(t.node, "executable") for t in trig_files):
            if all(t.node.executable.name == "trig_cluster"
                   for t in trig_files):
                node.add_opt('--input-files',
                             " ".join([t.storage_path for t in trig_files]))
                if self.cp.has_option_tag('inspiral', 'do-short-slides',
                                          'coherent_no_injections'):
                    node.add_opt('--short-slides')
            else:
                node.add_input_list_opt('--input-files', trig_files)
        else:
            node.add_opt('--input-files',
                         " ".join([t.storage_path for t in trig_files]))

        node.add_opt('--segment-dir', segment_dir)
        node.add_opt('--output-dir', self.out_dir)

        out_files = FileList([])
        for out_tag in out_tags:
            out_file = File(self.ifos, 'INSPIRAL', trig_files[0].segment,
                            directory=self.out_dir, extension='xml.gz',
                            tags=tag_start+[out_tag],
                            store_file=self.retain_files)
            out_files.append(out_file)
            #node._dax_node.uses(out_file, link=dax.Link.OUTPUT, register=False,
            #                    transfer=False)
            #node._outputs += [out_file]
            #out_file.node = node
            #node._add_output(out_file)

        for trial in range(1, num_trials + 1):
            out_file = File(self.ifos, 'INSPIRAL', trig_files[0].segment,
                            directory=self.out_dir, extension='xml.gz',
                            tags=tag_start+["OFFTRIAL_%d" % trial],
                            store_file=self.retain_files)
            out_files.append(out_file)
            #node._dax_node.uses(out_file, link=dax.Link.OUTPUT, register=False,
            #                    transfer=False)
            #node._outputs += [out_file]
            #out_file.node = node
            #node._add_output(out_file)

        node.add_profile('condor', 'request_cpus', self.num_threads)

        return node, out_files
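
The leading tags attached to every output file depend on whether a slide tag was supplied. With hypothetical values:

trig_name, tags = "100316D", []   # hypothetical GRB name, no extra tags
slide_tag = "SLIDE_5"             # hypothetical slide tag

with_slides = ["TIMESLIDES_GRB%s_%s" % (trig_name, slide_tag)] + tags
without_slides = ["GRB%s" % trig_name] + tags
print(with_slides)      # ['TIMESLIDES_GRB100316D_SLIDE_5']
print(without_slides)   # ['GRB100316D']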
Example #50
0
def setup_coincidence_workflow_ligolw_thinca(
        workflow, segsList, timeSlideFiles, inspiral_outs, output_dir,
        veto_cats=[2,3,4], tags=[], timeSlideTags=None,
        parallelize_split_input=False):
    """
    This function is used to setup a single-stage ihope style coincidence stage
    of the workflow using ligolw_sstinca (or compatible code!).

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The workflow instance that the coincidence jobs will be added to.
    segsList : pycbc.workflow.core.FileList
        The list of files returned by workflow's segment module that contains
        pointers to all the segment files generated in the workflow. If the
        coincidence code will be applying the data quality vetoes, then this
        will be used to ensure that the codes get the necessary input to do
        this.
    timeSlideFiles : pycbc.workflow.core.FileList
        A FileList of the timeSlide input files that are needed to
        determine what time sliding needs to be done. One of the timeSlideFiles
        will normally be "zero-lag only", the others containing time slides
        used to facilitate background computations later in the workflow.
    inspiral_outs : pycbc.workflow.core.FileList
        A FileList of the matched-filter module output that is used as
        input to the coincidence codes running at this stage.
    output_dir : path
        The directory in which coincidence output will be stored.
    veto_cats : list of ints (optional, default = [2,3,4])
        Veto categories that will be applied in the coincidence jobs. If this
        takes the default value the code will run data quality at cumulative 
        categories 2, 3 and 4. Note that if we change the flag definitions to
        be non-cumulative then this option will need to be revisited.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['BNSINJECTIONS'] or
        ['NOINJECTIONANALYSIS']. This will be used in output names.
    timeSlideTags : list of strings (optional, default = None)
        A list of the tags corresponding to the timeSlideFiles that are to be
        used in this call to the module. This can be used to ensure that the
        injection runs do no time sliding, but the no-injection runs do perform
        time slides (or vice-versa if you prefer!)
    Returns
    --------
    ligolwThincaOuts : pycbc.workflow.core.FileList
        A list of the output files generated from ligolw_sstinca.
    ligolwAddOuts : pycbc.workflow.core.FileList
        A list of the output files generated from ligolw_add.
    """
    from pylal import ligolw_cafe

    logging.debug("Entering coincidence module.")
    cp = workflow.cp
    ifoString = workflow.ifo_string

    # setup code for each veto_category

    coinc_outs = FileList([])
    other_outs = {}

    if not timeSlideTags:
        # Get all sections by looking in ini file, use all time slide files.
        timeSlideTags = [(sec.split('-')[-1]).upper()
                  for sec in workflow.cp.sections() if sec.startswith('tisi-')]

    if parallelize_split_input:
        # Want to split all input jobs according to their JOB%d tag.
        # This matches any string that is the letters JOB followed by some
        # numbers and nothing else.
        inspiral_outs_dict = {}
        regex_match = re.compile(r'JOB([0-9]+)\Z')
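        # For example, a file carrying the (hypothetical) tag 'JOB3' would be
        # grouped under inspiral_outs_dict[3] below.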
        for file in inspiral_outs:
            matches = [regex_match.match(tag) for tag in file.tags]
            # Remove non matching entries
            matches = [i for i in matches if i is not None]
            # Must have one entry
            if len(matches) == 0:
                warn_msg = "I was asked to parallelize over split inspiral "
                warn_msg += "files at the coincidence stage, but at least one "
                warn_msg += "input file does not have a JOB\%d tag indicating "
                warn_msg += "that it was split. Assuming that I do not have "
                warn_msg += "split input files and turning "
                warn_msg += "parallelize_split_input off."
                logging.warn(warn_msg)
                parallelize_split_input = False
                break
            if len(matches) > 1:
                err_msg = "One of my input files has two tags fitting JOB\%d "
                err_msg += "this means I cannot tell which split job this "
                err_msg += "file is from."
                raise ValueError(err_msg)
            # Extract the job ID
            id = int(matches[0].string[3:])
            if not inspiral_outs_dict.has_key(id):
                inspiral_outs_dict[id] = FileList([])
            inspiral_outs_dict[id].append(file)
        else:
            # If I got through all the files I want to sort the dictionaries so
            # that file with key a and index 3 is the same file as key b and
            # index 3 other than the tag is JOBA -> JOBB ... ie. it has used
            # a different part of the template bank.
            sort_lambda = lambda x: (x.ifo_string, x.segment,
                                     x.tagged_description)
            for key in inspiral_outs_dict.keys():
                inspiral_outs_dict[key].sort(key=sort_lambda)
            # These should be in ascending order, so I can assume the existence
            # of a JOB0 tag
            inspiral_outs = inspiral_outs_dict[0]
            for index, file in enumerate(inspiral_outs):
                # Store the index in the file for quicker mapping later
                file.thinca_index = index
    else:
        inspiral_outs_dict = None

    for timeSlideTag in timeSlideTags:
        # Get the time slide file from the inputs
        tisiOutFile = timeSlideFiles.find_output_with_tag(timeSlideTag)
        if not len(tisiOutFile) == 1:
            errMsg = "If you are seeing this, something batshit is going on!"
            if len(tisiOutFile) == 0:
                errMsg = "No time slide files found matching %s." \
                                                                %(timeSlideTag)
            if len(tisiOutFile) > 1:
                errMsg = "More than one time slide files match %s." \
                                                                %(timeSlideTag)
            raise ValueError(errMsg)
        tisiOutFile = tisiOutFile[0]

        # Next we run ligolw_cafe. This is responsible for
        # identifying what times will be used for the ligolw_thinca jobs and
        # what files are needed for each. If doing time sliding there
        # will be some triggers read into multiple jobs
        cacheInspOuts = inspiral_outs.convert_to_lal_cache()
        if workflow.cp.has_option_tags("workflow-coincidence", 
                                       "maximum-extent", tags):
            max_extent = float( workflow.cp.get_opt_tags(
                              "workflow-coincidence", "maximum-extent", tags) )
        else:
            # hard-coded default value for extent of time in a single job
            max_extent = 3600
        logging.debug("Calling into cafe.")
        time_slide_table = lsctables.TimeSlideTable.get_table(\
                ligolw_utils.load_filename(tisiOutFile.storage_path,
                                 gz=tisiOutFile.storage_path.endswith(".gz"),
                                 contenthandler=ContentHandler,
                                 verbose=False))
        time_slide_table.sync_next_id()
        time_slide_dict = time_slide_table.as_dict()

        cafe_seglists, cafe_caches = ligolw_cafe.ligolw_cafe(cacheInspOuts,
            time_slide_dict.values(), extentlimit=max_extent, verbose=False)
        logging.debug("Done with cafe.")

        # Take the combined seglist file
        dqSegFile=segsList.find_output_with_tag('COMBINED_CUMULATIVE_SEGMENTS')
        if not len(dqSegFile) == 1:
            errMsg = "Did not find exactly 1 data quality file."
            print len(dqSegFile), dqSegFile
            raise ValueError(errMsg)
        dqSegFile=dqSegFile[0]

        # Set up llwadd job
        llwadd_tags = [timeSlideTag] + tags 
        ligolwadd_job = LigolwAddExecutable(cp, 'llwadd', ifo=ifoString,
                                          out_dir=output_dir, tags=llwadd_tags)
        ligolwAddOuts = FileList([])

        # This flag decides whether a clustering job is added after
        # ligolw_thinca
        if workflow.cp.has_option_tags("workflow-coincidence",
                                      "coincidence-post-cluster", llwadd_tags):
            coinc_post_cluster = True
        else:
            coinc_post_cluster = False

        # Set up the thinca and clustering executables for each veto category
        ligolwthinca_job = {}
        cluster_job = {}
        thinca_tags = {}
        for category in veto_cats:
            logging.debug("Preparing %s %s" %(timeSlideTag,category))
            dqVetoName = 'VETO_CAT%d_CUMULATIVE' %(category)
            # FIXME: Should we resolve this now?
            # FIXME: Here we set the dqVetoName to be compatible with pipedown
            #        For pipedown must put the slide identifier first and
            #        dqVetoName last.
            pipedownDQVetoName = 'CAT_%d_VETO' %(category)
            curr_thinca_job_tags = [timeSlideTag] + tags + [pipedownDQVetoName]
            thinca_tags[category]=curr_thinca_job_tags
            # Set up jobs for ligolw_thinca
            ligolwthinca_job[category] = LigolwSSthincaExecutable(cp, 'thinca',
                                             ifo=ifoString, out_dir=output_dir,
                                             dqVetoName=dqVetoName,
                                             tags=curr_thinca_job_tags)
            if coinc_post_cluster:
                cluster_job[category] = SQLInOutExecutable(cp, 'pycbccluster',
                                             ifo=ifoString, out_dir=output_dir,
                                             tags=curr_thinca_job_tags)
        
        for idx, cafe_cache in enumerate(cafe_caches):
            ligolwAddOuts = FileList([])
            ligolwThincaOuts = FileList([])
            ligolwThincaLikelihoodOuts = FileList([])
            ligolwClusterOuts = FileList([])

            if not len(cafe_cache.objects):
                raise ValueError("One of the cache objects contains no files!")
        
            # Determine the segment within which to accept coincidences.
            # If this cache is not the first or last in the time series, check
            # whether the neighbouring caches are adjacent to it (i.e. their
            # extents meet). If they are, use the meeting point as a bound for
            # accepting coincidences; if they are not, no bound is applied on
            # that side.
            coincStart, coincEnd = None, None
            if idx and (cafe_cache.extent[0] == cafe_caches[idx-1].extent[1]):
                coincStart = cafe_cache.extent[0]
            if idx + 1 < len(cafe_caches) and \
                        (cafe_cache.extent[1] == cafe_caches[idx+1].extent[0]):
                coincEnd = cafe_cache.extent[1]
            coincSegment = (coincStart, coincEnd)
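            # Worked example with hypothetical extents: for caches spanning
            # [0, 3600], [3600, 7200] and [8000, 9000], the middle cache is
            # adjacent to the first, so its coincStart is 3600; it is not
            # adjacent to the third, so its coincEnd stays None and no bound
            # is applied on that side.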
        
            # Need to create a list of the File(s) contained in the cache.
            # Assume that if we have partitioned input then if *one* job in the
            # partitioned input is an input then *all* jobs will be.
            if not parallelize_split_input:
                inputTrigFiles = FileList([])
                for object in cafe_cache.objects:
                    inputTrigFiles.append(object.workflow_file)
        
                llw_files = inputTrigFiles + [dqSegFile] + [tisiOutFile]
        
                # Now we can create the nodes
                node = ligolwadd_job.create_node(cafe_cache.extent, llw_files)
                ligolwAddFile = node.output_files[0]
                ligolwAddOuts.append(ligolwAddFile)
                workflow.add_node(node)
                for category in veto_cats:
                    node = ligolwthinca_job[category].create_node(\
                                cafe_cache.extent, coincSegment, ligolwAddFile)
                    ligolwThincaOuts += \
                        node.output_files.find_output_without_tag('DIST_STATS')
                    ligolwThincaLikelihoodOuts += \
                           node.output_files.find_output_with_tag('DIST_STATS')
                    workflow.add_node(node)
                    if coinc_post_cluster:
                        node = cluster_job[category].create_node(\
                                       cafe_cache.extent, ligolwThincaOuts[-1])
                        ligolwClusterOuts += node.output_files
                        workflow.add_node(node)
            else:
                for key in inspiral_outs_dict.keys():
                    curr_tags = ["JOB%d" %(key)]
                    curr_list = inspiral_outs_dict[key]
                    inputTrigFiles = FileList([])
                    for object in cafe_cache.objects:
                        inputTrigFiles.append(
                                  curr_list[object.workflow_file.thinca_index])
        
                    llw_files = inputTrigFiles + [dqSegFile] + [tisiOutFile]

                    # Now we can create the nodes
                    node = ligolwadd_job.create_node(cafe_cache.extent,
                                                     llw_files, tags=curr_tags)
                    ligolwAddFile = node.output_files[0]
                    ligolwAddOuts.append(ligolwAddFile)
                    workflow.add_node(node)
                    if workflow.cp.has_option_tags("workflow-coincidence",
                          "coincidence-write-likelihood",curr_thinca_job_tags):
                        write_likelihood=True
                    else:
                        write_likelihood=False
                    for category in veto_cats:
                        node = ligolwthinca_job[category].create_node(\
                             cafe_cache.extent, coincSegment, ligolwAddFile,
                             tags=curr_tags, write_likelihood=write_likelihood)
                        ligolwThincaOuts += \
                               node.output_files.find_output_without_tag(\
                                                                  'DIST_STATS')
                        ligolwThincaLikelihoodOuts += \
                              node.output_files.find_output_with_tag(\
                                                                  'DIST_STATS')
                        workflow.add_node(node)
                        if coinc_post_cluster:
                            node = cluster_job[category].create_node(\
                                       cafe_cache.extent, ligolwThincaOuts[-1])
                            ligolwClusterOuts += node.output_files
                            workflow.add_node(node)

            other_returns = {}
            other_returns['LIGOLW_ADD'] = ligolwAddOuts
            other_returns['DIST_STATS'] = ligolwThincaLikelihoodOuts
        
            if coinc_post_cluster:
                main_return = ligolwClusterOuts
                other_returns['THINCA'] = ligolwThincaOuts
            else:
                main_return = ligolwThincaOuts
        
            logging.debug("Done")
            coinc_outs.extend(main_return)
            for key, file_list in other_returns.items():
                if other_outs.has_key(key):
                    other_outs[key].extend(other_returns[key])
                else:
                    other_outs[key] = other_returns[key]
    return coinc_outs, other_outs
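A hedged sketch of how this coincidence stage might be wired into a workflow. The input FileLists are assumed to come from the earlier segment, time-slide and matched-filter modules, and the directory and tag values are illustrative only:

# Hypothetical wiring; see the docstring above for the meaning of each input.
coinc_files, other_files = setup_coincidence_workflow_ligolw_thinca(
    workflow, seg_files, time_slide_files, insp_outs, 'coincidence',
    veto_cats=[2, 3, 4], tags=['NOINJECTIONANALYSIS'],
    timeSlideTags=['ZEROLAG', 'SLIDES'])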
Ejemplo n.º 51
0
def setup_segment_gen_mixed(workflow, veto_categories, out_dir, 
                            maxVetoAtRunTime, tag=None,
                            generate_coincident_segs=True):
    """
    This function will generate veto files for each ifo and for each veto
    category.
    It can generate these vetoes at run-time or in the workflow (or do some at
    run-time and some in the workflow). However, the CAT_1 vetoes and science
    time must be generated at run time as they are needed to plan the workflow.
    CATs 2 and higher *may* be needed for other workflow construction.
    It can also combine these files to create a set of cumulative,
    multi-detector veto files, which can be used in ligolw_thinca and in
    pipedown. Again these can be created at run time or within the workflow.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
        This instance also contains the ifos for which to attempt to obtain
        segments for this analysis and the start and end times to search for
        segments over.
    veto_categories : list of ints
        List of veto categories to generate segments for. If this stops being
        integers, this can be changed here.
    out_dir : path
        The directory in which output will be stored.    
    maxVetoAtRunTime : int
        Generate veto files at run time up to this category. Veto categories
        beyond this in veto_categories will be generated in the workflow.
        If we move to a model where veto
        categories are not explicitly cumulative, this will be rethought.
    tag : string, optional (default=None)
        Use this to specify a tag. This can be used if this module is being
        called more than once to give call specific configuration (by setting
        options in [workflow-datafind-${TAG}] rather than [workflow-datafind]). This
        is also used to tag the Files returned by the class to uniqueify
        the Files and uniqueify the actual filename.
        FIXME: Filenames may not be unique with current codes!
    generate_coincident_segs : boolean, optional (default = True)
        If given this module will generate a set of coincident, cumulative veto
        files that can be used with ligolw_thinca and pipedown.

    Returns
    -------
    segFilesList : pycbc.workflow.core.FileList of SegFile instances
        These are representations of the various segment files that were
        constructed at this stage of the workflow and may be needed at later
        stages of the analysis (e.g. for performing DQ vetoes). If a file was
        generated at run time, the segment lists it contains are stored as an
        attribute of the instance; if it will only be generated within the
        workflow, they cannot be known in advance.
    """
    cp = workflow.cp
    segFilesList = FileList([])
    start_time = workflow.analysis_time[0]
    end_time = workflow.analysis_time[1]
    segValidSeg = workflow.analysis_time
    # Will I need to add some jobs to the workflow?
    vetoGenJob = create_segs_from_cats_job(cp, out_dir, workflow.ifo_string)
    
    for ifo in workflow.ifos:
        logging.info("Generating science segments for ifo %s" %(ifo))
        currSciSegs, currSciXmlFile = get_science_segments(ifo, cp, start_time,
                                                    end_time, out_dir, tag=tag)
        segFilesList.append(currSciXmlFile)

        for category in veto_categories:
            if category > maxVetoAtRunTime:
                msg = "Adding creation of CAT_%d segments " %(category)
                msg += "for ifo %s to workflow." %(ifo)
                logging.info(msg)
                execute_status = False
            else:
                logging.info("Generating CAT_%d segments for ifo %s." \
                             %(category,ifo))
                execute_status = True

            currVetoXmlFile = get_veto_segs(workflow, ifo, category, 
                                                start_time, end_time, out_dir,
                                                vetoGenJob, 
                                                execute_now=execute_status)  

            segFilesList.append(currVetoXmlFile) 
            # Store the CAT_1 veto segs for use below
            if category == 1:
                # Yes, it's yucky to generate a file and then read it back in.
                # This will be fixed when the new API for segment generation
                # is ready.
                vetoXmlFP = open(currVetoXmlFile.storage_path, 'r')
                cat1Segs = fromsegmentxml(vetoXmlFP)
                vetoXmlFP.close()
                
        analysedSegs = currSciSegs - cat1Segs
        analysedSegs.coalesce()
        analysedXmlFile = os.path.join(out_dir,
                             "%s-SCIENCE_OK_SEGMENTS.xml" %(ifo.upper()) )
        currUrl = urlparse.urlunparse(['file', 'localhost', analysedXmlFile,
                          None, None, None])
        if tag:
            currTags = [tag, 'SCIENCE_OK']
        else:
            currTags = ['SCIENCE_OK']
        currFile = OutSegFile(ifo, 'SEGMENTS',
                              segValidSeg, currUrl, segment_list=analysedSegs,
                              tags = currTags)
        segFilesList.append(currFile)
        currFile.toSegmentXml()


    if generate_coincident_segs:
        # Need to make some combined category veto files to use when vetoing
        # segments and triggers.
        ifo_string = workflow.ifo_string
        categories = []
        cum_cat_files = []
        for category in veto_categories:
            categories.append(category)
            # Set file name in workflow standard
            if tag:
                currTags = [tag, 'CUMULATIVE_CAT_%d' %(category)]
            else:
                currTags = ['CUMULATIVE_CAT_%d' %(category)]

            cumulativeVetoFile = os.path.join(out_dir,
                                   '%s-CUMULATIVE_CAT_%d_VETO_SEGMENTS.xml' \
                                   %(ifo_string, category) )
            currUrl = urlparse.urlunparse(['file', 'localhost',
                                         cumulativeVetoFile, None, None, None])
            currSegFile = OutSegFile(ifo_string, 'SEGMENTS',
                                   segValidSeg, currUrl, tags=currTags)
            # And actually make the file (or queue it in the workflow)
            logging.info("Generating combined, cumulative CAT_%d segments."\
                             %(category))
            if category <= maxVetoAtRunTime:
                execute_status = True
            else:
                execute_status = False
            get_cumulative_segs(workflow, currSegFile,  categories,
                                segFilesList, out_dir, 
                                execute_now=execute_status)

            segFilesList.append(currSegFile)
            cum_cat_files.append(currSegFile)
        # Create a combined file
        # Set file tag in workflow standard
        if tag:
            currTags = [tag, 'COMBINED_CUMULATIVE_SEGMENTS']
        else:
            currTags = ['COMBINED_CUMULATIVE_SEGMENTS']

        combined_veto_file = os.path.join(out_dir,
                               '%s-CUMULATIVE_ALL_CATS_SEGMENTS.xml' \
                               %(ifo_string) )
        curr_url = urlparse.urlunparse(['file', 'localhost',
                                       combined_veto_file, None, None, None])
        curr_file = OutSegFile(ifo_string, 'SEGMENTS',
                               segValidSeg, curr_url, tags=currTags)

        for category in veto_categories:
            if category <= maxVetoAtRunTime:
                execute_status = True
                break
        else:
            execute_status = False
        add_cumulative_files(workflow, curr_file, cum_cat_files, out_dir,
                             execute_now=execute_status)
        segFilesList.append(curr_file)

    return segFilesList
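For illustration, a hypothetical call that generates CAT_1 and CAT_2 vetoes at run time while deferring CAT_3 and CAT_4 generation to workflow jobs (all argument values are made up for the example):

seg_files_list = setup_segment_gen_mixed(workflow, [1, 2, 3, 4], 'segments',
                                         maxVetoAtRunTime=2,
                                         generate_coincident_segs=True)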
Ejemplo n.º 52
0
def get_cumulative_veto_group_files(workflow, option, out_dir, tags=[]):
    """
    Get the cumulative veto files that define the different backgrounds 
    we want to analyze, defined by groups of vetos.

    Parameters
    -----------
    workflow : Workflow object
        Instance of the workflow object
    option : str
        ini file option to use to get the veto groups
    out_dir : path
        Location to store output files
    tags : list of strings
        Used to retrieve subsections of the ini file for
        configuration options.

    Returns
    --------
    cum_seg_files : workflow.core.FileList instance
        The cumulative segment files for each veto group.
    names : list of strings
        The segment names corresponding to each veto group.
    cat_files : workflow.core.FileList instance
        The list of individual category veto files.
    """
    make_analysis_dir(out_dir)
    start_time = workflow.analysis_time[0]
    end_time = workflow.analysis_time[1]

    cat_sets = parse_cat_ini_opt(workflow.cp.get_opt_tags('workflow-segments',
                                            option, tags))
    veto_gen_job = create_segs_from_cats_job(workflow.cp, out_dir,
                                             workflow.ifo_string) 
    cats = set()
    for cset in cat_sets:
        cats = cats.union(cset)
    
    cat_files = FileList()
    for ifo in workflow.ifos:
        for category in cats:
            cat_files.append(get_veto_segs(workflow, ifo,
                                        cat_to_pipedown_cat(category), 
                                        start_time, end_time, out_dir,
                                        veto_gen_job, execute_now=True))

    cum_seg_files = FileList()     
    names = []   
    for cat_set in cat_sets:
        segment_name = "CUMULATIVE_CAT_%s" % (''.join(sorted(cat_set)))
        logging.info('getting information for %s' % segment_name)
        categories = [cat_to_pipedown_cat(c) for c in cat_set]
        path = os.path.join(out_dir, '%s-%s_VETO_SEGMENTS.xml' \
                            % (workflow.ifo_string, segment_name))
        path = os.path.abspath(path)
        url = urlparse.urlunparse(['file', 'localhost', path, None, None, None])
        seg_file = File(workflow.ifos, 'CUM_VETOSEGS', workflow.analysis_time,
                        file_url=url, tags=[segment_name])
                        
        cum_seg_files += [get_cumulative_segs(workflow, seg_file,  categories,
              cat_files, out_dir, execute_now=True, segment_name=segment_name)]
        names.append(segment_name)
              
    return cum_seg_files, names, cat_files
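A hedged usage sketch; 'segments-veto-groups' is assumed here to be the [workflow-segments] option that lists the veto groups, and the output directory name is illustrative:

cum_seg_files, names, cat_files = get_cumulative_veto_group_files(
    workflow, 'segments-veto-groups', 'segments')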
Ejemplo n.º 53
0
def get_coh_PTF_files(cp, ifos, run_dir, bank_veto=False, summary_files=False):
    """
    Retrieve files needed to run coh_PTF jobs within a PyGRB workflow

    Parameters
    ----------
    cp : pycbc.workflow.configuration.WorkflowConfigParser object
        The parsed configuration options of a pycbc.workflow.core.Workflow.
    ifos : str
        String containing the analysis interferometer IDs.
    run_dir : str
        The run directory, destination for retrieved files.
    bank_veto : Boolean
        If true, will retrieve the bank_veto_bank.xml file.
    summary_files : Boolean
        If true, will retrieve the summary page style files.

    Returns
    -------
    file_list : pycbc.workflow.FileList object
        A FileList containing the retrieved files.
    """
    if os.getenv("LAL_SRC") is None:
        raise ValueError("The environment variable LAL_SRC must be set to a "
                         "location containing the file lalsuite.git")
    else:
        lalDir = os.getenv("LAL_SRC")
        sci_seg = segments.segment(int(cp.get("workflow", "start-time")),
                                   int(cp.get("workflow", "end-time")))
        file_list = FileList([])

        # Bank veto
        if bank_veto:
            shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/" \
                        "bank_veto_bank.xml" % lalDir, "%s" % run_dir)
            bank_veto_url = "file://localhost%s/bank_veto_bank.xml" % run_dir
            bank_veto = File(ifos, "bank_veto_bank", sci_seg,
                             file_url=bank_veto_url)
            bank_veto.PFN(bank_veto.cache_entry.path, site="local")
            file_list.extend(FileList([bank_veto]))

        if summary_files:
            # summary.js file
            shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/" \
                        "coh_PTF_html_summary.js" % lalDir, "%s" % run_dir)
            summary_js_url = "file://localhost%s/coh_PTF_html_summary.js" \
                             % run_dir
            summary_js = File(ifos, "coh_PTF_html_summary_js", sci_seg,
                              file_url=summary_js_url)
            summary_js.PFN(summary_js.cache_entry.path, site="local")
            file_list.extend(FileList([summary_js]))

            # summary.css file
            shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/" \
                        "coh_PTF_html_summary.css" % lalDir, "%s" % run_dir)
            summary_css_url = "file://localhost%s/coh_PTF_html_summary.css" \
                              % run_dir
            summary_css = File(ifos, "coh_PTF_html_summary_css", sci_seg,
                               file_url=summary_css_url)
            summary_css.PFN(summary_css.cache_entry.path, site="local")
            file_list.extend(FileList([summary_css]))

        return file_list
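An illustrative call, assuming LAL_SRC has been pointed at a lalsuite checkout beforehand; the ifo string and directories are placeholders:

import os
os.environ["LAL_SRC"] = "/path/to/lalsuite"   # hypothetical checkout location
coh_ptf_files = get_coh_PTF_files(cp, "H1L1V1", "output/GRB150914A",
                                  bank_veto=True, summary_files=True)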
Ejemplo n.º 54
0
def setup_postproc_coh_PTF_workflow(workflow, trig_files, trig_cache,
                                    inj_trig_files, inj_files, inj_trig_caches,
                                    inj_caches, config_file, output_dir,
                                    html_dir, segment_dir, ifos, inj_tags=[],
                                    tags=[]):
    """
    This module sets up the post-processing stage in the workflow, using a
    coh_PTF style set up. This consists of running trig_combiner to find
    coherent triggers, and injfinder to look for injections. It then runs
    a horizon_dist job, trig_cluster to cluster triggers, and injcombiner to
    calculate injection statistics. Finally, efficiency and sbv_plotter jobs
    calculate efficiency and signal based veto statistics and make plots.
    
    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the jobs will be added to.
    trig_files : pycbc.workflow.core.FileList
        A FileList containing the coh_PTF trigger files to be processed.

    Returns
    --------
    pp_outs : pycbc.workflow.core.FileList
        A FileList of the post-processing output files.
    """
    cp = workflow.cp
    full_segment = trig_files[0].segment
    trig_name = cp.get("workflow", "trigger-name")
    grb_string = "GRB" + trig_name
    num_trials = int(cp.get("trig_combiner", "num-trials"))

    pp_outs = FileList([])
    pp_nodes = []

    # Set up needed exe classes
    trig_combiner_exe = os.path.basename(cp.get("executables",
                                                "trig_combiner"))
    trig_combiner_class = select_generic_executable(workflow, "trig_combiner")

    trig_cluster_exe = os.path.basename(cp.get("executables", "trig_cluster"))
    trig_cluster_class = select_generic_executable(workflow, "trig_cluster")


    sbv_plotter_exe = os.path.basename(cp.get("executables", "sbv_plotter"))
    sbv_plotter_class = select_generic_executable(workflow, "sbv_plotter")
    
    efficiency_exe = os.path.basename(cp.get("executables", "efficiency"))
    efficiency_class = select_generic_executable(workflow, "efficiency")
    """
    horizon_dist_exe = os.path.basename(cp.get("executables",
                                               "horizon_dist"))
    horizon_dist_class = select_generic_executable(workflow,
                                                   "horizon_dist")
    """
    html_summary_exe = os.path.basename(cp.get("executables", "html_summary"))
    html_summary_class = select_generic_executable(workflow, "html_summary")

    # Set up trig_combiner job
    trig_combiner_out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]
    trig_combiner_jobs = trig_combiner_class(cp, "trig_combiner", ifo=ifos, 
                                             out_dir=output_dir, tags=tags)
    trig_combiner_node, trig_combiner_outs = trig_combiner_jobs.create_node(\
            trig_files, segment_dir, out_tags=trig_combiner_out_tags,
            tags=tags)
    pp_nodes.append(trig_combiner_node)
    workflow.add_node(trig_combiner_node)
    pp_outs.extend(trig_combiner_outs)

    # Initialise trig_cluster class
    trig_cluster_outs = FileList([])
    trig_cluster_jobs = trig_cluster_class(cp, "trig_cluster", ifo=ifos,
                                           out_dir=output_dir, tags=tags)

    # Set up injfinder jobs
    if cp.has_section("workflow-injections"):
        injfinder_nodes = []
        injcombiner_parent_nodes = []

        injfinder_exe = os.path.basename(cp.get("executables", "injfinder"))
        injfinder_class = select_generic_executable(workflow, "injfinder")
        injfinder_jobs = injfinder_class(cp, "injfinder", ifo=ifos,
                                         out_dir=output_dir, tags=tags)

        injcombiner_exe = os.path.basename(cp.get("executables",
                                                  "injcombiner"))
        injcombiner_class = select_generic_executable(workflow, "injcombiner")
        injcombiner_jobs = injcombiner_class(cp, "injcombiner", ifo=ifos,
                                             out_dir=output_dir, tags=tags)

        injfinder_outs = FileList([])
        for inj_tag in inj_tags:
            triggers = FileList([file for file in inj_trig_files \
                                 if inj_tag in file.tag_str])
            injections = FileList([file for file in inj_files \
                                   if inj_tag in file.tag_str])
            trig_cache = [file for file in inj_trig_caches \
                          if inj_tag in file.tag_str][0]
            inj_cache = [file for file in inj_caches \
                         if inj_tag in file.tag_str][0]
            injfinder_node, curr_outs = injfinder_jobs.create_node(\
                    triggers, injections, segment_dir, tags=[inj_tag])
            injfinder_nodes.append(injfinder_node)
            pp_nodes.append(injfinder_node)
            workflow.add_node(injfinder_node)
            injfinder_outs.extend(curr_outs)
            if "DETECTION" not in curr_outs[0].tag_str:
                injcombiner_parent_nodes.append(injfinder_node)

        pp_outs.extend(injfinder_outs)

        # Make injfinder output cache
        fm_cache = File(ifos, "foundmissed", full_segment,
                        extension="lcf", directory=output_dir)
        fm_cache.PFN(fm_cache.cache_entry.path, site="local")
        injfinder_outs.convert_to_lal_cache().tofile(\
                open(fm_cache.storage_path, "w"))
        pp_outs.extend(FileList([fm_cache]))

        # Set up injcombiner jobs
        injcombiner_outs = FileList([file for file in injfinder_outs \
                                     if "DETECTION" in file.tag_str])
        injcombiner_tags = [inj_tag for inj_tag in inj_tags \
                            if "DETECTION" not in inj_tag]
        injcombiner_out_tags = [injcombiner_outs[0].tag_str.rsplit('_', 1)[0]]
        injcombiner_nodes = []

        for injcombiner_tag in injcombiner_tags:
            max_inc = cp.get_opt_tags("injections", "max-inc",
                                      [injcombiner_tag])
            inj_str = injcombiner_tag[:4]
            inputs = FileList([file for file in injfinder_outs \
                               if injcombiner_tag in file.tagged_description])
            #                   if any(tag in file.tagged_description \
            #                          for tag in injcombiner_tags)])
            injcombiner_node, curr_outs = injcombiner_jobs.create_node(\
                    fm_cache, inputs, inj_str, max_inc, workflow.analysis_time)
            injcombiner_nodes.append(injcombiner_node)
            injcombiner_out_tags.append("%s_FILTERED_%s" % (inj_str, max_inc))
            injcombiner_outs.extend(curr_outs)
            pp_outs.extend(curr_outs)
            pp_nodes.append(injcombiner_node)
            workflow.add_node(injcombiner_node)
            for parent_node in injcombiner_parent_nodes:
                dep = dax.Dependency(parent=parent_node._dax_node,
                                     child=injcombiner_node._dax_node)
                workflow._adag.addDependency(dep)

        # Initialise injection_efficiency class
        inj_efficiency_jobs = efficiency_class(cp, "inj_efficiency", ifo=ifos,
                                               out_dir=output_dir, tags=tags)

    # Initialise sbv_plotter class
    sbv_plotter_outs = FileList([])
    sbv_plotter_jobs = sbv_plotter_class(cp, "sbv_plotter", ifo=ifos,
                                         out_dir=output_dir, tags=tags)

    # Initialise efficiency class
    efficiency_outs = FileList([])
    efficiency_jobs = efficiency_class(cp, "efficiency", ifo=ifos,
                                       out_dir=output_dir, tags=tags)

    # Initialise html_summary class
    html_summary_jobs = html_summary_class(cp, "html_summary", ifo=ifos,
                                           out_dir=output_dir, tags=tags)

    # Add trig_cluster jobs and their corresponding plotting jobs
    for out_tag in trig_combiner_out_tags:
        unclust_file = [file for file in trig_combiner_outs \
                        if out_tag in file.tag_str][0]
        trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        trig_cluster_outs.extend(curr_outs)
        clust_file = curr_outs[0]
        if out_tag != "ONSOURCE":
            # Add memory requirement for jobs with potentially large files
            trig_cluster_node.set_memory(1300)
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add sbv_plotter job
            sbv_out_tags = [out_tag, "_clustered"]
            sbv_plotter_node = sbv_plotter_jobs.create_node(clust_file,
                                                            segment_dir,
                                                            tags=sbv_out_tags)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)

            if out_tag == "OFFSOURCE":
                offsource_clustered = clust_file
                off_node = sbv_plotter_node

                found_inj_files = FileList([file for file in injcombiner_outs \
                                            if "FOUND" in file.tag_str])
                for curr_injs in found_inj_files:
                    curr_tags = [tag for tag in injcombiner_out_tags \
                                 if tag in curr_injs.name]
                    curr_tags.append("_clustered")
                    sbv_plotter_node = sbv_plotter_jobs.create_node(clust_file,
                            segment_dir, inj_file=curr_injs, tags=curr_tags)
                    pp_nodes.append(sbv_plotter_node)
                    workflow.add_node(sbv_plotter_node)
                    dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                         child=sbv_plotter_node._dax_node)
                    workflow._adag.addDependency(dep)
                    for parent_node in injcombiner_nodes:
                        dep = dax.Dependency(parent=parent_node._dax_node,
                                             child=sbv_plotter_node._dax_node)
                        workflow._adag.addDependency(dep)

            # Also add sbv_plotter job for unclustered triggers
            sbv_plotter_node = sbv_plotter_jobs.create_node(unclust_file,
                    segment_dir, tags=[out_tag, "_unclustered"])
            sbv_plotter_node.set_memory(1300)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)
        else:
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add efficiency job for on/off
            efficiency_node = efficiency_jobs.create_node(clust_file,
                    offsource_clustered, segment_dir, tags=[out_tag])
            pp_nodes.append(efficiency_node)
            workflow.add_node(efficiency_node)
            dep = dax.Dependency(parent=off_node._dax_node,
                                 child=efficiency_node._dax_node)
            workflow._adag.addDependency(dep)

            if cp.has_section("workflow-injections"):
                for tag in injcombiner_out_tags:
                    found_file = [file for file in injcombiner_outs \
                                  if tag + "_FOUND" in file.tag_str][0]
                    missed_file = [file for file in injcombiner_outs \
                                   if tag + "_MISSED" in file.tag_str][0]
                    inj_efficiency_node = inj_efficiency_jobs.create_node(\
                            clust_file, offsource_clustered, segment_dir,
                            found_file, missed_file, tags=[out_tag, tag])
                    pp_nodes.append(inj_efficiency_node)
                    workflow.add_node(inj_efficiency_node)
                    dep = dax.Dependency(parent=off_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                    for injcombiner_node in injcombiner_nodes:
                        dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)
                    for injfinder_node in injfinder_nodes:
                        dep = dax.Dependency(parent=injfinder_node._dax_node,
                                child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)

    # Add further trig_cluster jobs for trials
    trial = 1

    while trial <= num_trials:
        trial_tag = "OFFTRIAL_%d" % trial
        unclust_file = [file for file in trig_combiner_outs \
                        if trial_tag in file.tag_str][0]
        trig_cluster_node, clust_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        clust_file = clust_outs[0]
        trig_cluster_outs.extend(clust_outs)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)

        # Add efficiency job
        efficiency_node = efficiency_jobs.create_node(clust_file,
                offsource_clustered, segment_dir, tags=[trial_tag])
        pp_nodes.append(efficiency_node)
        workflow.add_node(efficiency_node)
        dep = dax.Dependency(parent=off_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)
        dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)

        # Adding inj_efficiency job
        if cp.has_section("workflow-injections"):
            for tag in injcombiner_out_tags:
                found_file = [file for file in injcombiner_outs \
                              if tag + "_FOUND" in file.tag_str][0]
                missed_file = [file for file in injcombiner_outs \
                               if tag + "_MISSED" in file.tag_str][0]
                inj_efficiency_node = inj_efficiency_jobs.create_node(\
                        clust_file, offsource_clustered, segment_dir,
                        found_file, missed_file, tags=[trial_tag, tag])
                pp_nodes.append(inj_efficiency_node)
                workflow.add_node(inj_efficiency_node)
                dep = dax.Dependency(parent=off_node._dax_node,
                                     child=inj_efficiency_node._dax_node)
                workflow._adag.addDependency(dep)
                for injcombiner_node in injcombiner_nodes:
                    dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                for injfinder_node in injfinder_nodes:
                    dep = dax.Dependency(parent=injfinder_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)

        trial += 1

    # Initialise html_summary class and set up job
    #FIXME: We may want this job to run even if some jobs fail
    html_summary_jobs = html_summary_class(cp, "html_summary", ifo=ifos,
                                           out_dir=output_dir, tags=tags)
    if cp.has_section("workflow-injections"):
        tuning_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                       if "DETECTION" in inj_tag]
        exclusion_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                          if "DETECTION" not in inj_tag]
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                tuning_tags=tuning_tags, exclusion_tags=exclusion_tags,
                html_dir=html_dir)
    else:
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                                                          html_dir=html_dir)
    workflow.add_node(html_summary_node)
    for pp_node in pp_nodes:
        dep = dax.Dependency(parent=pp_node._dax_node,
                             child=html_summary_node._dax_node)
        workflow._adag.addDependency(dep)

    # Make the open box shell script
    open_box_cmd = ' '.join(html_summary_node.get_command_line())
    open_box_cmd += "--open-box"
    open_box_path = "%s/open_the_box.sh" % output_dir
    f = open(open_box_path, "w")
    f.write("#!/bin/sh\n%s" % open_box_cmd)
    f.close()
    os.chmod(open_box_path, 0500)

    pp_outs.extend(trig_cluster_outs)

    return pp_outs
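A hypothetical sketch of wiring this post-processing stage into a PyGRB workflow. Every FileList argument is assumed to come from the earlier matched-filter and injection stages, and the directory names, ifo string and injection tags are illustrative:

pp_files = setup_postproc_coh_PTF_workflow(
    workflow, trig_files, trig_cache, inj_trig_files, inj_files,
    inj_trig_caches, inj_caches, config_file, 'post_processing', 'html',
    'segments', 'H1L1V1', inj_tags=['BNSINJ', 'NSBHINJ'], tags=[])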
Ejemplo n.º 55
0
def setup_postproc_pipedown_workflow(workflow, trigger_files, summary_xml_files,
                                  output_dir, tags=[], veto_cats=[]):
    """
    This module sets up the post-processing stage in the workflow, using a
    pipedown style set up. This consists of running compute_durations to
    determine and store the analysis time (foreground and background). It then
    runs cfar jobs to determine the false alarm rate for all triggers
    (simulations or otherwise) in the input database.
    Pipedown expects to take as input (at this stage) a single database
    containing all triggers. This sub-module follows that same idea, so
    len(trigger_files) must equal 1 (for every DQ category that we will run).

    Parameters
    ----------  
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    trigger_files : pycbc.workflow.core.FileList
        A FileList containing the combined databases at CAT_1,2,3... that
        will be used to calculate FARs
    summary_xml_files : pycbc.workflow.core.FileList (required)
        A FileList of the output of the analysislogging_utils module.
        For pipedown-style post-processing this should be one file containing
        a segment table holding the single detector analysed times.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.
    veto_cats : list of integers (default = [], non-empty list required)
        Decide which veto category levels should be used in post-processing.
        For example tell the workflow to only generate results at cumulative
        categories 2, 3 and 4 by supplying [2,3,4] here.

    Returns
    --------
    final_files : pycbc.workflow.core.FileList
        A list of the final SQL databases containing computed FARs.
    """
    if not veto_cats:
        raise ValueError("A non-empty list of veto categories is required.")
    if not len(summary_xml_files) == 1:
        errMsg = "I need exactly one summaryXML file, got %d." \
                                                     %(len(summary_xml_files),)
        raise ValueError(errMsg)

    # Setup needed exe classes
    compute_durations_exe_tag = workflow.cp.get_opt_tags("workflow-postproc",
                                   "postproc-computedurations-exe", tags)
    compute_durations_exe = select_generic_executable(workflow,
                                                     compute_durations_exe_tag)
    cfar_exe_tag = workflow.cp.get_opt_tags("workflow-postproc", "postproc-cfar-exe",
                                       tags)
    cfar_exe = select_generic_executable(workflow, cfar_exe_tag)

    comp_durations_outs = FileList([])
    cfar_outs = FileList([])

    for cat in veto_cats:

        veto_tag = 'CUMULATIVE_CAT_%d' %(cat)
        trig_input_files = trigger_files.find_output_with_tag(veto_tag)
        if not len(trig_input_files) == 1:
            err_msg = "Did not find exactly 1 database input file."
            raise ValueError(err_msg)

        curr_tags = tags + [veto_tag]

        # Choose a label for clustering the jobs
        job_label = get_random_label()

        # Start with compute durations
        computeDurationsJob = compute_durations_exe(workflow.cp, compute_durations_exe_tag,
                                                 ifo=workflow.ifo_string, 
                                                 out_dir=output_dir, 
                                                 tags=curr_tags)
        compute_durations_node = computeDurationsJob.create_node(
                                    workflow.analysis_time, trig_input_files[0],
                                    summary_xml_files[0])
        compute_durations_node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(compute_durations_node)

        # Node has only one output file
        compute_durations_out = compute_durations_node.output_files[0]
        comp_durations_outs.append(compute_durations_out)

        # Add the calculate FAR (cfar) job
        cfar_job = cfar_exe(workflow.cp, cfar_exe_tag, 
                                      ifo=workflow.ifo_string, 
                                      out_dir=output_dir, 
                                      tags=curr_tags)
        cfar_node = cfar_job.create_node(workflow.analysis_time,
                                       compute_durations_out)
        cfar_node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(cfar_node)

        # Node has only one output file
        cfar_out = cfar_node.output_files[0]
        cfar_outs.append(cfar_out)

    return cfar_outs
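The two executables used here are selected via options that this function reads from the [workflow-postproc] section. A minimal, hypothetical fragment (the values are assumed to name entries under [executables]) could read:

[workflow-postproc]
postproc-computedurations-exe = compute_durations
postproc-cfar-exe = cfar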
Ejemplo n.º 56
0
def setup_postprocprep_gstlal_workflow(workflow, coinc_files, output_dir,
                                       tags=[], injection_files=None,
                                       veto_files=None, inj_less_tag=None,
                                       injection_tags=[], veto_cat=None,
                                       summary_xml_files=None,
                                       likelihood_files=[]):
    """
    Parameters
    -----------
    workflow : workflow.Workflow
        The workflow instance that the coincidence jobs will be added to.
    coinc_files : workflow.FileList
        A FileList of the coincident trigger files that are used as
        input at this stage.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.
    injection_files : workflow.FileList (optional, default=None)
        The injection files to be used in this stage. An empty list (or any
        other input that evaluates as false) is valid and will imply that no
        injections are being done.
    veto_files : workflow.FileList (required)
        The data quality files to be used in this stage. This is required and
        will be used to determine the analysed times when doing post-processing.
    inj_less_tag : string (required)
        The tag that identifies files that do not have simulations in them.
        I.e. the primary search results.
    injection_tags : list of strings (optional, default = [])
        Each injection file has a unique tag. If used in the method, this
        tells the post-processing preparation code which injection tags it
        should include when creating the combined output.
    veto_cat : int (optional, default = None)
        FIXME: How does gstlal deal with veto categories?
        Hardcode to CAT1 for now.
    summary_xml_files : workflow.FileList
        A FileList of the output of the analysislogging_utils module.
        Here, this will be one file that includes the segments analysed by the
        workflow.

    Returns
    --------
    finalFiles : workflow.FileList
        A list of the single SQL database storing the clustered, injection
        found, triggers for all injections, time slid and zero lag analyses.
    initialSqlFiles : workflow.FileList
        The SQL files before clustering is applied and injection finding
        performed.
    clusteredSqlFiles : workflow.FileList
        The clustered SQL files before injection finding performed.
    combinedSqlFiles : workflow.FileList
        A combined file containing all triggers after clustering, including
        the injection and veto tables, but before injection finding performed.
        Probably there is no need to ever keep this file and it will be a
        temporary file in most cases.
    """
    # Sanity checks
    if not len(summary_xml_files) == 1:
        errMsg = "I need exactly one summaryXML file, got %d." \
                                                     %(len(summary_xml_files),)
        raise ValueError(errMsg)

    # Setup needed exe classes
    run_sqlite_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-runsqlite-exe", tags)
    ligolw_sqlite_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-ligolwsqlite-exe", tags) 
    inspinjfind_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-inspinjfind-exe", tags)
    sql_to_xml_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-sqltoxml-exe", tags)
    pycbc_picklehor_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-picklehor-exe", tags)
    pycbc_combllhood_exe_name=workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-combllhood-exe", tags)
    pycbc_genranking_exe_name=workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-genranking-exe", tags)
    pycbc_compllhood_exe_name=workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-compllhood-exe", tags)
    marg_likelihood_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-marglikelihood-exe", tags)
    far_gstlal_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-fargstlal-exe", tags)
    plot_summary_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-plotsummary-exe", tags)
    plot_sensitivity_exe_name=workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-plotsensitivity-exe", tags)
    plot_background_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-plotbackground-exe", tags)
    summary_page_exe_name = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-summarypage-exe", tags)


    run_sqlite_exe = select_generic_executable(workflow, run_sqlite_exe_name)
    ligolw_sqlite_exe = select_generic_executable(workflow,
                                                        ligolw_sqlite_exe_name)
    inspinjfind_exe = select_generic_executable(workflow, inspinjfind_exe_name)
    sql_to_xml_exe = select_generic_executable(workflow, sql_to_xml_exe_name)
    pycbc_picklehor_exe = select_generic_executable(workflow,
                                                      pycbc_picklehor_exe_name)
    pycbc_combllhood_exe = select_generic_executable(workflow,
                                                     pycbc_combllhood_exe_name)
    pycbc_genranking_exe = select_generic_executable(workflow,
                                                     pycbc_genranking_exe_name)
    pycbc_compllhood_exe = select_generic_executable(workflow,
                                                     pycbc_compllhood_exe_name)
    marg_likelihood_exe = select_generic_executable(workflow,
                                                      marg_likelihood_exe_name)
    far_gstlal_exe = select_generic_executable(workflow, far_gstlal_exe_name)
    plot_summary_exe = select_generic_executable(workflow,
                                                         plot_summary_exe_name)
    plot_sensitivity_exe = select_generic_executable(workflow,
                                                     plot_sensitivity_exe_name)
    plot_background_exe = select_generic_executable(workflow,
                                                      plot_background_exe_name)
    summary_page_exe = select_generic_executable(workflow,
                                                         summary_page_exe_name)


    # SETUP
    # FIXME: Some hacking is still needed while we support pipedown
    # FIXME: How does gstlal deal with veto categories?
    #         Hardcode to CAT1 for now.
    veto_tag = 'CUMULATIVE_CAT_%d' %(veto_cat,)
    dq_seg_file = veto_files.find_output_with_tag(veto_tag)
    assert len(dq_seg_file) == 1
    dq_seg_file = dq_seg_file[0]
    #if not len(dqSegFile) == 1:
    #    errMsg = "Did not find exactly 1 data quality file."
    #    raise ValueError(errMsg)
    # FIXME: Here we set the dqVetoName to be compatible with pipedown
    pipedown_dq_veto_name = 'CAT_%d_VETO' %(veto_cat,)

    # First we need to convert to SQL; this is STAGE0
    # Do for all injection runs and zero lag
    stage0_outputs = {}
    for inj_tag in [inj_less_tag] + injection_tags:
        curr_tags = tags + [inj_tag, veto_tag]
        trig_veto_inp_files = \
                  coinc_files.find_output_with_tag(pipedown_dq_veto_name)
        trig_inp_files = trig_veto_inp_files.find_output_with_tag(inj_tag)
        stage0_job = ligolw_sqlite_exe(workflow.cp, ligolw_sqlite_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE0'] + curr_tags)
        stage0_outputs[inj_tag] = FileList([])
        assert len(trig_inp_files) > 0
        for file in trig_inp_files:
            stage0_node = stage0_job.create_node(file.segment, [file])
            workflow.add_node(stage0_node)
            # Node has only one output file
            stage0_out = stage0_node.output_files[0]
            stage0_outputs[inj_tag].append(stage0_out)

    curr_tags = tags + [veto_tag]

    # NOW WE DO LIKELIHOOD SETUP
    pycbc_picklehor_job = pycbc_picklehor_exe(workflow.cp,
                                  pycbc_picklehor_exe_name,
                                  ifo=workflow.ifo_string,
                                  out_dir=output_dir,
                                  tags=curr_tags)
    pycbc_combllhood_job = pycbc_combllhood_exe(workflow.cp,
                                  pycbc_combllhood_exe_name,
                                  ifo=workflow.ifo_string,
                                  out_dir=output_dir,
                                  tags=curr_tags)
    pycbc_genranking_job = pycbc_genranking_exe(workflow.cp, 
                                  pycbc_genranking_exe_name,
                                  ifo=workflow.ifo_string,
                                  out_dir=output_dir,
                                  tags=curr_tags)
    marg_likelihood_job_1 = marg_likelihood_exe(workflow.cp,
                                  marg_likelihood_exe_name,
                                  ifo=workflow.ifo_string,
                                  out_dir=output_dir,
                                  tags=['MARG1']+curr_tags)
    marg_likelihood_job_2 = marg_likelihood_exe(workflow.cp,
                                  marg_likelihood_exe_name,
                                  ifo=workflow.ifo_string,
                                  out_dir=output_dir,
                                  tags=['MARG2']+curr_tags)


    # Begin with finding the horizon distances
    picklehor_inputs = stage0_outputs[inj_less_tag]
    node = pycbc_picklehor_job.create_node(workflow.analysis_time,
                                                              picklehor_inputs)
    workflow.add_node(node)
    horizon_dist_file = node.output_files[0]
    # Then combine all likelihood files
    combllhood_inputs = likelihood_files.find_output_with_tag(\
                                                         pipedown_dq_veto_name) 
    combllhood_inputs = combllhood_inputs.find_output_with_tag(inj_less_tag)
    assert len(combllhood_inputs) > 0
    node = pycbc_combllhood_job.create_node(workflow.analysis_time,
                                          combllhood_inputs, horizon_dist_file)
    workflow.add_node(node)
    likelihood_file = node.output_files[0]
    # Also compute the ranking file
    node = pycbc_genranking_job.create_node(workflow.analysis_time,
                                            likelihood_file, horizon_dist_file)
    workflow.add_node(node)
    ranking_likelihood_file = node.output_files[0]
    # And marginalize (twice for some reason!)
    node = marg_likelihood_job_1.create_node(workflow.analysis_time,
                                                       ranking_likelihood_file)
    workflow.add_node(node)
    marg_likelihood_file_1 = node.output_files[0]
    node = marg_likelihood_job_2.create_node(workflow.analysis_time,
                                                        marg_likelihood_file_1)
    workflow.add_node(node)
    marg_likelihood_file_2 = node.output_files[0]

    # Now do the sqlite conditioning. This has a few stages.

    # STAGE 1: Populate likelihood in all input files
    # STAGE 2: Run run_sqlite on all outputs of stage 1
    # STAGE 3: Combine all files into one sqlite file
    # STAGE 4: Run run_sqlite on the outputs of stage 3
    # STAGE 5: Add segments.xml and inj.xml
    # STAGE 6: Run run_sqlite (cluster and simplify) on the outputs of stage 5
    # STAGE 7: Dump the SQL database to xml
    # STAGE 8: Run injfind on the xml document
    # STAGE 9: Convert back to SQL

    stage1_outputs = {}
    stage2_outputs = {}
    stage3_outputs = {}
    stage4_outputs = {}
    stage5_outputs = {}
    stage6_outputs = {}
    stage7_outputs = {}
    stage8_outputs = {}
    stage9_outputs = {}
    final_outputs = FileList([])
    # Do for all injection runs and zero lag
    for inj_tag in [inj_less_tag] + injection_tags:
        curr_tags = tags + [inj_tag, veto_tag]
        trig_inp_files = stage0_outputs[inj_tag]
        stage1_job = pycbc_compllhood_exe(workflow.cp,
                                      pycbc_compllhood_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE1']+curr_tags)
        stage2_job = run_sqlite_exe(workflow.cp, run_sqlite_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE2'] + curr_tags)
        stage3_job = ligolw_sqlite_exe(workflow.cp, ligolw_sqlite_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE3'] + curr_tags)
        stage4_job = run_sqlite_exe(workflow.cp, run_sqlite_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE4'] + curr_tags)
        stage5_job = ligolw_sqlite_exe(workflow.cp, ligolw_sqlite_exe_name,
                                      ifo=workflow.ifo_string,
                                      out_dir=output_dir,
                                      tags=['STAGE5'] + curr_tags)
        if inj_tag == inj_less_tag:
            # For zero-lag we stop here, so use the FINAL tag to indicate this
            stage6_zl_job = run_sqlite_exe(workflow.cp, run_sqlite_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=['FINAL'] + curr_tags)
        else:
            stage6_job = run_sqlite_exe(workflow.cp, run_sqlite_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=['STAGE6'] + curr_tags)
            stage7_job = sql_to_xml_exe(workflow.cp, sql_to_xml_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=['STAGE7'] + curr_tags)
            stage8_job = inspinjfind_exe(workflow.cp, inspinjfind_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=['STAGE8'] + curr_tags)
            stage9_job = ligolw_sqlite_exe(workflow.cp, ligolw_sqlite_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=['FINAL'] + curr_tags)

        stage1_outputs[inj_tag] = FileList([])
        stage2_outputs[inj_tag] = FileList([])
        assert len(trig_inp_files) > 0
        for file in trig_inp_files:
            stage1_node = stage1_job.create_node(file.segment, file,
                                            likelihood_file, horizon_dist_file)
            workflow.add_node(stage1_node)
            # Node has only one output file
            stage1_out = stage1_node.output_files[0]
            stage1_outputs[inj_tag].append(stage1_out)
            stage2_node = stage2_job.create_node(stage1_out.segment,
                                                                    stage1_out)
            workflow.add_node(stage2_node)
            # Node has only one output file
            stage2_out = stage2_node.output_files[0]
            stage2_outputs[inj_tag].append(stage2_out)

        stage3_node = stage3_job.create_node(workflow.analysis_time,
                                    stage2_outputs[inj_tag], workflow=workflow)
        workflow.add_node(stage3_node)
        # Node has only one output file
        stage3_out = stage3_node.output_files[0]
        stage3_outputs[inj_tag] = stage3_out
        stage4_node = stage4_job.create_node(workflow.analysis_time,
                                                                    stage3_out)
        workflow.add_node(stage4_node)
        # Node has only one output file
        stage4_out = stage4_node.output_files[0]
        stage4_outputs[inj_tag] = stage4_out

        stage5_inputs = [stage4_out]
        stage5_inputs.append(summary_xml_files[0])
        stage5_inputs.append(dq_seg_file)
        if inj_tag != inj_less_tag:
            inj_file = injection_files.find_output_with_tag(inj_tag)
            assert (len(inj_file) == 1)
            stage5_inputs.append(inj_file[0])
        stage5_node = stage5_job.create_node(workflow.analysis_time,
                                                                 stage5_inputs)
        workflow.add_node(stage5_node)
        # Node has only one output file
        stage5_out = stage5_node.output_files[0]
        stage5_outputs[inj_tag] = stage5_out
  
        if inj_tag == inj_less_tag:
            stage6_node = stage6_zl_job.create_node(workflow.analysis_time,
                                                                    stage5_out)
            workflow.add_node(stage6_node)
            stage6_out = stage6_node.output_files[0]
            stage6_outputs[inj_tag] = stage6_out
            final_outputs.append(stage6_out)
        else:
            stage6_node = stage6_job.create_node(workflow.analysis_time,
                                                                    stage5_out)
            workflow.add_node(stage6_node)
            stage6_out = stage6_node.output_files[0]
            stage6_outputs[inj_tag] = stage6_out
            stage7_node = stage7_job.create_node(workflow.analysis_time,
                                                                    stage6_out)
            workflow.add_node(stage7_node)
            stage7_out = stage7_node.output_files[0]
            stage7_outputs[inj_tag] = stage7_out
            stage8_node = stage8_job.create_node(workflow.analysis_time,
                                                                    stage7_out)
            workflow.add_node(stage8_node)
            stage8_out = stage8_node.output_files[0]
            stage8_outputs[inj_tag] = stage8_out
            stage9_node = stage9_job.create_node(workflow.analysis_time,
                                                                  [stage8_out])
            workflow.add_node(stage9_node)
            stage9_out = stage9_node.output_files[0]
            stage9_outputs[inj_tag] = stage9_out
            final_outputs.append(stage9_out)

    # Next we run the job that computes the FAR from the snr_chisq histograms
    far_gstlal_outputs = {}
    for inj_tag in [inj_less_tag] + injection_tags:
        curr_tags = tags + [inj_tag, veto_tag]
        far_gstlal_job = far_gstlal_exe(workflow.cp, far_gstlal_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir, tags=curr_tags)
        trig_veto_inp_files = \
                  final_outputs.find_output_with_tag(veto_tag)
        trig_inp_files = trig_veto_inp_files.find_output_with_tag(inj_tag)
        assert len(trig_inp_files) == 1
        input_database = trig_inp_files[0]
        if inj_tag != inj_less_tag:
            no_inj_db = trig_veto_inp_files.find_output_with_tag(inj_less_tag)
            assert len(no_inj_db) == 1
            no_inj_db = no_inj_db[0]
            write_background = False
        else:
            # Here we don't want to provide the same file as a dependency
            # twice. Therefore we just give the non-injection DB and the code
            # assumes this is also the input database if it is not given.
            # Also, we only want the background file once.
            no_inj_db =  input_database
            input_database = None
            write_background = True
        far_gstlal_node = far_gstlal_job.create_node(workflow.analysis_time,
                                        no_inj_db, marg_likelihood_file_2,
                                        inj_database=input_database,
                                        write_background_bins=write_background)
        workflow.add_node(far_gstlal_node)
        outputs = far_gstlal_node.output_files
        if inj_tag != inj_less_tag:
            assert len(outputs) == 1
            far_gstlal_outputs[inj_tag] = outputs[0]
        else:
            assert len(outputs) == 2
            sql_out = outputs.find_output_without_tag('POSTMARG')[0]
            xml_out = outputs.find_output_with_tag('POSTMARG')[0]
            far_gstlal_outputs[inj_tag] = sql_out
            post_marginalized_file = xml_out
            

    # Finally some plotting.
    # FIXME: These are given explicit output directories and pegasus does not
    # know about the output files. It would be nice if this was done better.
    curr_tags = tags + [veto_tag]
    plot_summary_job = plot_summary_exe(workflow.cp, plot_summary_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir, tags=curr_tags)
    plot_sensitivity_job = plot_sensitivity_exe(workflow.cp,
                                          plot_sensitivity_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir, tags=curr_tags)
    plot_background_job = plot_background_exe(workflow.cp,
                                          plot_background_exe_name,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir, tags=curr_tags)
    inj_dbs = []
    for inj_tag in injection_tags:
        inj_dbs.append(far_gstlal_outputs[inj_tag])
    non_inj_db = far_gstlal_outputs[inj_less_tag]
    
    plot_summary_node = plot_summary_job.create_node(non_inj_db, inj_dbs)
    plot_background_node = plot_background_job.create_node(non_inj_db,
                                                        post_marginalized_file)
    plot_sensitivity_node = plot_sensitivity_job.create_node(non_inj_db,
                                                                       inj_dbs)

    workflow.add_node(plot_summary_node)
    workflow.add_node(plot_background_node)
    workflow.add_node(plot_sensitivity_node)

    # And make the html pages
    parents = [plot_summary_node, plot_background_node, plot_sensitivity_node]
    closed_summarypage_job = summary_page_exe(workflow.cp,
                                              summary_page_exe_name,
                                              ifo=workflow.ifo_string,
                                              out_dir=output_dir,
                                              tags=['CLOSEDBOX'] + curr_tags)
    open_summarypage_job = summary_page_exe(workflow.cp, 
                                              summary_page_exe_name,
                                              ifo=workflow.ifo_string,
                                              out_dir=output_dir,
                                              tags=['OPENBOX'] + curr_tags)

    closed_summarypage_node = closed_summarypage_job.create_and_add_node(\
                                              workflow, parents)
    open_summarypage_node = open_summarypage_job.create_and_add_node(workflow,
                                              parents)

    # FIXME: Maybe concatenate and return all other outputs if needed elsewhere
    # FIXME: Move to pp utils and return the FAR files.
    return final_outputs
Ejemplo n.º 57
0
def setup_timeslides_workflow(workflow, output_dir=None, tags=None,
                              timeSlideSectionName='ligolw_tisi'):
    '''
    Set up generation of time_slide input files in the workflow. Currently
    this is used only with ligolw_tisi to generate files containing the list
    of slides to be performed in each time slide job.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = None)
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. This will be used in output names.
    timeSlideSectionName : string (optional, default='ligolw_tisi')
        The string that corresponds to the option describing the exe location
        in the [executables] section of the .ini file and that corresponds to
        the section (and sub-sections) giving the options that will be given to
        the code at run time.

    Returns
    --------
    timeSlideOuts : pycbc.workflow.core.FileList
        The list of time slide files created by this call.
    '''
    logging.info("Entering time slides setup module.")
    make_analysis_dir(output_dir)
    # Get ifo list and full analysis segment for output file naming
    ifoList = workflow.ifos
    ifo_string = workflow.ifo_string
    fullSegment = workflow.analysis_time

    # Identify which time-slides to do by presence of sub-sections in the
    # configuration file
    all_sec = workflow.cp.sections()
    timeSlideSections = [sec for sec in all_sec if sec.startswith('tisi-')]
    timeSlideTags = [(sec.split('-')[-1]).upper() for sec in timeSlideSections]

    timeSlideOuts = FileList([])

    # FIXME: Add ability to specify different exes

    # Make the timeSlideFiles
    for timeSlideTag in timeSlideTags:
        currTags = tags + [timeSlideTag]

        timeSlideMethod = workflow.cp.get_opt_tags("workflow-timeslides",
                                                 "timeslides-method", currTags)

        if timeSlideMethod in ["IN_WORKFLOW", "AT_RUNTIME"]:
            timeSlideExeTag = workflow.cp.get_opt_tags("workflow-timeslides",
                                                    "timeslides-exe", currTags)
            timeSlideExe = select_generic_executable(workflow, timeSlideExeTag)
            timeSlideJob = timeSlideExe(workflow.cp, timeSlideExeTag, ifos=ifo_string,
                                             tags=currTags, out_dir=output_dir)
            timeSlideNode = timeSlideJob.create_node(fullSegment)
            if timeSlideMethod == "AT_RUNTIME":
                workflow.execute_node(timeSlideNode)
            else:
                workflow.add_node(timeSlideNode)
            tisiOutFile = timeSlideNode.output_files[0]
        elif timeSlideMethod == "PREGENERATED":
            timeSlideFilePath = workflow.cp.get_opt_tags("workflow-timeslides",
                                      "timeslides-pregenerated-file", currTags)
            file_url = urlparse.urljoin('file:', urllib.pathname2url(\
                                                  timeSlideFilePath))
            tisiOutFile = File(ifo_string, 'PREGEN_TIMESLIDES',
                               fullSegment, file_url, tags=currTags)
        else:
            err_msg = "Time slide method must be one of IN_WORKFLOW, "
            err_msg += "AT_RUNTIME or PREGENERATED. Got %s." % (timeSlideMethod)
            raise ValueError(err_msg)

        timeSlideOuts.append(tisiOutFile)

    return timeSlideOuts
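
# Usage sketch: a hypothetical call to the function above. It assumes a
# configured Workflow instance named `workflow` whose .ini file provides a
# [workflow-timeslides] section with a `timeslides-method` option and, per
# [tisi-...] sub-section, either a `timeslides-exe` or a
# `timeslides-pregenerated-file` option, as read in the loop above. The
# output directory name is illustrative only:
#
#     time_slide_files = setup_timeslides_workflow(workflow,
#                                                  output_dir='time_slide_files')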
Ejemplo n.º 58
0
def setup_injection_workflow(workflow, output_dir=None,
                             inj_section_name='injections', tags=None):
    """
    This function is the gateway for setting up injection-generation jobs in a
    workflow. It should be possible for this function to support a number
    of different ways/codes that could be used for doing this, however as this
    will presumably stay as a single call to a single code (which need not be
    inspinj) there are currently no subfunctions in this moudle. 

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    output_dir : path
        The directory in which injection files will be stored.
    inj_section_name : string (optional, default='injections')
        The string that corresponds to the option describing the exe location
        in the [executables] section of the .ini file and that corresponds to
        the section (and sub-sections) giving the options that will be given to
        the code at run time.
    tags : list of strings (optional, default = None)
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. This will be used in output names.

    Returns
    --------
    inj_files : pycbc.workflow.core.FileList
        The list of injection files created by this call.
    inj_tags : list of strings
        The tag corresponding to each injection file and used to uniquely
        identify them. The FileList class contains functions to search
        based on tags.
    """
    logging.info("Entering injection module.")
    make_analysis_dir(output_dir)
    
    # Get full analysis segment for output file naming
    full_segment = workflow.analysis_time

    inj_tags = []
    inj_files = FileList([])  

    for section in workflow.cp.get_subsections(inj_section_name):
        inj_tag = section.upper()
        curr_tags = tags + [inj_tag]

        # FIXME: Remove once fixed in pipedown
        # TEMPORARILY we require inj tags to end in "INJ"
        if not inj_tag.endswith("INJ"):
            err_msg = "Currently the workflow requires injection names to end "
            err_msg += "with an 'inj' suffix, e.g. bnslininj or bbhinj. "
            err_msg += "%s is not acceptable." % (inj_tag.lower(),)
            raise ValueError(err_msg)

        # Parse for options in ini file
        injection_method = workflow.cp.get_opt_tags("workflow-injections", 
                                                 "injections-method", curr_tags)

        if injection_method in ["IN_WORKFLOW", "AT_RUNTIME"]:
            # FIXME: Add ability to specify different exes
            inj_job = LalappsInspinjExecutable(workflow.cp, inj_section_name, tags=curr_tags,
                                         out_dir=output_dir, ifos='HL')
            node = inj_job.create_node(full_segment)
            
            if injection_method == "AT_RUNTIME":
                workflow.execute_node(node)
            else:
                workflow.add_node(node)
                
            inj_file = node.output_files[0]
            
        elif injection_method == "PREGENERATED":
            injectionFilePath = workflow.cp.get_opt_tags("workflow-injections",
                                      "injections-pregenerated-file", curr_tags)
            file_url = urlparse.urljoin('file:', 
                                        urllib.pathname2url(injectionFilePath))
            inj_file = File('HL', 'PREGEN_inj_file', full_segment, file_url,
                                        tags=curr_tags)
            inj_file.PFN(injectionFilePath, site='local')
            
        else:
            err = "Injection method must be one of IN_WORKFLOW, "
            err += "AT_RUNTIME or PREGENERATED. Got %s." % (injection_method)
            raise ValueError(err)

        inj_files.append(inj_file)
        inj_tags.append(inj_tag)
        
    logging.info("Leaving injection module.")
    return inj_files, inj_tags
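
# Usage sketch: a hypothetical call to the function above, assuming a
# configured Workflow instance named `workflow`. The .ini file must provide an
# `injections-method` option in [workflow-injections], and every
# [injections-...] sub-section name must end in "inj" (e.g.
# [injections-bnslininj]), as enforced above. The output directory name is
# illustrative only:
#
#     inj_files, inj_tags = setup_injection_workflow(workflow,
#                                                    output_dir='inj_files')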
Ejemplo n.º 59
0
def setup_tmpltbank_pregenerated(workflow, tags=None):
    '''
    Set up a CBC workflow to use a pregenerated template bank.
    The bank given by the 'tmpltbank-pregenerated-bank' option in the
    [workflow-tmpltbank] section will be used as the input file for all
    matched-filtering jobs. If this option is present, the workflow will
    assume that it should be used and will not generate template banks within
    the workflow.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    --------
    tmplt_banks : pycbc.workflow.core.FileList
        The FileList holding the details of the template bank.
    '''
    if tags is None:
        tags = []
    # By default this uses the *same* fixed bank for all ifos, but per-ifo
    # pregenerated banks are also supported below.
    
    # Set up class for holding the banks
    tmplt_banks = FileList([])

    cp = workflow.cp
    global_seg = workflow.analysis_time
    user_tag = "PREGEN_TMPLTBANK"
    try:
        # First check if we have a bank for all ifos
        pre_gen_bank = cp.get_opt_tags('workflow-tmpltbank',
                                           'tmpltbank-pregenerated-bank', tags)
        pre_gen_bank = resolve_url(pre_gen_bank)
        file_url = urlparse.urljoin('file:', urllib.pathname2url(pre_gen_bank))
        curr_file = File(workflow.ifos, user_tag, global_seg, file_url,
                                                                     tags=tags)
        curr_file.PFN(file_url, site='local')
        tmplt_banks.append(curr_file)
    except ConfigParser.Error:
        # Okay then I must have banks for each ifo
        for ifo in workflow.ifos:
            try:
                pre_gen_bank = cp.get_opt_tags('workflow-tmpltbank',
                                'tmpltbank-pregenerated-bank-%s' % ifo.lower(),
                                tags)
                pre_gen_bank = resolve_url(pre_gen_bank)
                file_url = urlparse.urljoin('file:',
                                             urllib.pathname2url(pre_gen_bank))
                curr_file = File(ifo, user_tag, global_seg, file_url,
                                                                     tags=tags)
                curr_file.PFN(file_url, site='local')
                tmplt_banks.append(curr_file)

            except ConfigParser.Error:
                err_msg = "Cannot find pregerated template bank in section "
                err_msg += "[workflow-tmpltbank] or any tagged sections. "
                if tags:
                    tagged_secs = " ".join("[workflow-tmpltbank-%s]" \
                                           %(ifo,) for ifo in workflow.ifos)
                    err_msg += "Tagged sections are %s. " %(tagged_secs,)
                err_msg += "I looked for 'tmpltbank-pregenerated-bank' option "
                err_msg += "and 'tmpltbank-pregenerated-bank-%s'." %(ifo,)
                raise ConfigParser.Error(err_msg)
            
    return tmplt_banks
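
# Configuration sketch (an assumption, not copied from a real .ini file):
# either a single bank for all ifos or one bank per ifo can be given, matching
# the two lookups above. The paths are placeholders:
#
#   [workflow-tmpltbank]
#   tmpltbank-pregenerated-bank = /path/to/bank.xml
#
# or, per detector (H1 shown purely as an illustration):
#
#   tmpltbank-pregenerated-bank-h1 = /path/to/H1-bank.xml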
Ejemplo n.º 60
0
def setup_coh_PTF_injections_pp(wf, inj_trigger_files, inj_files,
                                inj_trigger_caches, inj_caches,
                                pp_nodes, pp_outs, inj_tags, out_dir, seg_dir,
                                ifos, tags=None):
    """
    Set up post-processing for injections.
    """
    if tags is None:
        tags = []
    injfinder_nodes = []
    injcombiner_parent_nodes = []
    inj_sbv_plotter_parent_nodes = []
    full_segment = inj_trigger_files[0].segment

    injfinder_exe = os.path.basename(wf.cp.get("executables", "injfinder"))
    injfinder_class = select_generic_executable(wf, "injfinder")
    injfinder_jobs = injfinder_class(wf.cp, "injfinder", ifo=ifos,
                                     out_dir=out_dir, tags=tags)

    injcombiner_exe = os.path.basename(wf.cp.get("executables", "injcombiner"))
    injcombiner_class = select_generic_executable(wf, "injcombiner")
    injcombiner_jobs = injcombiner_class(wf.cp, "injcombiner", ifo=ifos,
                                         out_dir=out_dir, tags=tags)

    injfinder_outs = FileList([])
    for inj_tag in inj_tags:
        triggers = FileList([file for file in inj_trigger_files \
                             if inj_tag in file.tag_str])
        injections = FileList([file for file in inj_files \
                               if inj_tag in file.tag_str])
        trig_cache = [file for file in inj_trigger_caches \
                      if inj_tag in file.tag_str][0]
        inj_cache = [file for file in inj_caches \
                     if inj_tag in file.tag_str][0]
        injfinder_node, curr_outs = injfinder_jobs.create_node(\
                triggers, injections, seg_dir, tags=[inj_tag])
        injfinder_nodes.append(injfinder_node)
        pp_nodes.append(injfinder_node)
        wf.add_node(injfinder_node)
        injfinder_outs.extend(curr_outs)
        if "DETECTION" not in curr_outs[0].tagged_description:
            injcombiner_parent_nodes.append(injfinder_node)
        else:
            inj_sbv_plotter_parent_nodes.append(injfinder_node)

    pp_outs.extend(injfinder_outs)

    # Make injfinder output cache
    fm_cache = File(ifos, "foundmissed", full_segment,
                    extension="lcf", directory=out_dir)
    fm_cache.PFN(fm_cache.cache_entry.path, site="local")
    injfinder_outs.convert_to_lal_cache().tofile(\
            open(fm_cache.storage_path, "w"))
    pp_outs.extend(FileList([fm_cache]))

    # Set up injcombiner jobs
    injcombiner_outs = FileList([f for f in injfinder_outs \
                                 if "DETECTION" in f.tag_str])
    injcombiner_tags = [inj_tag for inj_tag in inj_tags \
                        if "DETECTION" not in inj_tag]
    injcombiner_out_tags = [i.tag_str.rsplit('_', 1)[0] for i in \
                            injcombiner_outs if "FOUND" in i.tag_str]
    injcombiner_nodes = []

    for injcombiner_tag in injcombiner_tags:
        max_inc = wf.cp.get_opt_tags("injections", "max-inc",
                                     [injcombiner_tag])
        inj_str = injcombiner_tag.replace("INJ", "")
        inputs = FileList([f for f in injfinder_outs \
                           if injcombiner_tag in f.tagged_description])
        injcombiner_node, curr_outs = injcombiner_jobs.create_node(\
                fm_cache, inputs, inj_str, max_inc, wf.analysis_time)
        injcombiner_nodes.append(injcombiner_node)
        injcombiner_out_tags.append("%s_FILTERED_%s"
                                    % (inj_str.split(max_inc)[0], max_inc))
        injcombiner_outs.extend(curr_outs)
        pp_outs.extend(curr_outs)
        pp_nodes.append(injcombiner_node)
        wf.add_node(injcombiner_node)
        for parent_node in injcombiner_parent_nodes:
            dep = dax.Dependency(parent=parent_node._dax_node,
                                 child=injcombiner_node._dax_node)
            wf._adag.addDependency(dep)

    return (wf, injfinder_nodes, injfinder_outs, fm_cache, injcombiner_nodes,
            injcombiner_outs, injcombiner_out_tags,
            inj_sbv_plotter_parent_nodes, pp_nodes, pp_outs)
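
# Configuration note: the injcombiner loop above reads a `max-inc` option from
# the tagged [injections-<tag>] sections via
# wf.cp.get_opt_tags("injections", "max-inc", [injcombiner_tag]). A minimal
# sketch of such a section (the tag name and value are illustrative only):
#
#   [injections-nsbhinj]
#   max-inc = 30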