def compute_inj_optimal_snr(workflow, inj_file, precalc_psd_files, out_dir,
                            tags=None):
    "Set up a job for computing optimal SNRs of a sim_inspiral file."
    if tags is None:
        tags = []

    try:
        factor = int(workflow.cp.get_opt_tags('workflow-optimal-snr',
                                              'parallelization-factor', tags))
    except ConfigParser.Error:
        factor = 1

    if factor == 1:
        # parallelization factor not given - default to a single optimal SNR
        # job
        opt_snr_exe = PyCBCOptimalSNRExecutable(workflow.cp, 'optimal_snr',
                                                ifos=workflow.ifos,
                                                out_dir=out_dir, tags=tags)
        node = opt_snr_exe.create_node(workflow, inj_file, precalc_psd_files,
                                       '0/1')
        workflow += node

        return node.output_files[0]

    opt_snr_split_files = []
    for i in range(factor):
        group_str = '%s/%s' % (i, factor)
        opt_snr_exe = PyCBCOptimalSNRExecutable(workflow.cp, 'optimal_snr',
                                                ifos=workflow.ifos,
                                                out_dir=out_dir,
                                                tags=tags + [str(i)])
        opt_snr_exe.update_current_retention_level(
            Executable.INTERMEDIATE_PRODUCT)
        node = opt_snr_exe.create_node(workflow, inj_file, precalc_psd_files,
                                       group_str)
        opt_snr_split_files += [node.output_files[0]]
        workflow += node

    llwadd_exe = LigolwAddExecutable(workflow.cp, 'optimal_snr_merge',
                                     ifos=workflow.ifos, out_dir=out_dir,
                                     tags=tags)
    llwadd_exe.update_current_retention_level(Executable.MERGED_TRIGGERS)
    merge_node = llwadd_exe.create_node(workflow.analysis_time,
                                        opt_snr_split_files,
                                        use_tmp_subdirs=False)
    workflow += merge_node

    return merge_node.output_files[0]
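# Illustrative sketch, not part of the original module: how the helper above
# might be driven from a workflow generation script. The section and option
# names ([workflow-optimal-snr], parallelization-factor) are the ones read by
# compute_inj_optimal_snr; the argument objects and the 'optimal_snr' output
# directory are hypothetical stand-ins for objects built earlier in such a
# script.
def _example_compute_inj_optimal_snr(wf, inj_file, psd_files):
    """Sketch only. ``wf`` is assumed to be a pycbc.workflow.core.Workflow
    whose ini file contains, e.g.::

        [workflow-optimal-snr]
        parallelization-factor = 10

    so that ten optimal_snr jobs plus one llwadd merge job are created;
    without that option a single optimal_snr job is set up instead.
    """
    return compute_inj_optimal_snr(wf, inj_file, psd_files,
                                   out_dir='optimal_snr',
                                   tags=['FULL_INJECTIONS'])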
def add_cumulative_files(workflow, output_file, input_files, out_dir,
                         execute_now=False, tags=[]):
    """
    Function to combine a set of segment files into a single one. This
    function will not merge the segment lists but keep each separate.

    Parameters
    ----------
    workflow : pycbc.workflow.core.Workflow
        An instance of the Workflow class that manages the workflow.
    output_file : pycbc.workflow.core.File
        The output file object.
    input_files : pycbc.workflow.core.FileList
        The list of input segment files.
    out_dir : path
        The directory to write output to.
    execute_now : boolean, optional
        If true, jobs are executed immediately. If false, they are added to
        the workflow to be run later.
    tags : list of strings, optional
        A list of strings that is used to identify this job.
    """
    llwadd_job = LigolwAddExecutable(workflow.cp, 'llwadd',
                                     ifo=output_file.ifo_list,
                                     out_dir=out_dir, tags=tags)
    add_node = llwadd_job.create_node(output_file.segment, input_files,
                                      output=output_file)
    if execute_now:
        if file_needs_generating(add_node.output_files[0].cache_entry.path):
            workflow.execute_node(add_node)
        else:
            add_node.executed = True
            for fil in add_node._outputs:
                fil.node = None
                fil.PFN(fil.storage_path, site='local')
    else:
        workflow.add_node(add_node)
    return add_node.output_files[0]
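# Illustrative sketch, not part of the original module: combining a set of
# segment files into one with add_cumulative_files. ``out_file`` and
# ``seg_files`` are hypothetical stand-ins for a pycbc.workflow.core.File and
# FileList prepared by the segment module.
def _example_add_cumulative_files(wf, out_file, seg_files):
    """Sketch only. With execute_now=True the llwadd job runs immediately,
    which is useful when later planning steps need the combined file on
    disk; with the default False it is simply added to the workflow DAG.
    """
    return add_cumulative_files(wf, out_file, seg_files, 'segments',
                                execute_now=True, tags=['CUMULATIVE'])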
def get_cumulative_segs(workflow, currSegFile, categories, segFilesList,
                        out_dir, tags=[], execute_now=False,
                        segment_name=None):
    """
    Function to generate one of the cumulative, multi-detector segment files
    as part of the workflow.

    Parameters
    ----------
    workflow : pycbc.workflow.core.Workflow
        An instance of the Workflow class that manages the workflow.
    currSegFile : pycbc.workflow.core.SegFile
        The SegFile corresponding to this file that will be created.
    categories : list of ints
        The veto categories to include in this cumulative veto.
    segFilesList : Listionary of SegFiles
        The list of segment files to be used as input for combining.
    out_dir : path
        The directory to write output to.
    tags : list of strings, optional
        A list of strings that is used to identify this job.
    execute_now : boolean, optional
        If true, jobs are executed immediately. If false, they are added to
        the workflow to be run later.
    segment_name : string, optional
        The name given to the combined segment list. If not supplied this
        defaults to 'VETO_CAT%d_CUMULATIVE' using the last category in the
        list.
    """
    add_inputs = FileList([])
    valid_segment = currSegFile.segment
    if segment_name is None:
        segment_name = 'VETO_CAT%d_CUMULATIVE' % (categories[-1])
    cp = workflow.cp
    # calculate the cumulative veto files for a given ifo
    for ifo in workflow.ifos:
        cum_job = LigoLWCombineSegsExecutable(cp, 'ligolw_combine_segments',
                                              out_dir=out_dir,
                                              tags=tags + [segment_name],
                                              ifos=ifo)
        inputs = []
        files = segFilesList.find_output_with_ifo(ifo)
        for category in categories:
            fileList = files.find_output_with_tag('VETO_CAT%d' % (category))
            inputs += fileList

        cum_node = cum_job.create_node(valid_segment, inputs, segment_name)
        if execute_now:
            if file_needs_generating(
                    cum_node.output_files[0].cache_entry.path):
                workflow.execute_node(cum_node)
            else:
                cum_node.executed = True
                for fil in cum_node._outputs:
                    fil.node = None
                    fil.PFN(fil.storage_path, site='local')
        else:
            workflow.add_node(cum_node)
        add_inputs += cum_node.output_files

    # add cumulative files for each ifo together
    add_job = LigolwAddExecutable(cp, 'llwadd', ifo=ifo, out_dir=out_dir,
                                  tags=tags)
    add_node = add_job.create_node(valid_segment, add_inputs,
                                   output=currSegFile)
    if execute_now:
        if file_needs_generating(add_node.output_files[0].cache_entry.path):
            workflow.execute_node(add_node)
        else:
            add_node.executed = True
            for fil in add_node._outputs:
                fil.node = None
                fil.PFN(fil.storage_path, site='local')
    else:
        workflow.add_node(add_node)
    return add_node.output_files[0]
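# Illustrative sketch, not part of the original module: building a cumulative
# CAT 1+2+3 multi-detector veto file with get_cumulative_segs. The
# ``curr_seg_file`` and ``seg_files_list`` objects are hypothetical stand-ins
# for the SegFile and per-ifo, per-category segment files that the segment
# module would normally have produced.
def _example_get_cumulative_segs(wf, curr_seg_file, seg_files_list):
    """Sketch only. For each ifo the VETO_CAT1-VETO_CAT3 files are combined
    with ligolw_combine_segments, and the per-ifo results are then merged
    into ``curr_seg_file`` with llwadd.
    """
    return get_cumulative_segs(wf, curr_seg_file, [1, 2, 3], seg_files_list,
                               'segments', tags=['CUMULATIVE'])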
def setup_coincidence_workflow_ligolw_thinca(
        workflow, segsList, timeSlideFiles, inspiral_outs, output_dir,
        veto_cats=[2, 3, 4], tags=[], timeSlideTags=None,
        parallelize_split_input=False):
    """
    This function is used to set up a single-stage ihope style coincidence
    stage of the workflow using ligolw_sstinca (or compatible code!).

    Parameters
    ----------
    workflow : pycbc.workflow.core.Workflow
        The workflow instance that the coincidence jobs will be added to.
    segsList : pycbc.workflow.core.FileList
        The list of files returned by workflow's segment module that contains
        pointers to all the segment files generated in the workflow. If the
        coincidence code will be applying the data quality vetoes, then this
        will be used to ensure that the codes get the necessary input to do
        this.
    timeSlideFiles : pycbc.workflow.core.FileList
        A FileList of the timeSlide input files that are needed to determine
        what time sliding needs to be done. One of the timeSlideFiles will
        normally be "zero-lag only", the others containing time slides used
        to facilitate background computations later in the workflow.
    inspiral_outs : pycbc.workflow.core.FileList
        A FileList of the matched-filter module output that is used as input
        to the coincidence codes running at this stage.
    output_dir : path
        The directory in which coincidence output will be stored.
    veto_cats : list of ints (optional, default = [2,3,4])
        Veto categories that will be applied in the coincidence jobs. If this
        takes the default value the code will run data quality at cumulative
        categories 2, 3 and 4. Note that if we change the flag definitions to
        be non-cumulative then this option will need to be revisited.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['BNSINJECTIONS'] or
        ['NOINJECTIONANALYSIS']. This will be used in output names.
    timeSlideTags : list of strings (optional, default = [])
        A list of the tags corresponding to the timeSlideFiles that are to be
        used in this call to the module. This can be used to ensure that the
        injection runs do no time sliding, but the no-injection runs do
        perform time slides (or vice-versa if you prefer!)
    parallelize_split_input : boolean (optional, default = False)
        If True, assume the inspiral files have been split over the template
        bank (tagged JOB%d) and set up the llwadd and thinca jobs separately
        for each split.

    Returns
    --------
    coinc_outs : pycbc.workflow.core.FileList
        The main coincidence output files: the clustered files if
        post-clustering is enabled, otherwise the ligolw_thinca outputs.
    other_outs : dict of pycbc.workflow.core.FileList
        The remaining output files, keyed by type: the ligolw_add outputs,
        the DIST_STATS files and, if post-clustering is enabled, the
        unclustered ligolw_thinca outputs.
    """
    from pylal import ligolw_cafe

    logging.debug("Entering coincidence module.")
    cp = workflow.cp
    ifoString = workflow.ifo_string

    # setup code for each veto_category
    coinc_outs = FileList([])
    other_outs = {}

    if not timeSlideTags:
        # Get all sections by looking in ini file, use all time slide files.
        timeSlideTags = [(sec.split('-')[-1]).upper()
                         for sec in workflow.cp.sections()
                         if sec.startswith('tisi-')]

    if parallelize_split_input:
        # Want to split all input jobs according to their JOB%d tag.
        # This matches any string that is the letters JOB followed by some
        # numbers and nothing else.
        inspiral_outs_dict = {}
        regex_match = re.compile('JOB([0-9]+)\Z')
        for file in inspiral_outs:
            matches = [regex_match.match(tag) for tag in file.tags]
            # Remove non matching entries
            matches = [i for i in matches if i is not None]
            # Must have one entry
            if len(matches) == 0:
                warn_msg = "I was asked to parallelize over split inspiral "
                warn_msg += "files at the coincidence stage, but at least one "
                warn_msg += "input file does not have a JOB\%d tag indicating "
                warn_msg += "that it was split. Assuming that I do not have "
                warn_msg += "split input files and turning "
                warn_msg += "parallelize_split_input off."
                logging.warn(warn_msg)
                parallelize_split_input = False
                break
            if len(matches) > 1:
                err_msg = "One of my input files has two tags fitting JOB\%d "
                err_msg += "this means I cannot tell which split job this "
                err_msg += "file is from."
                raise ValueError(err_msg)
            # Extract the job ID
            id = int(matches[0].string[3:])
            if not inspiral_outs_dict.has_key(id):
                inspiral_outs_dict[id] = FileList([])
            inspiral_outs_dict[id].append(file)
        else:
            # If I got through all the files I want to sort the dictionaries so
            # that file with key a and index 3 is the same file as key b and
            # index 3 other than the tag is JOBA -> JOBB ... ie. it has used
            # a different part of the template bank.
            sort_lambda = lambda x: (x.ifo_string, x.segment,
                                     x.tagged_description)
            for key in inspiral_outs_dict.keys():
                inspiral_outs_dict[key].sort(key=sort_lambda)
            # These should be in ascending order, so I can assume the existence
            # of a JOB0 tag
            inspiral_outs = inspiral_outs_dict[0]
            for index, file in enumerate(inspiral_outs):
                # Store the index in the file for quicker mapping later
                file.thinca_index = index
    else:
        inspiral_outs_dict = None

    for timeSlideTag in timeSlideTags:
        # Get the time slide file from the inputs
        tisiOutFile = timeSlideFiles.find_output_with_tag(timeSlideTag)
        if not len(tisiOutFile) == 1:
            errMsg = "If you are seeing this, something batshit is going on!"
            if len(tisiOutFile) == 0:
                errMsg = "No time slide files found matching %s." \
                         % (timeSlideTag)
            if len(tisiOutFile) > 1:
                errMsg = "More than one time slide file matches %s." \
                         % (timeSlideTag)
            raise ValueError(errMsg)
        tisiOutFile = tisiOutFile[0]

        # Next we run ligolw_cafe. This is responsible for identifying what
        # times will be used for the ligolw_thinca jobs and what files are
        # needed for each. If doing time sliding there will be some triggers
        # read into multiple jobs.
        cacheInspOuts = inspiral_outs.convert_to_lal_cache()
        if workflow.cp.has_option_tags("workflow-coincidence",
                                       "maximum-extent", tags):
            max_extent = float(
                workflow.cp.get_opt_tags("workflow-coincidence",
                                         "maximum-extent", tags))
        else:
            # hard-coded default value for extent of time in a single job
            max_extent = 3600
        logging.debug("Calling into cafe.")
        time_slide_table = lsctables.TimeSlideTable.get_table(
            ligolw_utils.load_filename(
                tisiOutFile.storage_path,
                gz=tisiOutFile.storage_path.endswith(".gz"),
                contenthandler=ContentHandler, verbose=False))
        time_slide_table.sync_next_id()
        time_slide_dict = time_slide_table.as_dict()

        cafe_seglists, cafe_caches = ligolw_cafe.ligolw_cafe(
            cacheInspOuts, time_slide_dict.values(),
            extentlimit=max_extent, verbose=False)
        logging.debug("Done with cafe.")

        # Take the combined seglist file
        dqSegFile = segsList.find_output_with_tag(
            'COMBINED_CUMULATIVE_SEGMENTS')
        if not len(dqSegFile) == 1:
            errMsg = "Did not find exactly 1 data quality file."
            print len(dqSegFile), dqSegFile
            raise ValueError(errMsg)
        dqSegFile = dqSegFile[0]

        # Set up llwadd job
        llwadd_tags = [timeSlideTag] + tags
        ligolwadd_job = LigolwAddExecutable(cp, 'llwadd', ifo=ifoString,
                                            out_dir=output_dir,
                                            tags=llwadd_tags)
        ligolwAddOuts = FileList([])

        # This flag will add a clustering job after ligolw_thinca
        if workflow.cp.has_option_tags("workflow-coincidence",
                                       "coincidence-post-cluster",
                                       llwadd_tags):
            coinc_post_cluster = True
        else:
            coinc_post_cluster = False

        # Do global setup at each category
        ligolwthinca_job = {}
        cluster_job = {}
        thinca_tags = {}
        for category in veto_cats:
            logging.debug("Preparing %s %s" % (timeSlideTag, category))
            dqVetoName = 'VETO_CAT%d_CUMULATIVE' % (category)
            # FIXME: Should we resolve this now?
            # FIXME: Here we set the dqVetoName to be compatible with pipedown.
            #        For pipedown must put the slide identifier first and
            #        dqVetoName last.
            pipedownDQVetoName = 'CAT_%d_VETO' % (category)
            curr_thinca_job_tags = [timeSlideTag] + tags + [pipedownDQVetoName]
            thinca_tags[category] = curr_thinca_job_tags
            # Set up jobs for ligolw_thinca
            ligolwthinca_job[category] = LigolwSSthincaExecutable(
                cp, 'thinca', ifo=ifoString, out_dir=output_dir,
                dqVetoName=dqVetoName, tags=curr_thinca_job_tags)
            if coinc_post_cluster:
                cluster_job[category] = SQLInOutExecutable(
                    cp, 'pycbccluster', ifo=ifoString, out_dir=output_dir,
                    tags=curr_thinca_job_tags)

        for idx, cafe_cache in enumerate(cafe_caches):
            ligolwAddOuts = FileList([])
            ligolwThincaOuts = FileList([])
            ligolwThincaLikelihoodOuts = FileList([])
            ligolwClusterOuts = FileList([])

            if not len(cafe_cache.objects):
                raise ValueError("One of the cache objects contains no files!")

            # Determine segments to accept coincidences.
            # If cache is not the first or last in the timeseries, check the
            # two closest caches in the timeseries and see if their extents
            # match. If they match, they're adjacent and use the time where
            # they meet as a bound for accepting coincidences. If they're not
            # adjacent, then there is no bound for accepting coincidences.
            coincStart, coincEnd = None, None
            if idx and (cafe_cache.extent[0] == cafe_caches[idx-1].extent[1]):
                coincStart = cafe_cache.extent[0]
            if idx + 1 - len(cafe_caches) and \
                    (cafe_cache.extent[1] == cafe_caches[idx+1].extent[0]):
                coincEnd = cafe_cache.extent[1]
            coincSegment = (coincStart, coincEnd)

            # Need to create a list of the File(s) contained in the cache.
            # Assume that if we have partitioned input then if *one* job in the
            # partitioned input is an input then *all* jobs will be.
            if not parallelize_split_input:
                inputTrigFiles = FileList([])
                for object in cafe_cache.objects:
                    inputTrigFiles.append(object.workflow_file)

                llw_files = inputTrigFiles + [dqSegFile] + [tisiOutFile]

                # Now we can create the nodes
                node = ligolwadd_job.create_node(cafe_cache.extent, llw_files)
                ligolwAddFile = node.output_files[0]
                ligolwAddOuts.append(ligolwAddFile)
                workflow.add_node(node)
                for category in veto_cats:
                    node = ligolwthinca_job[category].create_node(
                        cafe_cache.extent, coincSegment, ligolwAddFile)
                    ligolwThincaOuts += \
                        node.output_files.find_output_without_tag('DIST_STATS')
                    ligolwThincaLikelihoodOuts += \
                        node.output_files.find_output_with_tag('DIST_STATS')
                    workflow.add_node(node)
                    if coinc_post_cluster:
                        node = cluster_job[category].create_node(
                            cafe_cache.extent, ligolwThincaOuts[-1])
                        ligolwClusterOuts += node.output_files
                        workflow.add_node(node)
            else:
                for key in inspiral_outs_dict.keys():
                    curr_tags = ["JOB%d" % (key)]
                    curr_list = inspiral_outs_dict[key]
                    inputTrigFiles = FileList([])
                    for object in cafe_cache.objects:
                        inputTrigFiles.append(
                            curr_list[object.workflow_file.thinca_index])

                    llw_files = inputTrigFiles + [dqSegFile] + [tisiOutFile]

                    # Now we can create the nodes
                    node = ligolwadd_job.create_node(cafe_cache.extent,
                                                     llw_files,
                                                     tags=curr_tags)
                    ligolwAddFile = node.output_files[0]
                    ligolwAddOuts.append(ligolwAddFile)
                    workflow.add_node(node)
                    if workflow.cp.has_option_tags(
                            "workflow-coincidence",
                            "coincidence-write-likelihood",
                            curr_thinca_job_tags):
                        write_likelihood = True
                    else:
                        write_likelihood = False
                    for category in veto_cats:
                        node = ligolwthinca_job[category].create_node(
                            cafe_cache.extent, coincSegment, ligolwAddFile,
                            tags=curr_tags,
                            write_likelihood=write_likelihood)
                        ligolwThincaOuts += \
                            node.output_files.find_output_without_tag(
                                'DIST_STATS')
                        ligolwThincaLikelihoodOuts += \
                            node.output_files.find_output_with_tag(
                                'DIST_STATS')
                        workflow.add_node(node)
                        if coinc_post_cluster:
                            node = cluster_job[category].create_node(
                                cafe_cache.extent, ligolwThincaOuts[-1])
                            ligolwClusterOuts += node.output_files
                            workflow.add_node(node)

            other_returns = {}
            other_returns['LIGOLW_ADD'] = ligolwAddOuts
            other_returns['DIST_STATS'] = ligolwThincaLikelihoodOuts

            if coinc_post_cluster:
                main_return = ligolwClusterOuts
                other_returns['THINCA'] = ligolwThincaOuts
            else:
                main_return = ligolwThincaOuts

            logging.debug("Done")
            coinc_outs.extend(main_return)
            for key, file_list in other_returns.items():
                if other_outs.has_key(key):
                    other_outs[key].extend(other_returns[key])
                else:
                    other_outs[key] = other_returns[key]
    return coinc_outs, other_outs
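# Illustrative sketch, not part of the original module: a typical call into
# the coincidence setup above from a workflow generation script. The ``wf``,
# ``seg_files``, ``tisi_files`` and ``insp_files`` objects are hypothetical
# stand-ins for the outputs of the segment, time-slide and matched-filter
# modules.
def _example_setup_coincidence(wf, seg_files, tisi_files, insp_files):
    """Sketch only. Runs ligolw_thinca at cumulative veto categories 2-4 for
    every time-slide file tagged in the ini file, returning the main
    coincidence FileList and a dict of secondary outputs.
    """
    return setup_coincidence_workflow_ligolw_thinca(
        wf, seg_files, tisi_files, insp_files, 'coincidence',
        veto_cats=[2, 3, 4], tags=['NOINJECTIONANALYSIS'])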