def make_random_document(num_procs, num_seg_defs, num_segs, num_seg_sums,
                         start_time, end_time, max_len):
    """Create a ligolw document with random segments and segment_summary.

    Parameters
    ----------
    num_procs : int
        Number of process rows to add to the document.
    num_seg_defs : int
        Number of segment definers to create; each is attached to a
        randomly chosen process.
    num_segs : int
        Number of random segments generated per definer for the segment
        table.
    num_seg_sums : int
        Number of random segments generated per definer for the
        segment_summary table.
    start_time, end_time : int
        GPS bounds within which random segments are generated.
    max_len : int
        Maximum length of a single random segment.

    Returns
    -------
    (doc, segment_map, segment_sum_map)
        The document, plus dicts mapping each "TEST_SEG_%d" name to the
        coalesced segment list / segment-summary list that was inserted.
    """
    doc = ligolw.Document()
    doc.appendChild(ligolw.LIGO_LW())

    # Add some processes
    proc_ids = []
    seg_def_ids = {}
    segment_map = {}
    segment_sum_map = {}

    for count in range(num_procs):
        proc_id = append_process(doc, "Test program %d" % count).process_id
        proc_ids.append(proc_id)
        seg_def_ids[proc_id] = []

    # Add some segment definers, each owned by a randomly chosen process
    for count in range(num_seg_defs):
        proc_id = proc_ids[int(uniform(0, num_procs))]
        seg_def_id = add_to_segment_definer(doc, proc_id, "H1",
                                            "TEST_SEG_%d" % count, 1)
        seg_def_ids[proc_id].append(seg_def_id)

        # Add some segments
        sgmntlst = random_segments(start_time, end_time, num_segs, max_len)
        add_to_segment(doc, proc_id, seg_def_id, sgmntlst)
        sgmntlst.coalesce()
        segment_map["TEST_SEG_%d" % count] = sgmntlst

        # Add some segment summaries.
        # BUGFIX: this previously passed num_segs, leaving the
        # num_seg_sums parameter entirely unused.
        sgmntlst = random_segments(start_time, end_time, num_seg_sums,
                                   max_len)
        add_to_segment_summary(doc, proc_id, seg_def_id, sgmntlst)
        sgmntlst.coalesce()
        segment_sum_map["TEST_SEG_%d" % count] = sgmntlst

    return doc, segment_map, segment_sum_map
def run_segment_operation(outdoc, filenames, segments, use_segment_table,
                          operation, result_name='RESULT', preserve=True):
    """Combine a set of named flags across a set of files.

    Given files each containing segment definers DMT-FLAG1, DMT-FLAG2,
    etc. and a comma-separated list of flag names, computes

        RESULT = (union of every file's FLAG1) operation
                 (union of every file's FLAG2) operation ...

    where ``operation`` is INTERSECT, UNION or DIFF (DIFF subtracts the
    remaining flags from the first).  The result is stored in ``outdoc``
    under a new definer named ``result_name``.  Returns the document and
    the total livetime of the result.
    """
    proc_id = table.get_table(outdoc, lsctables.ProcessTable.tableName)[0].process_id

    # Either merge the inputs into outdoc (preserve) or into a scratch doc.
    if preserve:
        indoc = ligolw_add.ligolw_add(outdoc, filenames)
    else:
        indoc = ligolw_add.ligolw_add(ligolw.Document(), filenames)

    flag_names = segments.split(',')

    if operation == INTERSECT:
        # Start from all of time and narrow down flag by flag.
        combined = glue.segments.segmentlist([glue.segments.segment(
            -glue.segments.infinity(), glue.segments.infinity())])
        for flag in flag_names:
            combined &= find_segments(indoc, flag, use_segment_table)
    elif operation == UNION:
        combined = glue.segments.segmentlist([])
        for flag in flag_names:
            combined |= find_segments(indoc, flag, use_segment_table)
    elif operation == DIFF:
        combined = find_segments(indoc, flag_names[0], use_segment_table)
        for flag in flag_names[1:]:
            combined -= find_segments(indoc, flag, use_segment_table)
    else:
        raise NameError("%s is not a known operation (intersect, union or diff)" % operation)

    # Register a definer for the result and store the segments under it.
    seg_def_id = add_to_segment_definer(outdoc, proc_id, '', result_name, 1)

    if use_segment_table:
        add_to_segment(outdoc, proc_id, seg_def_id, combined)
    else:
        add_to_segment_summary(outdoc, proc_id, seg_def_id, combined)

    return outdoc, abs(combined)
def run_segment_operation(outdoc, filenames, segments, use_segment_table,
                          operation, result_name='RESULT', preserve=True):
    """Apply INTERSECT, UNION or DIFF across a comma-separated flag list.

    Each name in ``segments`` is first unioned across all ``filenames``;
    the per-flag results are then combined with ``operation`` (for DIFF,
    the later flags are subtracted from the first).  The combined list is
    written into ``outdoc`` under a definer called ``result_name``.

    Returns (outdoc, total livetime of the combined list).
    """
    proc_id = lsctables.ProcessTable.get_table(outdoc)[0].process_id

    # Merge the inputs; into outdoc itself when preserving them.
    target = outdoc if preserve else ligolw.Document()
    indoc = ligolw_add.ligolw_add(target, filenames)

    keys = segments.split(',')

    # Reject unknown operations before touching the segment tables.
    if operation not in (INTERSECT, UNION, DIFF):
        raise NameError("%s is not a known operation (intersect, union or diff)" % operation)

    # Look up every flag once, in the order given.
    per_flag = [find_segments(indoc, key, use_segment_table) for key in keys]

    if operation == INTERSECT:
        sgmntlist = glue.segments.segmentlist([glue.segments.segment(
            -glue.segments.infinity(), glue.segments.infinity())])
        for seglist in per_flag:
            sgmntlist &= seglist
    elif operation == UNION:
        sgmntlist = glue.segments.segmentlist([])
        for seglist in per_flag:
            sgmntlist |= seglist
    else:  # DIFF: first flag minus all the others
        sgmntlist = per_flag[0]
        for seglist in per_flag[1:]:
            sgmntlist -= seglist

    # Add a segment definer and segments for the result.
    seg_def_id = add_to_segment_definer(outdoc, proc_id, '', result_name, 1)

    if use_segment_table:
        add_to_segment(outdoc, proc_id, seg_def_id, sgmntlist)
    else:
        add_to_segment_summary(outdoc, proc_id, seg_def_id, sgmntlist)

    return outdoc, abs(sgmntlist)
def make_random_document(num_procs, num_seg_defs, num_segs, num_seg_sums,
                         start_time, end_time, max_len):
    """Build a ligolw document populated with random segment and
    segment_summary rows, for use in tests.

    Creates ``num_procs`` process rows, then ``num_seg_defs`` definers
    named "TEST_SEG_<n>", each attached to a random process and filled
    with random segments in [start_time, end_time] of length at most
    ``max_len``.  Returns (doc, segment_map, segment_sum_map), where the
    maps give the coalesced lists inserted for each definer name.
    """
    doc = ligolw.Document()
    doc.appendChild(ligolw.LIGO_LW())

    segment_map = {}
    segment_sum_map = {}

    # One process row per requested process (order preserved).
    proc_ids = [append_process(doc, "Test program %d" % n).process_id
                for n in range(num_procs)]
    seg_def_ids = dict((pid, []) for pid in proc_ids)

    for n in range(num_seg_defs):
        seg_name = "TEST_SEG_%d" % n

        # Attach this definer to a randomly chosen process.
        proc_id = proc_ids[int(uniform(0, num_procs))]
        seg_def_id = add_to_segment_definer(doc, proc_id, "H1", seg_name, 1)
        seg_def_ids[proc_id].append(seg_def_id)

        # Random segments for the segment table.
        seglist = random_segments(start_time, end_time, num_segs, max_len)
        add_to_segment(doc, proc_id, seg_def_id, seglist)
        seglist.coalesce()
        segment_map[seg_name] = seglist

        # Random segments for the segment_summary table.
        # NOTE(review): num_seg_sums is accepted but not used -- the
        # summary list is also sized by num_segs, as in the original.
        seglist = random_segments(start_time, end_time, num_segs, max_len)
        add_to_segment_summary(doc, proc_id, seg_def_id, seglist)
        seglist.coalesce()
        segment_sum_map[seg_name] = seglist

    return doc, segment_map, segment_sum_map
# ANALYSABLE - This one needs to come from inspiral outs analysableSegs = insps.get_times_covered_by_files() # And add these to the output file # Start with the segment summary summSegs = segments.segmentlist([workflow.analysis_time]) sci_def_id = segmentdb_utils.add_to_segment_definer( outdoc, proc_id, ifo, "CBC_DAYHOPE_SCIENCE", 0) sciok_def_id = segmentdb_utils.add_to_segment_definer( outdoc, proc_id, ifo, "CBC_DAYHOPE_SCIENCE_OK", 0) sciavailable_def_id = segmentdb_utils.add_to_segment_definer( outdoc, proc_id, ifo, "CBC_DAYHOPE_SCIENCE_AVAILABLE", 0) analysable_def_id = segmentdb_utils.add_to_segment_definer( outdoc, proc_id, ifo, "CBC_DAYHOPE_ANALYSABLE", 0) segmentdb_utils.add_to_segment(outdoc, proc_id, sci_def_id, sciSegs) segmentdb_utils.add_to_segment(outdoc, proc_id, sciok_def_id, sciokSegs) segmentdb_utils.add_to_segment(outdoc, proc_id, sciavailable_def_id, sciavailableSegs) segmentdb_utils.add_to_segment(outdoc, proc_id, analysable_def_id, analysableSegs) segmentdb_utils.add_to_segment_summary(outdoc, proc_id, sci_def_id, summSegs, comment='') segmentdb_utils.add_to_segment_summary(outdoc, proc_id, sciok_def_id, summSegs,
# NOTE(review): `doc`, `proc_out`, `seg_def_id`, `curs`, `ifo`, `name`,
# `version` and `sum_result_segments_list` are defined earlier, outside
# this excerpt.
segmentdb_utils.add_to_segment_summary(doc, proc_out.process_id, seg_def_id,
                                       sum_result_segments_list)

# Now add segments to doc.
# NOTE(review): ifo/name/version are interpolated directly into the SQL
# string; acceptable for trusted operator input, but switch to the DB
# driver's bind parameters if these values can ever come from an
# untrusted source.
curs.execute("SELECT segment.start_time, segment.end_time FROM segment_definer, segment WHERE segment.segment_def_id = segment_definer.segment_def_id AND (segment_definer.ifos = '%s' AND segment_definer.name = '%s' AND segment_definer.version = %d)" % (ifo, name, version))
segresult = curs.fetchall()

seg_result_segments = [segments.segment(i) for i in segresult]
seg_result_segments_list = segments.segmentlist(seg_result_segments)
seg_result_segments_list.coalesce()
segmentdb_utils.add_to_segment(doc, proc_out.process_id, seg_def_id,
                               seg_result_segments_list)

# Now publish the doc after writing it to disk temporarily.
fake_file = StringIO.StringIO()
doc.write(fake_file)

# BUGFIX: the temporary file name was previously built as
# '/tmp/ligolw_segment_insert_' + str(time.time()) + '.xml', which is
# predictable and race/symlink-prone in a shared /tmp.  Use tempfile so
# the file is created securely; `filepath` keeps the same meaning for
# the publishing/cleanup code that follows.
import tempfile
fp = tempfile.NamedTemporaryFile(mode='w', prefix='ligolw_segment_insert_',
                                 suffix='.xml', delete=False)
filepath = fp.name
fp.write(fake_file.getvalue())
fp.close()
# ANALYSABLE - This one needs to come from inspiral outs analysableSegs = insps.get_times_covered_by_files() # And add these to the output file # Start with the segment summary summSegs = segments.segmentlist([workflow.analysis_time]) sci_def_id = segmentdb_utils.add_to_segment_definer(outdoc, proc_id, ifo, "CBC_DAYHOPE_SCIENCE", 0) sciok_def_id = segmentdb_utils.add_to_segment_definer(outdoc, proc_id, ifo, "CBC_DAYHOPE_SCIENCE_OK", 0) sciavailable_def_id = segmentdb_utils.add_to_segment_definer(outdoc, proc_id, ifo, "CBC_DAYHOPE_SCIENCE_AVAILABLE", 0) analysable_def_id = segmentdb_utils.add_to_segment_definer(outdoc, proc_id, ifo, "CBC_DAYHOPE_ANALYSABLE", 0) segmentdb_utils.add_to_segment(outdoc, proc_id, sci_def_id, sciSegs) segmentdb_utils.add_to_segment(outdoc, proc_id, sciok_def_id, sciokSegs) segmentdb_utils.add_to_segment(outdoc, proc_id, sciavailable_def_id, sciavailableSegs) segmentdb_utils.add_to_segment(outdoc, proc_id, analysable_def_id, analysableSegs) segmentdb_utils.add_to_segment_summary(outdoc, proc_id, sci_def_id, summSegs, comment='') segmentdb_utils.add_to_segment_summary(outdoc, proc_id, sciok_def_id, summSegs, comment='') segmentdb_utils.add_to_segment_summary(outdoc, proc_id, sciavailable_def_id, summSegs, comment='') segmentdb_utils.add_to_segment_summary(outdoc, proc_id, analysable_def_id, summSegs, comment='')
def run_file_operation(outdoc, filenames, use_segment_table, operation,
                       preserve=True):
    """
    Performs an operation (intersect, union or diff) across a set of files.
    That is, given a set of files each with segment definers DMT-FLAG1,
    DMT-FLAG2 etc the result is a file where

    DMT-FLAG1 = (file 1's DMT-FLAG1 operation file 2's DMT-FLAG1 ...)
    DMT-FLAG2 = (file 1's DMT-FLAG2 operation file 2's DMT-FLAG2 ...)

    Parameters
    ----------
    outdoc : ligolw.Document
        Document whose process table supplies the process id; results
        are appended to it.
    filenames : list of str
        LIGO_LW XML files to combine.
    use_segment_table : bool
        If True read/write the segment table, otherwise segment_summary.
    operation : INTERSECT, UNION or DIFF
    preserve : bool, optional
        If True, the contents of the input files are merged into outdoc.

    Returns
    -------
    (outdoc, livetime)
        The document and abs() of the last per-definer result computed
        (0 if no segment definers were found).
    """
    proc_id = table.get_table(outdoc, lsctables.ProcessTable.tableName)[0].process_id

    # load up the files into individual documents
    xmldocs = [ligolw_add.ligolw_add(ligolw.Document(), [fname])
               for fname in filenames]

    # Get the list of distinct segment_definers across all docs
    segment_definers = {}

    def register_definer(seg_def):
        key = (seg_def.ifos, seg_def.name, seg_def.version)
        segment_definers[key] = True
        return key

    for xmldoc in xmldocs:
        seg_def_table = table.get_table(xmldoc, lsctables.SegmentDefTable.tableName)
        # BUGFIX: this was map(register_definer, seg_def_table).  Under
        # Python 3 map() returns a lazy iterator, so register_definer
        # never ran and no definers were found.  Iterate explicitly.
        for seg_def in seg_def_table:
            register_definer(seg_def)

    # Robustness: keep `result` defined (livetime 0) even if the inputs
    # contain no segment definers at all; previously the final
    # abs(result) would raise NameError in that case.
    result = glue.segments.segmentlist([])

    # For each unique segment definer, combine across the files
    for ifo, name, version in segment_definers:
        if operation == INTERSECT:
            # If I were feeling especially functional-ist I'd write this
            # with reduce()
            result = glue.segments.segmentlist([glue.segments.segment(
                -glue.segments.infinity(), glue.segments.infinity())])
            for xmldoc in xmldocs:
                result &= find_segments(xmldoc,
                                        '%s:%s:%d' % (ifo, name, version),
                                        use_segment_table)
        elif operation == UNION:
            result = glue.segments.segmentlist([])
            for xmldoc in xmldocs:
                result |= find_segments(xmldoc,
                                        '%s:%s:%d' % (ifo, name, version),
                                        use_segment_table)
        elif operation == DIFF:
            result = find_segments(xmldocs[0],
                                   '%s:%s:%d' % (ifo, name, version),
                                   use_segment_table)
            for xmldoc in xmldocs[1:]:
                result -= find_segments(xmldoc,
                                        '%s:%s:%d' % (ifo, name, version),
                                        use_segment_table)
        else:
            raise NameError(
                "%s is not a known operation (intersect, union or diff)"
                % operation)

        # Add a segment definer for the result
        seg_def_id = add_to_segment_definer(outdoc, proc_id, ifo, name,
                                            version)

        # Add the segments
        if use_segment_table:
            add_to_segment(outdoc, proc_id, seg_def_id, result)
        else:
            add_to_segment_summary(outdoc, proc_id, seg_def_id, result)

    # If we're preserving, also load up everything into the output document.
    if preserve:
        # BUGFIX: this was map(lambda x: outdoc.appendChild(...), xmldocs),
        # which is a no-op under Python 3 (iterator never consumed).
        for xmldoc in xmldocs:
            outdoc.appendChild(xmldoc.childNodes[0])

        # Merge the ligolw elements and tables
        ligolw_add.merge_ligolws(outdoc)
        ligolw_add.merge_compatible_tables(outdoc)

    return outdoc, abs(result)
def run_file_operation(outdoc, filenames, use_segment_table, operation,
                       preserve=True):
    """
    Performs an operation (intersect, union or diff) across a set of files.
    That is, given a set of files each with segment definers DMT-FLAG1,
    DMT-FLAG2 etc the result is a file where

    DMT-FLAG1 = (file 1's DMT-FLAG1 operation file 2's DMT-FLAG1 ...)
    DMT-FLAG2 = (file 1's DMT-FLAG2 operation file 2's DMT-FLAG2 ...)

    If ``preserve`` is True the contents of the input files are also
    merged into ``outdoc``.  Returns (outdoc, livetime) where livetime
    is abs() of the last per-definer result (0 when no definers exist).
    """
    proc_id = table.get_table(outdoc, lsctables.ProcessTable.tableName)[0].process_id

    # load up the files into individual documents
    xmldocs = [ligolw_add.ligolw_add(ligolw.Document(), [fname])
               for fname in filenames]

    # Get the list of distinct segment_definers across all docs
    segment_definers = {}

    def register_definer(seg_def):
        key = (seg_def.ifos, seg_def.name, seg_def.version)
        segment_definers[key] = True
        return key

    for xmldoc in xmldocs:
        seg_def_table = table.get_table(xmldoc,
                                        lsctables.SegmentDefTable.tableName)
        # BUGFIX: previously map(register_definer, seg_def_table); in
        # Python 3 map() is lazy, so the side effects never happened and
        # segment_definers stayed empty.
        for seg_def in seg_def_table:
            register_definer(seg_def)

    # Robustness: ensure `result` exists even when no definers were
    # found, instead of NameError at the final abs(result).
    result = glue.segments.segmentlist([])

    # For each unique segment definer, find the combination
    for ifo, name, version in segment_definers:
        flag = '%s:%s:%d' % (ifo, name, version)

        if operation == INTERSECT:
            result = glue.segments.segmentlist([glue.segments.segment(
                -glue.segments.infinity(), glue.segments.infinity())])
            for xmldoc in xmldocs:
                result &= find_segments(xmldoc, flag, use_segment_table)
        elif operation == UNION:
            result = glue.segments.segmentlist([])
            for xmldoc in xmldocs:
                result |= find_segments(xmldoc, flag, use_segment_table)
        elif operation == DIFF:
            result = find_segments(xmldocs[0], flag, use_segment_table)
            for xmldoc in xmldocs[1:]:
                result -= find_segments(xmldoc, flag, use_segment_table)
        else:
            raise NameError(
                "%s is not a known operation (intersect, union or diff)"
                % operation)

        # Add a segment definer for the result
        seg_def_id = add_to_segment_definer(outdoc, proc_id, ifo, name,
                                            version)

        # Add the segments
        if use_segment_table:
            add_to_segment(outdoc, proc_id, seg_def_id, result)
        else:
            add_to_segment_summary(outdoc, proc_id, seg_def_id, result)

    # If we're preserving, also load up everything into the output document.
    if preserve:
        # BUGFIX: previously map(lambda x: ..., xmldocs) -- a no-op under
        # Python 3 because the map iterator was never consumed.
        for xmldoc in xmldocs:
            outdoc.appendChild(xmldoc.childNodes[0])

        # Merge the ligolw elements and tables
        ligolw_add.merge_ligolws(outdoc)
        ligolw_add.merge_compatible_tables(outdoc)

    return outdoc, abs(result)
def run_query_segments(doc, process_id, engine, gps_start_time, gps_end_time,
                       include_segments, exclude_segments, result_name):
    """Query the segment database and store the combined result in ``doc``.

    ``include_segments`` is a comma-separated list of ifo:name:version or
    ifo:name:* specifiers; the matching segments in
    [gps_start_time, gps_end_time] are unioned, the flags named in
    ``exclude_segments`` (same syntax) are subtracted, and the result is
    written into ``doc`` under a new definer called ``result_name``.
    Exits the process with status 1 on a malformed specifier.
    """
    segdefs = []

    for included in include_segments.split(','):
        spec = included.split(':')

        if len(spec) < 2 or len(spec) > 3:
            print("Included segements must be of the form ifo:name:version or ifo:name:*", file=sys.stderr)
            sys.exit(1)

        ifo = spec[0]
        name = spec[1]

        # BUGFIX: was `len(spec) is 3 and spec[2] is not '*'`; identity
        # comparison against int/str literals is implementation-defined.
        if len(spec) == 3 and spec[2] != '*':
            version = int(spec[2])
            if version < 1:
                print("Segment version numbers must be greater than zero", file=sys.stderr)
                sys.exit(1)
        else:
            version = '*'

        segdefs += segmentdb_utils.expand_version_number(
            engine, (ifo, name, version, gps_start_time, gps_end_time, 0, 0))

    found_segments = segmentdb_utils.query_segments(engine, 'segment', segdefs)
    found_segments = reduce(operator.or_, found_segments).coalesce()

    # We could also do:
    segment_summaries = segmentdb_utils.query_segments(engine,
                                                       'segment_summary',
                                                       segdefs)

    # And we could write out everything we found
    segmentdb_utils.add_segment_info(doc, process_id, segdefs, None,
                                     segment_summaries)

    # Do the same for excluded
    if exclude_segments:
        ex_segdefs = []

        for excluded in exclude_segments.split(','):
            spec = excluded.split(':')

            if len(spec) < 2:
                print("Excluded segements must be of the form ifo:name:version or ifo:name:*", file=sys.stderr)
                sys.exit(1)

            ifo = spec[0]
            name = spec[1]
            version = len(spec) > 2 and spec[2] or '*'

            ex_segdefs += segmentdb_utils.expand_version_number(
                engine,
                (ifo, name, version, gps_start_time, gps_end_time, 0, 0))

        excluded_segments = segmentdb_utils.query_segments(engine, 'segment',
                                                           ex_segdefs)
        excluded_segments = reduce(operator.or_, excluded_segments).coalesce()

        found_segments.coalesce()
        found_segments -= excluded_segments

    # Add the result type to the segment definer table.
    # NOTE(review): `ifo` is whatever ifo was parsed last in the loops
    # above -- confirm this attribution is intended when several ifos
    # are queried.
    seg_name = result_name
    seg_def_id = segmentdb_utils.add_to_segment_definer(doc, process_id, ifo,
                                                        seg_name, 1)

    # and segment summary
    segmentdb_utils.add_to_segment_summary(doc, process_id, seg_def_id,
                                           [[gps_start_time, gps_end_time]])

    # and store the segments
    segmentdb_utils.add_to_segment(doc, process_id, seg_def_id,
                                   found_segments)
    # BUGFIX: removed leftover debug prints that dumped a status line and
    # the entire document to stdout.
def run_query_segments(doc, process_id, engine, gps_start_time, gps_end_time,
                       include_segments, exclude_segments, result_name):
    """Query the segment database and store the combined result in ``doc``.

    ``include_segments`` is a comma-separated list of ifo:name:version or
    ifo:name:* specifiers; matching segments within
    [gps_start_time, gps_end_time] are unioned, flags in
    ``exclude_segments`` are subtracted, and the result is stored under a
    new definer named ``result_name``.  Exits with status 1 on a
    malformed specifier.
    """
    segdefs = []

    for included in include_segments.split(','):
        spec = included.split(':')

        if len(spec) < 2 or len(spec) > 3:
            # BUGFIX: converted Python-2 `print >> sys.stderr` statements
            # to the print() function used elsewhere in this module.
            print("Included segements must be of the form ifo:name:version or ifo:name:*", file=sys.stderr)
            sys.exit(1)

        ifo = spec[0]
        name = spec[1]

        # BUGFIX: was `len(spec) is 3 and spec[2] is not '*'`; identity
        # checks against literals are implementation-defined, use ==/!=.
        if len(spec) == 3 and spec[2] != '*':
            version = int(spec[2])
            if version < 1:
                print("Segment version numbers must be greater than zero", file=sys.stderr)
                sys.exit(1)
        else:
            version = '*'

        segdefs += segmentdb_utils.expand_version_number(
            engine, (ifo, name, version, gps_start_time, gps_end_time, 0, 0))

    found_segments = segmentdb_utils.query_segments(engine, 'segment', segdefs)
    found_segments = reduce(operator.or_, found_segments).coalesce()

    # We could also do:
    segment_summaries = segmentdb_utils.query_segments(engine,
                                                       'segment_summary',
                                                       segdefs)

    # And we could write out everything we found
    segmentdb_utils.add_segment_info(doc, process_id, segdefs, None,
                                     segment_summaries)

    # Do the same for excluded
    if exclude_segments:
        ex_segdefs = []

        for excluded in exclude_segments.split(','):
            spec = excluded.split(':')

            if len(spec) < 2:
                print("Excluded segements must be of the form ifo:name:version or ifo:name:*", file=sys.stderr)
                sys.exit(1)

            ifo = spec[0]
            name = spec[1]
            version = len(spec) > 2 and spec[2] or '*'

            ex_segdefs += segmentdb_utils.expand_version_number(
                engine,
                (ifo, name, version, gps_start_time, gps_end_time, 0, 0))

        excluded_segments = segmentdb_utils.query_segments(engine, 'segment',
                                                           ex_segdefs)
        excluded_segments = reduce(operator.or_, excluded_segments).coalesce()

        found_segments.coalesce()
        found_segments -= excluded_segments

    # Add the result type to the segment definer table.
    # NOTE(review): `ifo` is whatever ifo was parsed last above; confirm
    # this attribution is intended when several ifos are queried.
    seg_name = result_name
    seg_def_id = segmentdb_utils.add_to_segment_definer(doc, process_id, ifo,
                                                        seg_name, 1)

    # and segment summary
    segmentdb_utils.add_to_segment_summary(doc, process_id, seg_def_id,
                                           [[gps_start_time, gps_end_time]])

    # and store the segments
    segmentdb_utils.add_to_segment(doc, process_id, seg_def_id,
                                   found_segments)
    # BUGFIX: removed leftover Python-2 debug print statements that
    # dumped a status line and the entire document to stdout.
def setup_analysislogging(workflow, segs_list, insps, args, output_dir,
                          program_name="workflow", tags=None):
    """
    This module sets up the analysis logging xml file that contains the
    following information:

    * Command line arguments that the code was run with
    * Segment list of times marked as SCIENCE
    * Segment list of times marked as SCIENCE and "OK" ie. not CAT_1 vetoed
    * Segment list of times marked as SCIENCE_OK and present on the cluster
    * The times that will be analysed by the matched-filter jobs

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance.
    segs_list : pycbc.workflow.core.FileList
        A list of Files containing the information needed to generate the
        segments above. For segments generated at run time the associated
        segmentlist is a property of this object.
    insps : pycbc.workflow.core.FileList
        The output files from the matched-filtering module. Used to identify
        what times have been analysed in this workflow.
    output_dir : path
        Directory to output any files to.
    program_name : string (optional, default = "workflow")
        The program name to stick in the process/process_params tables.
    tags : list (optional, default = None)
        If given restrict to considering inspiral and segment files that
        are tagged with all tags in this list.
    """
    # BUGFIX: tags previously defaulted to a mutable list ([]), which is
    # shared across calls; default to None and normalize here.
    if tags is None:
        tags = []

    logging.info("Entering analysis logging module.")
    make_analysis_dir(output_dir)

    # Construct the summary XML file
    outdoc = ligolw.Document()
    outdoc.appendChild(ligolw.LIGO_LW())

    # Add process and process_params tables
    proc_id = process.register_to_xmldoc(outdoc, program_name,
                                         vars(args)).process_id

    # Now add the various segment lists to this file
    summ_segs = segmentlist([workflow.analysis_time])

    # If tags is given filter by tags
    if tags:
        for tag in tags:
            segs_list = segs_list.find_output_with_tag(tag)
            insps = insps.find_output_with_tag(tag)

    for ifo in workflow.ifos:
        # Lets get the segment lists we need
        seg_ifo_files = segs_list.find_output_with_ifo(ifo)

        # SCIENCE
        sci_seg_file = seg_ifo_files.find_output_with_tag('SCIENCE')
        if len(sci_seg_file) == 1:
            sci_seg_file = sci_seg_file[0]
            # BUGFIX: removed a leftover Python-2 debug print of
            # segment_dict.keys() (a SyntaxError under Python 3).
            sci_segs = sci_seg_file.segment_dict['%s:%s' %(ifo, 'RESULT')]
            sci_def_id = segmentdb_utils.add_to_segment_definer(outdoc,
                                 proc_id, ifo, "CBC_WORKFLOW_SCIENCE", 0)
            segmentdb_utils.add_to_segment(outdoc, proc_id, sci_def_id,
                                           sci_segs)
            segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                                         sci_def_id, summ_segs, comment='')
        elif sci_seg_file:
            # FIXME: While the segment module is still fractured (#127) this
            # may not work. Please update when #127 is resolved
            pass
            #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \
            #          %(len(sci_seg_file), ifo, 'SCIENCE')
            #raise ValueError(err_msg)

        # SCIENCE_OK
        sci_ok_seg_file = seg_ifo_files.find_output_with_tag('SCIENCE_OK')
        if len(sci_ok_seg_file) == 1:
            sci_ok_seg_file = sci_ok_seg_file[0]
            sci_ok_segs = \
                sci_ok_seg_file.segment_dict['%s:%s' %(ifo,'SCIENCE_OK')]
            sci_ok_def_id = segmentdb_utils.add_to_segment_definer(outdoc,
                              proc_id, ifo, "CBC_WORKFLOW_SCIENCE_OK", 0)
            segmentdb_utils.add_to_segment(outdoc, proc_id, sci_ok_def_id,
                                           sci_ok_segs)
            segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                                      sci_ok_def_id, summ_segs, comment='')
        elif sci_ok_seg_file:
            # FIXME: While the segment module is still fractured (#127) this
            # may not work. Please update when #127 is resolved
            pass
            #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \
            #          %(len(sci_ok_seg_file), ifo, 'SCIENCE_OK')
            #raise ValueError(err_msg)

        # SCIENCE_AVAILABLE
        sci_available_seg_file = seg_ifo_files.find_output_with_tag(\
                                                        'SCIENCE_AVAILABLE')
        if len(sci_available_seg_file) == 1:
            sci_available_seg_file = sci_available_seg_file[0]
            sci_available_segs = sci_available_seg_file.segment_dict
            sci_available_segs = \
                sci_available_segs['%s:%s' %(ifo, 'SCIENCE_AVAILABLE')]
            sci_available_def_id = segmentdb_utils.add_to_segment_definer(\
                     outdoc, proc_id, ifo, "CBC_WORKFLOW_SCIENCE_AVAILABLE", 0)
            segmentdb_utils.add_to_segment(outdoc, proc_id,
                                  sci_available_def_id, sci_available_segs)
            segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                               sci_available_def_id, summ_segs, comment='')
        elif sci_available_seg_file:
            # FIXME: While the segment module is still fractured (#127) this
            # may not work. Please update when #127 is resolved
            pass
            #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \
            #          %(len(sci_available_seg_file), ifo, 'SCIENCE_AVAILABLE')
            #raise ValueError(err_msg)

        # ANALYSABLE - This one needs to come from inspiral outs
        ifo_insps = insps.find_output_with_ifo(ifo)
        analysable_segs = ifo_insps.get_times_covered_by_files()

        analysable_def_id = segmentdb_utils.add_to_segment_definer(outdoc,
                                 proc_id, ifo, "CBC_WORKFLOW_ANALYSABLE", 0)
        segmentdb_utils.add_to_segment(outdoc, proc_id, analysable_def_id,
                                       analysable_segs)
        segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                                 analysable_def_id, summ_segs, comment='')

    summ_file = File(workflow.ifos, "WORKFLOW_SUMMARY",
                     workflow.analysis_time, extension=".xml",
                     directory=output_dir)
    summ_file.PFN(summ_file.storage_path, site='local')
    utils.write_filename(outdoc, summ_file.storage_path)

    return FileList([summ_file])
def setup_analysislogging(workflow, segs_list, insps, args, output_dir,
                          program_name="workflow", tags=None):
    """
    This module sets up the analysis logging xml file that contains the
    following information:

    * Command line arguments that the code was run with
    * Segment list of times marked as SCIENCE
    * Segment list of times marked as SCIENCE and "OK" ie. not CAT_1 vetoed
    * Segment list of times marked as SCIENCE_OK and present on the cluster
    * The times that will be analysed by the matched-filter jobs

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance.
    segs_list : pycbc.workflow.core.FileList
        A list of Files containing the information needed to generate the
        segments above. For segments generated at run time the associated
        segmentlist is a property of this object.
    insps : pycbc.workflow.core.FileList
        The output files from the matched-filtering module. Used to identify
        what times have been analysed in this workflow.
    output_dir : path
        Directory to output any files to.
    program_name : string (optional, default = "workflow")
        The program name to stick in the process/process_params tables.
    tags : list (optional, default = None)
        If given restrict to considering inspiral and segment files that
        are tagged with all tags in this list.
    """
    # BUGFIX: tags previously defaulted to a mutable list ([]), shared
    # across calls; default to None and normalize here.
    if tags is None:
        tags = []

    logging.info("Entering analysis logging module.")
    make_analysis_dir(output_dir)

    # Construct the summary XML file
    outdoc = ligolw.Document()
    outdoc.appendChild(ligolw.LIGO_LW())

    # Add process and process_params tables
    proc_id = process.register_to_xmldoc(outdoc, program_name,
                                         vars(args)).process_id

    # Now add the various segment lists to this file
    summ_segs = segmentlist([workflow.analysis_time])

    # If tags is given filter by tags
    if tags:
        for tag in tags:
            segs_list = segs_list.find_output_with_tag(tag)
            insps = insps.find_output_with_tag(tag)

    for ifo in workflow.ifos:
        # Lets get the segment lists we need
        seg_ifo_files = segs_list.find_output_with_ifo(ifo)

        # SCIENCE
        sci_seg_file = seg_ifo_files.find_output_with_tag('SCIENCE')
        if len(sci_seg_file) == 1:
            sci_seg_file = sci_seg_file[0]
            sci_segs = sci_seg_file.segmentList
            sci_def_id = segmentdb_utils.add_to_segment_definer(outdoc,
                                 proc_id, ifo, "CBC_WORKFLOW_SCIENCE", 0)
            segmentdb_utils.add_to_segment(outdoc, proc_id, sci_def_id,
                                           sci_segs)
            segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                                         sci_def_id, summ_segs, comment='')
        elif sci_seg_file:
            # FIXME: While the segment module is still fractured (#127) this
            # may not work. Please update when #127 is resolved
            pass
            #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \
            #          %(len(sci_seg_file), ifo, 'SCIENCE')
            #raise ValueError(err_msg)

        # SCIENCE_OK
        sci_ok_seg_file = seg_ifo_files.find_output_with_tag('SCIENCE_OK')
        if len(sci_ok_seg_file) == 1:
            sci_ok_seg_file = sci_ok_seg_file[0]
            sci_ok_segs = sci_ok_seg_file.segmentList
            sci_ok_def_id = segmentdb_utils.add_to_segment_definer(outdoc,
                              proc_id, ifo, "CBC_WORKFLOW_SCIENCE_OK", 0)
            segmentdb_utils.add_to_segment(outdoc, proc_id, sci_ok_def_id,
                                           sci_ok_segs)
            segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                                      sci_ok_def_id, summ_segs, comment='')
        elif sci_ok_seg_file:
            # FIXME: While the segment module is still fractured (#127) this
            # may not work. Please update when #127 is resolved
            pass
            #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \
            #          %(len(sci_ok_seg_file), ifo, 'SCIENCE_OK')
            #raise ValueError(err_msg)

        # SCIENCE_AVAILABLE
        sci_available_seg_file = seg_ifo_files.find_output_with_tag(\
                                                        'SCIENCE_AVAILABLE')
        if len(sci_available_seg_file) == 1:
            sci_available_seg_file = sci_available_seg_file[0]
            sci_available_segs = sci_available_seg_file.segmentList
            sci_available_def_id = segmentdb_utils.add_to_segment_definer(\
                     outdoc, proc_id, ifo, "CBC_WORKFLOW_SCIENCE_AVAILABLE", 0)
            segmentdb_utils.add_to_segment(outdoc, proc_id,
                                  sci_available_def_id, sci_available_segs)
            segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                               sci_available_def_id, summ_segs, comment='')
        elif sci_available_seg_file:
            # FIXME: While the segment module is still fractured (#127) this
            # may not work. Please update when #127 is resolved
            pass
            #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \
            #          %(len(sci_available_seg_file), ifo, 'SCIENCE_AVAILABLE')
            #raise ValueError(err_msg)

        # ANALYSABLE - This one needs to come from inspiral outs
        ifo_insps = insps.find_output_with_ifo(ifo)
        analysable_segs = ifo_insps.get_times_covered_by_files()

        analysable_def_id = segmentdb_utils.add_to_segment_definer(outdoc,
                                 proc_id, ifo, "CBC_WORKFLOW_ANALYSABLE", 0)
        segmentdb_utils.add_to_segment(outdoc, proc_id, analysable_def_id,
                                       analysable_segs)
        segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                                 analysable_def_id, summ_segs, comment='')

    summ_file = File(workflow.ifos, "WORKFLOW_SUMMARY",
                     workflow.analysis_time, extension=".xml",
                     directory=output_dir)
    summ_file.PFN(summ_file.storage_path, site='local')
    utils.write_filename(outdoc, summ_file.storage_path)

    return FileList([summ_file])