def setup_psd_calculate(workflow, frame_files, ifo, segments,
                        segment_name, out_dir, tags=None):
    make_analysis_dir(out_dir)
    tags = [] if not tags else tags
    if workflow.cp.has_option_tags('workflow-psd',
                                   'parallelization-factor', tags=tags):
        num_parts = int(workflow.cp.get_opt_tags('workflow-psd',
                                                 'parallelization-factor',
                                                 tags=tags))
    else:
        num_parts = 1

    # get rid of duplicate segments which happen when splitting the bank
    segments = segmentlist(frozenset(segments))

    segment_lists = list(chunks(segments, num_parts))

    psd_files = FileList([])
    for i, segs in enumerate(segment_lists):
        seg_file = SegFile.from_segment_list('%s_%s' % (segment_name, i),
                                             segmentlist(segs),
                                             segment_name, ifo,
                                             valid_segment=workflow.analysis_time,
                                             extension='xml',
                                             directory=out_dir)
        psd_files += [make_psd_file(workflow, frame_files, seg_file,
                                    segment_name, out_dir,
                                    tags=tags + ['PART%s' % i])]

    if num_parts > 1:
        return merge_psds(workflow, psd_files, ifo, out_dir, tags=tags)
    else:
        return psd_files[0]

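# Hedged configuration sketch (the value shown is illustrative): the optional
# parallelization factor read above comes from the workflow ini file, e.g.
#
#   [workflow-psd]
#   parallelization-factor = 10
#
# which splits the input segments into 10 chunks, generates one PSD file per
# chunk tagged PART0 ... PART9, and merges the results with merge_psds.
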
def build_segment_list_one(engine, gps_start_time, gps_end_time, ifo,
                           segment_name, version=None,
                           start_pad=0, end_pad=0):
    """Builds a list of segments satisfying the given criteria """
    seg_result = segmentlist([])
    sum_result = segmentlist([])

    # Is there any way to get the segment and segment summary in one query?
    # Maybe some sort of outer join where we keep track of which segment
    # summaries we've already seen.
    sql = "SELECT segment_summary.start_time, segment_summary.end_time "
    sql += "FROM segment_definer, segment_summary "
    sql += "WHERE segment_summary.segment_def_id = segment_definer.segment_def_id "
    sql += "AND segment_definer.ifos = '%s' " % ifo
    if engine.__class__ == query_engine.LdbdQueryEngine:
        sql += "AND segment_summary.segment_def_cdb = segment_definer.creator_db "
    sql += "AND segment_definer.name = '%s' " % segment_name
    sql += "AND segment_definer.version = %s " % version
    sql += "AND NOT (%s > segment_summary.end_time OR segment_summary.start_time > %s)" % (gps_start_time, gps_end_time)

    rows = engine.query(sql)

    for sum_start_time, sum_end_time in rows:
        sum_start_time = (sum_start_time < gps_start_time) and gps_start_time or sum_start_time
        sum_end_time = (sum_end_time > gps_end_time) and gps_end_time or sum_end_time

        sum_result |= segmentlist([segment(sum_start_time, sum_end_time)])

    # We can't use queries parameterized with ? since the ldbd protocol
    # doesn't support it...
    sql = "SELECT segment.start_time + %d, segment.end_time + %d " % (start_pad, end_pad)
    sql += "FROM segment, segment_definer "
    sql += "WHERE segment.segment_def_id = segment_definer.segment_def_id "

    if engine.__class__ == query_engine.LdbdQueryEngine:
        sql += "AND segment.segment_def_cdb = segment_definer.creator_db "
    sql += "AND segment_definer.ifos = '%s' " % ifo
    sql += "AND segment_definer.name = '%s' " % segment_name
    sql += "AND segment_definer.version = %s " % version
    sql += "AND NOT (%s > segment.end_time OR segment.start_time > %s)" % (gps_start_time, gps_end_time)

    rows = engine.query(sql)

    for seg_start_time, seg_end_time in rows:
        seg_start_time = (seg_start_time < gps_start_time) and gps_start_time or seg_start_time
        seg_end_time = (seg_end_time > gps_end_time) and gps_end_time or seg_end_time

        seg_result |= segmentlist([segment(seg_start_time, seg_end_time)])

    engine.close()

    return sum_result, seg_result

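# Hedged usage sketch (the engine object and flag name are hypothetical):
#
#   sums, segs = build_segment_list_one(engine, 815155213, 815241613,
#                                       'H1', 'DMT-SCIENCE', version=1)
#
# Note that a concrete version is needed in practice: with the default
# version=None, the string substitution above would emit the literal
# "segment_definer.version = None" into the SQL.
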
def get_science_segs_from_datafind_outs(datafindcaches):
    """
    This function will calculate the science segments that are covered in
    the OutGroupList containing the frame files returned by various
    calls to the datafind server. This can then be used to check whether this
    list covers what it is expected to cover.

    Parameters
    ----------
    datafindcaches : OutGroupList
        List of all the datafind output files.

    Returns
    --------
    newScienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances
        The times covered by the frames found in datafindOuts.
    """
    newScienceSegs = {}
    for cache in datafindcaches:
        if len(cache) > 0:
            groupSegs = segments.segmentlist(e.segment for e in cache).coalesce()
            ifo = cache.ifo
            if ifo not in newScienceSegs:
                newScienceSegs[ifo] = groupSegs
            else:
                newScienceSegs[ifo].extend(groupSegs)
                newScienceSegs[ifo].coalesce()
    return newScienceSegs

def fromtama(file, coltype=lal.LIGOTimeGPS):
    """
    Read a segmentlist from the file object file containing TAMA
    locked-segments data.  Parsing stops on the first line that cannot be
    parsed (which is consumed).  The segmentlist will be created with
    segments whose boundaries are of type coltype, which should raise
    ValueError if it cannot convert its string argument.

    NOTE:  TAMA locked-segments files contain non-integer start and end
    times, so the default column type is set to LIGOTimeGPS.

    NOTE:  the output is a segmentlist as described by the file;  if the
    segments in the input file are not coalesced or out of order, then
    thusly shall be the output of this function.  It is recommended that
    this function's output be coalesced before use.
    """
    segmentpat = re.compile(r"\A\s*\S+\s+\S+\s+\S+\s+([\d.+-eE]+)\s+([\d.+-eE]+)")
    l = segments.segmentlist()
    for line in file:
        try:
            [tokens] = segmentpat.findall(line)
            l.append(segments.segment(map(coltype, tokens[0:2])))
        except ValueError:
            break
    return l

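# Hedged example (the data row is made up; only the column layout matters):
# TAMA rows carry three leading fields before the start and end times, which
# is all the parser above relies on:
#
#   >>> from io import StringIO
#   >>> data = StringIO(u"1 K1 locked 700000000.0 700000100.5\n")
#   >>> fromtama(data, coltype=float)
#   [segment(700000000.0, 700000100.5)]
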
def segmentlistdict(self):
    """
    A segmentlistdict object describing the instruments and time
    spanned by this CacheEntry.  A new object is constructed each time
    this attribute is accessed (segments are immutable so there is no
    reason to try to share a reference to the CacheEntry's internal
    segment;  modifications of one would not be reflected in the other
    anyway).

    Example:

    >>> c = CacheEntry(u"H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
    >>> c.segmentlistdict
    {u'H1': [segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]}

    The \"observatory\" column of the cache entry, which is frequently
    used to store instrument names, is parsed into instrument names for
    the dictionary keys using the same rules as
    pycbc_glue.ligolw.lsctables.instrument_set_from_ifos().

    Example:

    >>> c = CacheEntry(u"H1H2 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1H2-815901601-576.xml")
    >>> c.segmentlistdict
    {u'H1H2': [segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]}
    """
    # the import has to be done here to break the cyclic dependency
    from pycbc_glue.ligolw.lsctables import instrument_set_from_ifos
    instruments = instrument_set_from_ifos(self.observatory) or (None,)
    return segments.segmentlistdict((instrument, segments.segmentlist(self.segment is not None and [self.segment] or [])) for instrument in instruments)

def indices_within_segments(times, segment_files, ifo=None, segment_name=None):
    """ Return the list of indices that should be vetoed by the segments in
    the given list of segment files.

    Parameters
    ----------
    times: numpy.ndarray of integer type
        Array of gps start times
    segment_files: string or list of strings
        A string or list of strings that contain the path to xml files that
        contain a segment table
    ifo: string, optional
        The ifo to retrieve segments for from the segment files
    segment_name: str, optional
        name of segment

    Returns
    -------
    indices: numpy.ndarray
        The array of index values within the segments
    segmentlist:
        The segment list corresponding to the selected time.
    """
    veto_segs = segmentlist([])
    indices = numpy.array([], dtype=numpy.uint32)
    for veto_file in segment_files:
        veto_segs += select_segments_by_definer(veto_file, segment_name, ifo)

    veto_segs.coalesce()
    start, end = segments_to_start_end(veto_segs)
    if len(start) > 0:
        idx = indices_within_times(times, start, end)
        indices = numpy.union1d(indices, idx)

    return indices, veto_segs.coalesce()

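# Hedged usage sketch (the file name is hypothetical): select the indices of
# trigger times falling inside the vetoed spans, then mask them out:
#
#   idx, vetoed = indices_within_segments(trigger_times, ['H1-VETOTIME.xml'],
#                                         ifo='H1', segment_name='VETO_CAT2')
#   kept_times = numpy.delete(trigger_times, idx)
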
def __init__(self, active=(), valid=(), instruments=set(), name=None,
             version=None, comment=None):
    """
    Initialize a new LigolwSegmentList instance.  active and valid are
    sequences that will be cast to segments.segmentlist objects.  They
    can be generator expressions.  The "active" sequence is what is
    usually thought of as the segment list, the "valid" sequence
    identifies the intervals of time for which the segment list's state
    is defined.
    """
    self.valid = segments.segmentlist(valid)
    self.active = segments.segmentlist(active)
    self.instruments = instruments
    self.name = name
    self.version = version
    self.comment = comment

def find_segments(doc, key, use_segment_table=True):
    key_pieces = key.split(':')
    while len(key_pieces) < 3:
        key_pieces.append('*')

    filter_func = lambda x: str(x.ifos) == key_pieces[0] and \
        (str(x.name) == key_pieces[1] or key_pieces[1] == '*') and \
        (str(x.version) == key_pieces[2] or key_pieces[2] == '*')

    # Find all segment definers matching the criteria
    seg_def_table = table.get_table(doc, lsctables.SegmentDefTable.tableName)
    seg_defs = filter(filter_func, seg_def_table)
    seg_def_ids = map(lambda x: str(x.segment_def_id), seg_defs)

    # Find all segments belonging to those definers
    if use_segment_table:
        seg_table = table.get_table(doc, lsctables.SegmentTable.tableName)
        seg_entries = filter(lambda x: str(x.segment_def_id) in seg_def_ids,
                             seg_table)
    else:
        seg_sum_table = table.get_table(doc, lsctables.SegmentSumTable.tableName)
        seg_entries = filter(lambda x: str(x.segment_def_id) in seg_def_ids,
                             seg_sum_table)

    # Combine into a segmentlist
    ret = segmentlist(map(lambda x: segment(x.start_time, x.end_time),
                          seg_entries))

    ret.coalesce()

    return ret

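# Hedged usage sketch (the flag name is hypothetical): keys take the form
# "ifo:name:version"; omitted trailing pieces act as wildcards:
#
#   sci = find_segments(doc, 'H1:DMT-SCIENCE:1')
#   all_h1 = find_segments(doc, 'H1')        # expands to 'H1:*:*'
#   summ = find_segments(doc, 'H1:DMT-SCIENCE', use_segment_table=False)
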
def fromfilenames(filenames, coltype=int):
    """
    Return a segmentlist describing the intervals spanned by the files
    whose names are given in the list filenames.  The segmentlist is
    constructed by parsing the file names, and the boundaries of each
    segment are coerced to type coltype.

    The file names are parsed using a generalization of the format
    described in Technical Note LIGO-T010150-00-E, which allows the
    start time and duration appearing in the file name to be
    non-integers.

    NOTE:  the output is a segmentlist as described by the file names;
    if the file names are not in time order, or describe overlaping
    segments, then thusly shall be the output of this function.  It is
    recommended that this function's output be coalesced before use.
    """
    pattern = re.compile(r"-([\d.]+)-([\d.]+)\.[\w_+#]+\Z")
    l = segments.segmentlist()
    for name in filenames:
        [(s, d)] = pattern.findall(name.strip().rstrip(".gz"))
        s = coltype(s)
        d = coltype(d)
        l.append(segments.segment(s, s + d))
    return l

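# Worked example (the frame file name is hypothetical but follows the
# LIGO-T010150 form): the trailing "-<start>-<duration>.<ext>" is parsed, so
#
#   >>> fromfilenames(["H-H1_RDS_C03_L2-815901601-576.gwf"])
#   [segment(815901601, 815902177)]
#
# i.e. a segment running from the start time to start + duration.
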
def find_times(self, site, frametype, gpsstart=None, gpsend=None):
    """Query the LDR for times for which frames are available

    Use gpsstart and gpsend to restrict the returned times to
    this semiopen interval.

    @returns: L{segmentlist<pycbc_glue.segments.segmentlist>}

    @param site: single-character name of site to match
    @param frametype: name of frametype to match
    @param gpsstart: integer GPS start time of query
    @param gpsend: integer GPS end time of query
    @type site: L{str}
    @type frametype: L{str}
    @type gpsstart: L{int}
    @type gpsend: L{int}
    """
    if gpsstart and gpsend:
        url = ("%s/gwf/%s/%s/segments/%s,%s.json"
               % (_url_prefix, site, frametype, gpsstart, gpsend))
    else:
        url = ("%s/gwf/%s/%s/segments.json"
               % (_url_prefix, site, frametype))

    response = self._requestresponse("GET", url)
    segmentlist = decode(response.read())
    return segments.segmentlist(map(segments.segment, segmentlist))

def expand_version_number(engine, segdef):
    ifo, name, version, start_time, end_time, start_pad, end_pad = segdef

    if version != '*':
        return [segdef]

    # Start looking at the full interval
    intervals = segmentlist([segment(start_time, end_time)])

    # Find the maximum version number
    sql = "SELECT max(version) FROM segment_definer "
    sql += "WHERE segment_definer.ifos = '%s' " % ifo
    sql += "AND segment_definer.name = '%s' " % name

    rows = engine.query(sql)
    try:
        version = len(rows[0]) and rows[0][0] or 1
    except Exception:
        version = None

    results = []

    while version > 0:
        for interval in intervals:
            segs = query_segments(engine, 'segment_summary',
                                  [(ifo, name, version,
                                    interval[0], interval[1], 0, 0)])

            for seg in segs[0]:
                results.append((ifo, name, version, seg[0], seg[1], 0, 0))

        intervals.coalesce()
        intervals -= segs[0]

        version -= 1

    return results

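# Hedged sketch (the flag name is hypothetical): a '*' version is expanded
# from the highest defined version downwards, with each lower version only
# filling times not already covered by a higher one:
#
#   segdefs = expand_version_number(engine, ('H1', 'DMT-SCIENCE', '*',
#                                            815155213, 815241613, 0, 0))
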
def fromsegwizard(file, coltype=int, strict=True):
    """
    Read a segmentlist from the file object file containing a segwizard
    compatible segment list.  Parsing stops on the first line that cannot
    be parsed (which is consumed).  The segmentlist will be created with
    segments whose boundaries are of type coltype, which should raise
    ValueError if it cannot convert its string argument.  Two-column,
    three-column, and four-column segwizard files are recognized, but the
    entire file must be in the same format, which is decided by the first
    parsed line.  If strict is True and the file is in three- or
    four-column format, then each segment's duration is checked against
    that column in the input file.

    NOTE:  the output is a segmentlist as described by the file;  if the
    segments in the input file are not coalesced or out of order, then
    thusly shall be the output of this function.  It is recommended that
    this function's output be coalesced before use.
    """
    commentpat = re.compile(r"\s*([#;].*)?\Z", re.DOTALL)
    twocolsegpat = re.compile(r"\A\s*([\d.+-eE]+)\s+([\d.+-eE]+)\s*\Z")
    threecolsegpat = re.compile(r"\A\s*([\d.+-eE]+)\s+([\d.+-eE]+)\s+([\d.+-eE]+)\s*\Z")
    fourcolsegpat = re.compile(r"\A\s*([\d]+)\s+([\d.+-eE]+)\s+([\d.+-eE]+)\s+([\d.+-eE]+)\s*\Z")
    format = None
    l = segments.segmentlist()
    for line in file:
        line = commentpat.split(line)[0]
        if not line:
            continue
        try:
            [tokens] = fourcolsegpat.findall(line)
            num = int(tokens[0])
            seg = segments.segment(map(coltype, tokens[1:3]))
            duration = coltype(tokens[3])
            this_line_format = 4
        except ValueError:
            try:
                [tokens] = threecolsegpat.findall(line)
                seg = segments.segment(map(coltype, tokens[0:2]))
                duration = coltype(tokens[2])
                this_line_format = 3
            except ValueError:
                try:
                    [tokens] = twocolsegpat.findall(line)
                    seg = segments.segment(map(coltype, tokens[0:2]))
                    duration = abs(seg)
                    this_line_format = 2
                except ValueError:
                    break
        if strict:
            if abs(seg) != duration:
                raise ValueError("segment '%s' has incorrect duration" % line)
            if format is None:
                format = this_line_format
            elif format != this_line_format:
                raise ValueError("segment '%s' format mismatch" % line)
        l.append(seg)
    return l

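# Hedged example (illustrative data; behaviour as implied by the parser
# above): a four-column segwizard list read from an in-memory file:
#
#   >>> from io import StringIO
#   >>> data = StringIO(u"# seg  start  stop  duration\n"
#   ...                 u"1  10  20  10\n"
#   ...                 u"2  30  35  5\n")
#   >>> fromsegwizard(data)
#   [segment(10, 20), segment(30, 35)]
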
def S2playground(extent):
    """
    Return a segmentlist identifying the S2 playground times within the
    interval defined by the segment extent.

    Example:

    >>> from pycbc_glue import segments
    >>> S2playground(segments.segment(874000000, 874010000))
    [segment(874000013, 874000613), segment(874006383, 874006983)]
    """
    lo = int(extent[0])
    lo -= (lo - 729273613) % 6370
    hi = int(extent[1]) + 1
    return segments.segmentlist(segments.segment(t, t + 600)
                                for t in range(lo, hi, 6370)) & segments.segmentlist([extent])

def run_query_segments(doc, proc_id, engine, gps_start_time, gps_end_time,
                       included_segments_string, excluded_segments_string=None,
                       write_segments=True, start_pad=0, end_pad=0):
    """Runs a segment query.  This was originally part of
    ligolw_query_segments, but now is also used by ligolw_segments_from_cats.

    The write_segments option is provided so callers can coalesce segments
    obtained over several invocations (as segments_from_cats does).
    """

    if write_segments:
        all_ifos = {}

        for ifo, segment_name, version in split_segment_ids(included_segments_string.split(',')):
            all_ifos[ifo] = True

        new_seg_def_id = add_to_segment_definer(doc, proc_id,
                                                ''.join(all_ifos.keys()),
                                                'result', 0)
        add_to_segment_summary(doc, proc_id, new_seg_def_id,
                               [[gps_start_time, gps_end_time]])

    result = segmentlist([])

    for ifo, segment_name, version in split_segment_ids(included_segments_string.split(',')):
        sum_segments, seg_segments = build_segment_list(engine, gps_start_time,
                                                        gps_end_time, ifo,
                                                        segment_name, version,
                                                        start_pad, end_pad)
        seg_def_id = add_to_segment_definer(doc, proc_id, ifo, segment_name,
                                            version)
        add_to_segment_summary(doc, proc_id, seg_def_id, sum_segments)

        # and accumulate segments
        result |= seg_segments

    # Excluded segments are not required
    if excluded_segments_string:
        excluded_segments = segmentlist([])

        for ifo, segment_name, version in split_segment_ids(excluded_segments_string.split(',')):
            sum_segments, seg_segments = build_segment_list(engine,
                                                            gps_start_time,
                                                            gps_end_time, ifo,
                                                            segment_name,
                                                            version)
            excluded_segments |= seg_segments

        result = result - excluded_segments

    result.coalesce()

    # Add the segments
    if write_segments:
        add_to_segment(doc, proc_id, new_seg_def_id, result)

    return result

def get_segment_summary_times(scienceFile, segmentName):
    """
    This function will find the times for which the segment_summary is set
    for the flag given by segmentName.

    Parameters
    -----------
    scienceFile : SegFile
        The segment file that we want to use to determine this.
    segmentName : string
        The DQ flag to search for times in the segment_summary table.

    Returns
    ---------
    summSegList : glue.segments.segmentlist
        The times that are covered in the segment summary table.
    """
    # Parse the segmentName
    segmentName = segmentName.split(':')
    if not len(segmentName) in [2, 3]:
        raise ValueError("Invalid channel name %s." % (segmentName))
    ifo = segmentName[0]
    channel = segmentName[1]
    version = ''
    if len(segmentName) == 3:
        version = int(segmentName[2])

    # Load the filename
    xmldoc = utils.load_filename(scienceFile.cache_entry.path,
                                 gz=scienceFile.cache_entry.path.endswith("gz"),
                                 contenthandler=ContentHandler)

    # Get the segment_def_id for the segmentName
    segmentDefTable = table.get_table(xmldoc, "segment_definer")
    for entry in segmentDefTable:
        if (entry.ifos == ifo) and (entry.name == channel):
            if len(segmentName) == 2 or (entry.version == version):
                segDefID = entry.segment_def_id
                break
    else:
        raise ValueError("Cannot find channel %s in segment_definer table."
                         % (segmentName))

    # Get the segmentlist corresponding to this segmentName in segment_summary
    segmentSummTable = table.get_table(xmldoc, "segment_summary")
    summSegList = segments.segmentlist([])
    for entry in segmentSummTable:
        if entry.segment_def_id == segDefID:
            segment = segments.segment(entry.start_time, entry.end_time)
            summSegList.append(segment)
    summSegList.coalesce()

    return summSegList

def segmenttable_get_by_name(xmldoc, name):
    """
    Retrieve the segmentlists whose name equals name.  The result is a
    segmentlistdict indexed by instrument.

    The output of this function is not coalesced, each segmentlist
    contains the segments as found in the segment table.

    NOTE:  this is a light-weight version of the .get_by_name() method
    of the LigolwSegments class intended for use when the full machinery
    of that class is not required.  Considerably less document
    validation and error checking is performed by this version.
    Consider using that method instead if your application will be
    interfacing with the document via that class anyway.
    """
    #
    # find required tables
    #

    def_table = lsctables.SegmentDefTable.get_table(xmldoc)
    seg_table = lsctables.SegmentTable.get_table(xmldoc)

    #
    # segment_def_id --> instrument names mapping but only for
    # segment_definer entries bearing the requested name
    #

    instrument_index = dict((row.segment_def_id, row.instruments)
                            for row in def_table if row.name == name)

    #
    # populate result segmentlistdict object from segment_def_map table
    # and index
    #

    instruments = set(instrument
                      for instruments in instrument_index.values()
                      for instrument in instruments)

    result = segments.segmentlistdict((instrument, segments.segmentlist())
                                      for instrument in instruments)

    for row in seg_table:
        if row.segment_def_id in instrument_index:
            seg = row.segment
            for instrument in instrument_index[row.segment_def_id]:
                result[instrument].append(seg)

    #
    # done
    #

    return result

def select_segments_by_definer(segment_file, segment_name=None, ifo=None):
    """ Return the list of segments that match the segment name

    Parameters
    ----------
    segment_file: str
        path to segment xml file
    segment_name: str
        Name of segment
    ifo: str, optional
        The ifo to retrieve segments for

    Returns
    -------
    seg: list of segments
    """
    from pycbc_glue.ligolw.ligolw import LIGOLWContentHandler as h
    lsctables.use_in(h)
    indoc = ligolw_utils.load_filename(segment_file, False, contenthandler=h)
    segment_table = table.get_table(indoc, 'segment')

    seg_def_table = table.get_table(indoc, 'segment_definer')
    def_ifos = seg_def_table.getColumnByName('ifos')
    def_names = seg_def_table.getColumnByName('name')
    def_ids = seg_def_table.getColumnByName('segment_def_id')

    valid_id = []
    for def_ifo, def_name, def_id in zip(def_ifos, def_names, def_ids):
        if ifo and ifo != def_ifo:
            continue
        if segment_name and segment_name != def_name:
            continue
        valid_id += [def_id]

    start = numpy.array(segment_table.getColumnByName('start_time'))
    start_ns = numpy.array(segment_table.getColumnByName('start_time_ns'))
    end = numpy.array(segment_table.getColumnByName('end_time'))
    end_ns = numpy.array(segment_table.getColumnByName('end_time_ns'))
    start, end = start + 1e-9 * start_ns, end + 1e-9 * end_ns
    did = segment_table.getColumnByName('segment_def_id')

    keep = numpy.array([d in valid_id for d in did])
    if sum(keep) > 0:
        return start_end_to_segments(start[keep], end[keep])
    else:
        return segmentlist([])

def fromlalcache(cachefile, coltype=int):
    """
    Construct a segmentlist representing the times spanned by the files
    identified in the LAL cache contained in the file object cachefile.
    The segmentlist will be created with segments whose boundaries are of
    type coltype, which should raise ValueError if it cannot convert its
    string argument.

    Example:

    >>> from pycbc_glue.lal import LIGOTimeGPS
    >>> cache_seglists = fromlalcache(open(filename), coltype=LIGOTimeGPS).coalesce()

    See also:

    pycbc_glue.lal.CacheEntry
    """
    return segments.segmentlist(lal.CacheEntry(l, coltype=coltype).segment
                                for l in cachefile)

def from_range_strings(ranges, boundtype=int):
    """
    Parse a list of ranges expressed as strings in the form "value" or
    "first:last" into an equivalent pycbc_glue.segments.segmentlist.  In
    the latter case, an empty string for "first" and(or) "last" indicates
    a (semi)infinite range.  A typical use for this function is in
    parsing command line options or entries in configuration files.

    NOTE:  the output is a segmentlist as described by the strings;  if
    the input segments are not coalesced or out of order, then thusly
    shall be the output of this function.  It is recommended that this
    function's output be coalesced before use.

    Example:

    >>> text = "0:10,35,100:"
    >>> from_range_strings(text.split(","))
    [segment(0, 10), segment(35, 35), segment(100, infinity)]
    """
    # preallocate segmentlist
    segs = segments.segmentlist([None] * len(ranges))

    # iterate over strings
    for i, range in enumerate(ranges):
        parts = range.split(":")
        if len(parts) == 1:
            parts = boundtype(parts[0])
            segs[i] = segments.segment(parts, parts)
            continue
        if len(parts) != 2:
            raise ValueError(range)
        if parts[0] == "":
            parts[0] = segments.NegInfinity
        else:
            parts[0] = boundtype(parts[0])
        if parts[1] == "":
            parts[1] = segments.PosInfinity
        else:
            parts[1] = boundtype(parts[1])
        segs[i] = segments.segment(parts[0], parts[1])

    # success
    return segs

def get_missing_segs_from_frame_file_cache(datafindcaches):
    """
    This function will use os.path.isfile to determine if all the frame files
    returned by the local datafind server actually exist on the disk. This can
    then be used to update the science times if needed.

    Parameters
    -----------
    datafindcaches : OutGroupList
        List of all the datafind output files.

    Returns
    --------
    missingFrameSegs : Dict. of ifo keyed glue.segment.segmentlist instances
        The times corresponding to missing frames found in datafindOuts.
    missingFrames: Dict. of ifo keyed lal.Cache instances
        The list of missing frames
    """
    missingFrameSegs = {}
    missingFrames = {}
    for cache in datafindcaches:
        if len(cache) > 0:
            # Don't bother if these are not file:// urls, assume all urls in
            # one cache file must be the same type
            if not cache[0].scheme == 'file':
                warn_msg = "We have %s entries in the " % (cache[0].scheme,)
                warn_msg += "cache file. I do not check if these exist."
                logging.info(warn_msg)
                continue
            _, currMissingFrames = cache.checkfilesexist(on_missing="warn")
            missingSegs = segments.segmentlist(e.segment
                                   for e in currMissingFrames).coalesce()
            ifo = cache.ifo
            if ifo not in missingFrameSegs:
                missingFrameSegs[ifo] = missingSegs
                missingFrames[ifo] = lal.Cache(currMissingFrames)
            else:
                missingFrameSegs[ifo].extend(missingSegs)
                # NOTE: This .coalesce probably isn't needed as the segments
                #       should be disjoint. If speed becomes an issue maybe
                #       remove it?
                missingFrameSegs[ifo].coalesce()
                missingFrames[ifo].extend(currMissingFrames)
    return missingFrameSegs, missingFrames

def Fold(seglist1, seglist2):
    """
    An iterator that generates the results of taking the intersection of
    seglist1 with each segment in seglist2 in turn.  In each result, the
    segment start and stop values are adjusted to be with respect to the
    start of the corresponding segment in seglist2.  See also the
    segmentlist_range() function.

    This has use in applications that wish to convert ranges of values
    to ranges relative to epoch boundaries.  Below, a list of time
    intervals in hours is converted to a sequence of daily interval
    lists with times relative to midnight.

    Example:

    >>> from pycbc_glue.segments import *
    >>> x = segmentlist([segment(0, 13), segment(14, 20), segment(22, 36)])
    >>> for y in Fold(x, segmentlist_range(0, 48, 24)): print y
    ...
    [segment(0, 13), segment(14, 20), segment(22, 24)]
    [segment(0, 12)]
    """
    for seg in seglist2:
        yield (seglist1 & segments.segmentlist([seg])).shift(-seg[0])

# SCIENCE_OK
sciokSegFile = segIfoFiles.find_output_with_tag('SCIENCE_OK')
assert(len(sciokSegFile) == 1)
sciokSegFile = sciokSegFile[0]
sciokSegs = sciokSegFile.segmentList

# SCIENCE_AVAILABLE
sciavailableSegFile = segIfoFiles.find_output_with_tag('SCIENCE_AVAILABLE')
assert(len(sciavailableSegFile) == 1)
sciavailableSegFile = sciavailableSegFile[0]
sciavailableSegs = sciavailableSegFile.segmentList

# ANALYSABLE - This one needs to come from inspiral outs
analysableSegs = insps.get_times_covered_by_files()

# And add these to the output file
# Start with the segment summary
summSegs = segments.segmentlist([workflow.analysis_time])
sci_def_id = segmentdb_utils.add_to_segment_definer(outdoc, proc_id, ifo,
                                                    "CBC_DAYHOPE_SCIENCE", 0)
sciok_def_id = segmentdb_utils.add_to_segment_definer(outdoc, proc_id, ifo,
                                                      "CBC_DAYHOPE_SCIENCE_OK", 0)
sciavailable_def_id = segmentdb_utils.add_to_segment_definer(outdoc, proc_id,
                                        ifo, "CBC_DAYHOPE_SCIENCE_AVAILABLE", 0)
analysable_def_id = segmentdb_utils.add_to_segment_definer(outdoc, proc_id,
                                        ifo, "CBC_DAYHOPE_ANALYSABLE", 0)

segmentdb_utils.add_to_segment(outdoc, proc_id, sci_def_id, sciSegs)
segmentdb_utils.add_to_segment(outdoc, proc_id, sciok_def_id, sciokSegs)
segmentdb_utils.add_to_segment(outdoc, proc_id, sciavailable_def_id,
                               sciavailableSegs)
segmentdb_utils.add_to_segment(outdoc, proc_id, analysable_def_id,
                               analysableSegs)

def start_end_to_segments(start, end):
    """ Convert paired arrays of start and end times into a segmentlist. """
    return segmentlist([segment(s, e) for s, e in zip(start, end)])

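# Hedged example: the inverse companion of segments_to_start_end used above;
# paired start/end sequences become a segmentlist:
#
#   >>> start_end_to_segments([1.0, 10.0], [5.0, 20.0])
#   [segment(1.0, 5.0), segment(10.0, 20.0)]
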
def setup_datafind_workflow(workflow, scienceSegs, outputDir, seg_file=None,
                            tags=None):
    """
    Setup datafind section of the workflow. This section is responsible for
    generating, or setting up the workflow to generate, a list of files that
    record the location of the frame files needed to perform the analysis.
    There could be multiple options here, the datafind jobs could be done at
    run time or could be put into a dag. The subsequent jobs will know
    what was done here from the OutFileList containing the datafind jobs
    (and the Dagman nodes if appropriate).
    For now the only implemented option is to generate the datafind files at
    runtime. This module can also check if the frameFiles actually exist,
    check whether the obtained segments line up with the original ones and
    update the science segments to reflect missing data files.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        The workflow class that stores the jobs that will be run.
    scienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances
        This contains the times that the workflow is expected to analyse.
    outputDir : path
        All output files written by datafind processes will be written to this
        directory.
    seg_file : SegFile, optional (default=None)
        The file returned by get_science_segments containing the science
        segments and the associated segment_summary. This will be used for
        the segment_summary test and is required if, and only if,
        performing that test.
    tags : list of string, optional (default=None)
        Use this to specify tags. This can be used if this module is being
        called more than once to give call specific configuration (by setting
        options in [workflow-datafind-${TAG}] rather than [workflow-datafind]).
        This is also used to tag the Files returned by the class to uniqueify
        the Files and uniqueify the actual filename.
        FIXME: Filenames may not be unique with current codes!

    Returns
    --------
    datafindOuts : OutGroupList
        List of all the datafind output files for use later in the pipeline.
    sci_avlble_file : SegFile
        SegFile containing the analysable time after checks in the datafind
        module are applied to the input segment list. For production runs this
        is expected to be equal to the input segment list.
    scienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances
        This contains the times that the workflow is expected to analyse. If
        the updateSegmentTimes kwarg is given this will be updated to reflect
        any instances of missing data.
    sci_avlble_name : string
        The name with which the analysable time is stored in the
        sci_avlble_file.
    """
    if tags is None:
        tags = []
    logging.info("Entering datafind module")
    make_analysis_dir(outputDir)
    cp = workflow.cp

    # Parse for options in ini file
    datafindMethod = cp.get_opt_tags("workflow-datafind",
                                     "datafind-method", tags)

    if cp.has_option_tags("workflow-datafind",
                          "datafind-check-segment-gaps", tags):
        checkSegmentGaps = cp.get_opt_tags("workflow-datafind",
                                           "datafind-check-segment-gaps", tags)
    else:
        checkSegmentGaps = "no_test"
    if cp.has_option_tags("workflow-datafind",
                          "datafind-check-frames-exist", tags):
        checkFramesExist = cp.get_opt_tags("workflow-datafind",
                                           "datafind-check-frames-exist", tags)
    else:
        checkFramesExist = "no_test"
    if cp.has_option_tags("workflow-datafind",
                          "datafind-check-segment-summary", tags):
        checkSegmentSummary = cp.get_opt_tags("workflow-datafind",
                                        "datafind-check-segment-summary", tags)
    else:
        checkSegmentSummary = "no_test"

    logging.info("Starting datafind with setup_datafind_runtime_generated")
    if datafindMethod == "AT_RUNTIME_MULTIPLE_CACHES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_cache_multi_calls_perifo(cp, scienceSegs,
                                                            outputDir,
                                                            tags=tags)
    elif datafindMethod == "AT_RUNTIME_SINGLE_CACHES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_cache_single_call_perifo(cp, scienceSegs,
                                                            outputDir,
                                                            tags=tags)
    elif datafindMethod == "AT_RUNTIME_MULTIPLE_FRAMES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_frames_multi_calls_perifo(cp, scienceSegs,
                                                             outputDir,
                                                             tags=tags)
    elif datafindMethod == "AT_RUNTIME_SINGLE_FRAMES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_frames_single_call_perifo(cp, scienceSegs,
                                                             outputDir,
                                                             tags=tags)
    elif datafindMethod == "FROM_PREGENERATED_LCF_FILES":
        ifos = scienceSegs.keys()
        datafindcaches, datafindouts = \
            setup_datafind_from_pregenerated_lcf_files(cp, ifos, outputDir,
                                                       tags=tags)
    else:
        msg = "Entry datafind-method in [workflow-datafind] does not have "
        msg += "expected value. Valid values are "
        msg += "AT_RUNTIME_MULTIPLE_FRAMES, AT_RUNTIME_SINGLE_FRAMES, "
        msg += "AT_RUNTIME_MULTIPLE_CACHES or AT_RUNTIME_SINGLE_CACHES. "
        msg += "Consult the documentation for more info."
        raise ValueError(msg)

    using_backup_server = False
    if datafindMethod == "AT_RUNTIME_MULTIPLE_FRAMES" or \
            datafindMethod == "AT_RUNTIME_SINGLE_FRAMES":
        if cp.has_option_tags("workflow-datafind",
                              "datafind-backup-datafind-server", tags):
            using_backup_server = True
            backup_server = cp.get_opt_tags("workflow-datafind",
                                      "datafind-backup-datafind-server", tags)
            cp_new = copy.deepcopy(cp)
            cp_new.set("workflow-datafind",
                       "datafind-ligo-datafind-server", backup_server)
            cp_new.set('datafind', 'urltype', 'gsiftp')
            backup_datafindcaches, backup_datafindouts = \
                setup_datafind_runtime_frames_single_call_perifo(cp_new,
                                                                 scienceSegs,
                                                                 outputDir,
                                                                 tags=tags)
            backup_datafindouts = datafind_keep_unique_backups(
                backup_datafindouts, datafindouts)
            datafindcaches.extend(backup_datafindcaches)
            datafindouts.extend(backup_datafindouts)

    logging.info("setup_datafind_runtime_generated completed")

    # If we don't have frame files covering all times we can update the
    # science segments.
    if checkSegmentGaps in ['warn', 'update_times', 'raise_error']:
        logging.info("Checking science segments against datafind output....")
        newScienceSegs = get_science_segs_from_datafind_outs(datafindcaches)
        logging.info("New segments calculated from data find output.....")
        missingData = False
        for ifo in scienceSegs.keys():
            # If no science segments in input then do nothing
            if not scienceSegs[ifo]:
                msg = "No science segments are present for ifo %s, " % (ifo)
                msg += "the segment metadata indicates there is no analyzable"
                msg += " strain data between the selected GPS start and end "
                msg += "times."
                logging.warning(msg)
                continue
            if ifo not in newScienceSegs:
                msg = "No data frames were found corresponding to the science "
                msg += "segments for ifo %s" % (ifo)
                logging.error(msg)
                missingData = True
                if checkSegmentGaps == 'update_times':
                    scienceSegs[ifo] = segments.segmentlist()
                continue
            missing = scienceSegs[ifo] - newScienceSegs[ifo]
            if abs(missing):
                msg = "From ifo %s we are missing frames covering:" % (ifo)
                msg += "\n%s" % "\n".join(map(str, missing))
                missingData = True
                logging.error(msg)
                if checkSegmentGaps == 'update_times':
                    # Remove missing time, so that we can carry on if desired
                    logging.info("Updating science segments for ifo %s."
                                 % (ifo))
                    scienceSegs[ifo] = scienceSegs[ifo] - missing
        if checkSegmentGaps == 'raise_error' and missingData:
            raise ValueError("Workflow cannot find needed data, exiting.")
        logging.info("Done checking, any discrepancies are reported above.")
    elif checkSegmentGaps == 'no_test':
        pass
    else:
        errMsg = "checkSegmentGaps kwarg must take a value from 'no_test', "
        errMsg += "'warn', 'update_times' or 'raise_error'."
        raise ValueError(errMsg)

    # Do all of the frame files that were returned actually exist?
    if checkFramesExist in ['warn', 'update_times', 'raise_error']:
        logging.info("Verifying that all frames exist on disk.")
        missingFrSegs, missingFrames = \
            get_missing_segs_from_frame_file_cache(datafindcaches)
        missingFlag = False
        for ifo in missingFrames.keys():
            # If no data in the input then do nothing
            if not scienceSegs[ifo]:
                continue
            # If using a backup server, does the frame exist remotely?
            if using_backup_server:
                # WARNING: This will be slow, but hopefully it will not occur
                #          for too many frames. This could be optimized if
                #          it becomes necessary.
                new_list = []
                for frame in missingFrames[ifo]:
                    for dfout in datafindouts:
                        dfout_pfns = list(dfout.pfns)
                        dfout_urls = [a.url for a in dfout_pfns]
                        if frame.url in dfout_urls:
                            pfn = dfout_pfns[dfout_urls.index(frame.url)]
                            dfout.removePFN(pfn)
                            if len(dfout.pfns) == 0:
                                new_list.append(frame)
                            else:
                                msg = "Frame %s not found locally. " \
                                      % (frame.url,)
                                msg += "Replacing with remote url(s) %s." \
                                       % (str([a.url for a in dfout.pfns]),)
                                logging.info(msg)
                            break
                    else:
                        new_list.append(frame)
                missingFrames[ifo] = new_list
            if missingFrames[ifo]:
                msg = "From ifo %s we are missing the following frames:" \
                      % (ifo)
                msg += '\n'.join([a.url for a in missingFrames[ifo]])
                missingFlag = True
                logging.error(msg)
            if checkFramesExist == 'update_times':
                # Remove missing times, so that we can carry on if desired
                logging.info("Updating science times for ifo %s." % (ifo))
                scienceSegs[ifo] = scienceSegs[ifo] - missingFrSegs[ifo]
        if checkFramesExist == 'raise_error' and missingFlag:
            raise ValueError("Workflow cannot find all frames, exiting.")
        logging.info("Finished checking frames.")
    elif checkFramesExist == 'no_test':
        pass
    else:
        errMsg = "checkFramesExist kwarg must take a value from 'no_test', "
        errMsg += "'warn', 'update_times' or 'raise_error'."
        raise ValueError(errMsg)

    # Check if there are cases where frames exist, but no entry in the
    # segment summary table are present.
    if checkSegmentSummary in ['warn', 'raise_error']:
        logging.info("Checking the segment summary table against frames.")
        dfScienceSegs = get_science_segs_from_datafind_outs(datafindcaches)
        missingFlag = False
        # NOTE: Should this be overrideable in the config file?
        sci_seg_name = "SCIENCE"
        if seg_file is None:
            err_msg = "You must provide the science segments SegFile object "
            err_msg += "if using the datafind-check-segment-summary option."
            raise ValueError(err_msg)
        if seg_file.seg_summ_dict is None:
            err_msg = "The provided science segments SegFile object must "
            err_msg += "contain a valid segment_summary table if using the "
            err_msg += "datafind-check-segment-summary option."
            raise ValueError(err_msg)
        seg_summary_times = seg_file.seg_summ_dict
        for ifo in dfScienceSegs.keys():
            curr_seg_summ_times = seg_summary_times[ifo + ":" + sci_seg_name]
            missing = (dfScienceSegs[ifo] & seg_file.valid_segments)
            missing.coalesce()
            missing = missing - curr_seg_summ_times
            missing.coalesce()
            scienceButNotFrame = scienceSegs[ifo] - dfScienceSegs[ifo]
            scienceButNotFrame.coalesce()
            missing2 = scienceSegs[ifo] - scienceButNotFrame
            missing2.coalesce()
            missing2 = missing2 - curr_seg_summ_times
            missing2.coalesce()
            if abs(missing):
                msg = "From ifo %s the following times have frames, " % (ifo)
                msg += "but are not covered in the segment summary table."
                msg += "\n%s" % "\n".join(map(str, missing))
                logging.error(msg)
                missingFlag = True
            if abs(missing2):
                msg = "From ifo %s the following times have frames, " % (ifo)
                msg += "are science, and are not covered in the segment "
                msg += "summary table."
                msg += "\n%s" % "\n".join(map(str, missing2))
                logging.error(msg)
                missingFlag = True
        if checkSegmentSummary == 'raise_error' and missingFlag:
            errMsg = "Segment_summary discrepancy detected, exiting."
            raise ValueError(errMsg)
    elif checkSegmentSummary == 'no_test':
        pass
    else:
        errMsg = "checkSegmentSummary kwarg must take a value from "
        errMsg += "'no_test', 'warn', or 'raise_error'."
        raise ValueError(errMsg)

    # Now need to create the file for SCIENCE_AVAILABLE
    sci_avlble_dict = segments.segmentlistdict()
    # NOTE: Should this be overrideable in the config file?
    sci_avlble_name = "SCIENCE_AVAILABLE"
    for ifo in scienceSegs.keys():
        sci_avlble_dict[ifo + ':' + sci_avlble_name] = scienceSegs[ifo]

    sci_avlble_file = SegFile.from_segment_list_dict('SCIENCE_AVAILABLE',
                                       sci_avlble_dict,
                                       ifo_list=scienceSegs.keys(),
                                       valid_segment=workflow.analysis_time,
                                       extension='.xml', tags=tags,
                                       directory=outputDir)

    logging.info("Leaving datafind module")

    return FileList(datafindouts), sci_avlble_file, scienceSegs, \
        sci_avlble_name

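# Hedged configuration sketch for the checks above (section and option names
# are taken from the code; the values shown are one valid combination):
#
#   [workflow-datafind]
#   datafind-method = AT_RUNTIME_SINGLE_FRAMES
#   datafind-check-segment-gaps = update_times
#   datafind-check-frames-exist = warn
#   datafind-check-segment-summary = no_test
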
def make_grb_segments_plot(wkflow, science_segs, trigger_time, trigger_name,
                           out_dir, coherent_seg=None, fail_criterion=None):

    ifos = wkflow.ifos
    if len(science_segs.keys()) == 0:
        extent = segments.segment(int(wkflow.cp.get("workflow", "start-time")),
                                  int(wkflow.cp.get("workflow", "end-time")))
    else:
        pltpad = [science_segs.extent_all()[1] - trigger_time,
                  trigger_time - science_segs.extent_all()[0]]
        extent = segments.segmentlist([science_segs.extent_all(),
                                       segments.segment(trigger_time - pltpad[0],
                                                        trigger_time + pltpad[1])]).extent()

    ifo_colors = {}
    for ifo in ifos:
        ifo_colors[ifo] = ifo_color(ifo)
        if ifo not in science_segs.keys():
            science_segs[ifo] = segments.segmentlist([])

    # Make plot
    fig, subs = plt.subplots(len(ifos), sharey=True)
    plt.xticks(rotation=20, ha='right')
    for sub, ifo in zip(subs, ifos):
        for seg in science_segs[ifo]:
            sub.add_patch(Rectangle((seg[0], 0.1), abs(seg), 0.8,
                                    facecolor=ifo_colors[ifo],
                                    edgecolor='none'))
        if coherent_seg:
            if len(science_segs[ifo]) > 0 and \
                    coherent_seg in science_segs[ifo]:
                sub.plot([trigger_time, trigger_time], [0, 1], '-',
                         c='orange')
                sub.add_patch(Rectangle((coherent_seg[0], 0),
                                        abs(coherent_seg), 1, alpha=0.5,
                                        facecolor='orange', edgecolor='none'))
            else:
                sub.plot([trigger_time, trigger_time], [0, 1], ':',
                         c='orange')
                sub.plot([coherent_seg[0], coherent_seg[0]], [0, 1], '--',
                         c='orange', alpha=0.5)
                sub.plot([coherent_seg[1], coherent_seg[1]], [0, 1], '--',
                         c='orange', alpha=0.5)
        else:
            sub.plot([trigger_time, trigger_time], [0, 1], ':k')
        if fail_criterion:
            if len(science_segs[ifo]) > 0:
                style_str = '--'
            else:
                style_str = '-'
            sub.plot([fail_criterion[0], fail_criterion[0]], [0, 1],
                     style_str, c='black', alpha=0.5)
            sub.plot([fail_criterion[1], fail_criterion[1]], [0, 1],
                     style_str, c='black', alpha=0.5)
        sub.set_frame_on(False)
        sub.set_yticks([])
        sub.set_ylabel(ifo, rotation=45)
        sub.set_ylim([0, 1])
        sub.set_xlim([float(extent[0]), float(extent[1])])
        sub.get_xaxis().get_major_formatter().set_useOffset(False)
        sub.get_xaxis().get_major_formatter().set_scientific(False)
        sub.get_xaxis().tick_bottom()
        if sub is subs[-1]:
            sub.tick_params(labelsize=10, pad=1)
        else:
            sub.get_xaxis().set_ticks([])
            sub.get_xaxis().set_ticklabels([])

    xmin, xmax = fig.axes[-1].get_xaxis().get_view_interval()
    ymin, _ = fig.axes[-1].get_yaxis().get_view_interval()
    fig.axes[-1].add_artist(Line2D((xmin, xmax), (ymin, ymin), color='black',
                                   linewidth=2))
    fig.axes[-1].set_xlabel('GPS Time')

    fig.axes[0].set_title('Science Segments for GRB%s' % trigger_name)
    plt.tight_layout()
    fig.subplots_adjust(hspace=0)

    plot_name = 'GRB%s_segments.png' % trigger_name
    plot_url = 'file://localhost%s/%s' % (out_dir, plot_name)
    fig.savefig('%s/%s' % (out_dir, plot_name))

    return [ifos, plot_name, extent, plot_url]

def query_segments(engine, table, segdefs):
    # each segdef is a list containing:
    #     ifo, name, version, start_time, end_time, start_pad, end_pad

    # The trivial case: if there's nothing to do, return no time
    if len(segdefs) == 0:
        return [segmentlist([])]

    #
    # For the sake of efficiency we query the database for all the segdefs
    # at once.  This constructs the clause matching a single segdef.
    #
    def make_clause(table, segdef):
        ifo, name, version, start_time, end_time, start_pad, end_pad = segdef

        sql = " (segment_definer.ifos = '%s' " % ifo
        sql += "AND segment_definer.name = '%s' " % name
        sql += "AND segment_definer.version = %s " % version
        sql += "AND NOT (%d > %s.end_time OR %s.start_time > %d)) " % (start_time, table, table, end_time)

        return sql

    clauses = [make_clause(table, segdef) for segdef in segdefs]

    sql = 'SELECT segment_definer.ifos, segment_definer.name, segment_definer.version, '
    sql += ' %s.start_time, %s.end_time ' % (table, table)
    sql += ' FROM segment_definer, %s ' % table
    sql += ' WHERE %s.segment_def_id = segment_definer.segment_def_id AND ' % table

    if engine.__class__ == query_engine.LdbdQueryEngine:
        sql += " %s.segment_def_cdb = segment_definer.creator_db AND " % table

    sql += '( ' + ' OR '.join(clauses) + ' )'

    rows = engine.query(sql)

    #
    # The result of a query will be rows of the form
    #     ifo, name, version, start_time, end_time
    #
    # We want to associate each returned row with the segdef it belongs to
    # so that we can apply the correct padding.
    #
    # If segdefs were uniquely specified by (ifo, name, version) this would
    # be easy, but it may happen that we're looking for the same segment
    # definer at multiple disjoint times.  In particular this can happen if
    # the user didn't specify a version number; in that case we might have
    # version 2 of some flag defined over multiple disjoint
    # segment_definers.
    #
    results = []

    for segdef in segdefs:
        ifo, name, version, start_time, end_time, start_pad, end_pad = segdef

        search_span = segment(start_time, end_time)
        search_span_list = segmentlist([search_span])

        # See whether the row belongs to the current segdef.  Name, ifo and
        # version must match and the padded segment must overlap with the
        # range of the segdef.
        def matches(row):
            return (row[0].strip() == ifo and row[1] == name
                    and int(row[2]) == int(version)
                    and search_span.intersects(segment(row[3] + start_pad,
                                                       row[4] + start_pad)))

        # Add the padding.  Segments may extend beyond the time of interest,
        # chop off the excess.
        def pad_and_truncate(row_start, row_end):
            tmp = segmentlist([segment(row_start + start_pad,
                                       row_end + end_pad)])
            # No coalesce needed as a list with a single segment is already
            # coalesced
            tmp &= search_span_list

            # The intersection is guaranteed to be non-empty if the row
            # passed matches()
            # PR 2969: The above comment is incorrect.  Negative padding may
            # cause an empty intersection.
            if len(tmp) == 0:
                return segment(0, 0)
            else:
                return tmp[0]

        # Build a segment list from the returned segments, padded and
        # truncated.  The segments will not necessarily be disjoint, if the
        # padding crosses gaps.  They are also not guaranteed to be in
        # order, since there's no ORDER BY in the query.  So the list needs
        # to be coalesced before arithmetic can be done with it.
        result = segmentlist([pad_and_truncate(row[3], row[4])
                              for row in rows if matches(row)]).coalesce()

        # This is not needed: since each of the segments are constrained to
        # be within the search span the whole list must be as well.
        # result &= search_span_list

        results.append(result)

    return results

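# Hedged usage sketch (the engine object and flag name are hypothetical):
# each segdef is (ifo, name, version, start_time, end_time, start_pad,
# end_pad), and one coalesced segmentlist is returned per segdef, in order:
#
#   segdefs = [('H1', 'DMT-SCIENCE', 1, 815155213, 815241613, 0, 0),
#              ('L1', 'DMT-SCIENCE', 1, 815155213, 815241613, 8, -8)]
#   h1_segs, l1_segs = query_segments(engine, 'segment', segdefs)
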
def coalesce_seg(database, start_time, end_time):
    ret = 0  # assume execution successful
    try:
        st = int(start_time)
        et = int(end_time)
        db = str(database.strip())

        #-------------------------------------------------------------------
        # Set up environment and get needed values
        #-------------------------------------------------------------------
        # Set up connection to the database
        dbconn = DB2.connect(dsn=db, uid='', pwd='', autoCommit=True)
        curs = dbconn.cursor()

        # create a new process_id
        sql = "select hex(GENERATE_UNIQUE()) from sysibm.sysdummy1"
        curs.execute(sql)
        hex_procid = curs.fetchone()[0]
        process_id = 'x' + '\'' + hex_procid + '\''

        # determine the local creator_db
        sql = "SELECT DEFAULT FROM SYSCAT.COLUMNS WHERE "
        sql += "TABNAME = 'PROCESS' AND COLNAME = 'CREATOR_DB'"
        curs.execute(sql)
        creator_db = int(curs.fetchone()[0])

        # prepare values for the new row to be inserted into the process table
        program = os.path.abspath(sys.argv[0])
        node = socket.gethostname()
        username = pwd.getpwuid(os.getuid()).pw_name
        unix_procid = os.getpid()
        proc_start_time = gpstime.GpsSecondsFromPyUTC(time.time())
        end_time = None  # the process end_time is filled in at the end of the run
        jobid = 0
        domain = 'coalesce_local'

        # insert new row into process table
        sql = "INSERT INTO process "
        sql += "(program, is_online, node, username, unix_procid, start_time, jobid, domain, process_id, creator_db) "
        sql += "VALUES ('%s', 0, '%s', '%s', %d, %d, %d, '%s', %s, %d)" % (program, node, username, unix_procid, proc_start_time, jobid, domain, process_id, creator_db)
        curs.execute(sql)

        # get the BLOB process_id for later reference
        sql = "SELECT BLOB(process_id) from process where hex(process_id)='%s' " % hex_procid
        curs.execute(sql)
        blob_procid = curs.fetchone()[0]

        #====================================================================
        #
        # Main
        #
        #====================================================================
        # Algorithm:
        # 1. Find the distinct segment types in the segment_summary table
        #    within the [start_time, end_time) range
        # 2. Find segments and intervals to coalesce
        # 3. Coalesce segments and intervals
        # 4. Insert coalesced segments back into the database
        # 5. Delete uncoalesced segments and intervals from the database

        # 1. Find distinct segment types matching our criteria from
        #    segment_summary within the specified time range
        sql = "SELECT distinct(hex(segment_summary.segment_def_id)) FROM segment_summary, segment_definer, process "
        sql += "WHERE segment_summary.segment_def_id=segment_definer.segment_def_id "
        sql += "AND segment_summary.segment_def_cdb=segment_definer.creator_db "
        sql += "AND segment_summary.process_id=process.process_id "
        sql += "AND segment_summary.creator_db=process.creator_db "
        # Removed next line so that all segments are coalesced: this will be
        # slower up front, but faster for queries in the long run
        #sql += "AND ((segment_definer.name like 'DMT-%' and segment_definer.version=1) or (process.ifos='V1' and process.program='SegOnline')) "
        sql += "AND segment_summary.start_time <=%d " % et
        sql += "AND segment_summary.end_time >= %d " % st
        curs.execute(sql)
        def_ids = curs.fetchall()
        if not def_ids:
            data_existence = 0
        else:
            data_existence = 1

        # loop over the segment types to fetch, coalesce, insert and delete
        for d in def_ids:
            # get the BLOB segment_def_id for later use
            sql = "SELECT BLOB(segment_def_id), ifos, name, version, creator_db "
            sql += "FROM segment_definer "
            sql += "WHERE hex(segment_def_id) = '%s' " % d[0]
            curs.execute(sql)
            result = curs.fetchone()
            blob_defid = result[0]
            ifos = result[1].strip()
            name = result[2]
            ver = result[3]
            def_cdb = result[4]

            # 2. Find segments and intervals to coalesce
            # get the segment start_time, end_time to coalesce, and the
            # corresponding primary key to delete
            try:
                curs.execute("drop view seg_view")
            except:
                pass
            sql = "CREATE view seg_view (st,et,seg_id) AS "
            sql += "SELECT start_time,end_time, segment_id from segment "
            sql += "WHERE hex(segment_def_id) = '%s' " % d[0]
            sql += "AND segment.start_time <=%d " % et
            sql += "AND segment.end_time >= %d " % st
            print >> sys.stdout, ("Selecting segments to coalesce for %s version:%d %s ... " % (ifos, ver, name))
            curs.execute(sql)
            curs.execute("SELECT st,et from seg_view")
            seg_bf_cos = curs.fetchall()  # get the segments to coalesce

            # get the summary start_time, end_time to coalesce, and the
            # corresponding primary key to delete
            try:
                curs.execute("drop view sum_view")
            except:
                pass
            sql = "CREATE view sum_view (st,et,sum_id) AS "
            sql += "SELECT start_time,end_time, segment_sum_id from segment_summary "
            sql += "WHERE hex(segment_def_id) = '%s' " % d[0]
            sql += "AND segment_summary.start_time <=%d " % et
            sql += "AND segment_summary.end_time >= %d " % st
            curs.execute(sql)
            curs.execute("SELECT st,et from sum_view")
            sum_bf_cos = curs.fetchall()  # get the summaries to coalesce

            # 3. Coalesce segments and intervals
            print >> sys.stdout, "Coalescing segments ... "
            segs = segments.segmentlist([])
            sums = segments.segmentlist([])
            for bf in seg_bf_cos:
                seg = segments.segment(int(bf[0]), int(bf[1]))
                segs.append(seg)
            for bf in sum_bf_cos:
                sum = segments.segment(int(bf[0]), int(bf[1]))
                sums.append(sum)
            segs.coalesce()
            sums.coalesce()

            # 4. Insert coalesced segments back into the database
            # insert coalesced segs into the segment table
            insert_list = []
            for s in segs:
                # generate a unique id for insertion
                curs.execute("VALUES BLOB(GENERATE_UNIQUE())")
                prim_id = curs.fetchone()[0]
                # build the list of values to insert using executemany()
                insert_list.append((prim_id, creator_db, s[0], s[1], blob_defid, def_cdb, blob_procid))
            sql = "INSERT INTO segment "
            sql += "(segment_id, creator_db, start_time, end_time, segment_def_id, segment_def_cdb, process_id) "
            sql += "VALUES (?,?,?,?,?,?,?) "
            print >> sys.stdout, "Inserting coalesced segments back in ... "
            curs.executemany(sql, insert_list)

            # insert coalesced sums into the segment_summary table
            insert_list = []
            for s in sums:
                # generate a unique id for insertion
                curs.execute("VALUES BLOB(GENERATE_UNIQUE())")
                prim_id = curs.fetchone()[0]
                # build the list of values to insert using executemany()
                insert_list.append((prim_id, creator_db, s[0], s[1], blob_defid, def_cdb, blob_procid))
            sql = "INSERT INTO segment_summary "
            sql += "(segment_sum_id, creator_db, start_time, end_time, segment_def_id, segment_def_cdb, process_id) "
            sql += "VALUES (?,?,?,?,?,?,?) "
            curs.executemany(sql, insert_list)

            # 5. Delete uncoalesced segments and intervals from the database
            print >> sys.stdout, "Deleting un-coalesced segments ... "
            print >> sys.stdout
            sql = "DELETE FROM segment "
            sql += "WHERE segment_id in (select seg_id from seg_view) "
            sql += "AND process_id != %s " % process_id
            curs.execute(sql)
            sql = "DELETE FROM segment_summary "
            sql += "WHERE segment_sum_id in (select sum_id from sum_view) "
            sql += "AND process_id != %s " % process_id
            curs.execute(sql)

        # update end_time in the process table
        sql = "update process set end_time=%d where hex(process_id)='%s' " % (gpstime.GpsSecondsFromPyUTC(time.time()), hex_procid)
        curs.execute(sql)

        try:
            curs.execute("drop view seg_view")
            curs.execute("drop view sum_view")
        except:
            pass
        curs.close()
    except Exception as e:
        ret = str(e)
        print >> sys.stdout, ("%s" % ret)
    return ret  # 0 on success, otherwise the error string
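# The database round trip above reduces, at its core, to interval arithmetic
# on segmentlist objects (step 3).  A minimal sketch of that coalescing step,
# assuming pycbc_glue.segments (or the equivalent glue.segments) is
# importable; the GPS values are invented for illustration:
from pycbc_glue import segments

# overlapping raw rows, as they might come back from the seg_view query
seg_bf_cos = [(100, 110), (105, 120), (130, 140)]

segs = segments.segmentlist(segments.segment(int(st), int(et))
                            for st, et in seg_bf_cos)
segs.coalesce()  # merges overlapping/adjacent intervals in place

print segs  # [segment(100, 120), segment(130, 140)]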
def setup_datafind_workflow(workflow, scienceSegs, outputDir, seg_file=None,
                            tags=None):
    """
    Setup datafind section of the workflow. This section is responsible for
    generating, or setting up the workflow to generate, a list of files that
    record the location of the frame files needed to perform the analysis.
    There could be multiple options here, the datafind jobs could be done at
    run time or could be put into a dag. The subsequent jobs will know
    what was done here from the OutFileList containing the datafind jobs
    (and the Dagman nodes if appropriate).
    For now the only implemented option is to generate the datafind files at
    runtime. This module can also check if the frameFiles actually exist,
    check whether the obtained segments line up with the original ones and
    update the science segments to reflect missing data files.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        The workflow class that stores the jobs that will be run.
    scienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances
        This contains the times that the workflow is expected to analyse.
    outputDir : path
        All output files written by datafind processes will be written to
        this directory.
    seg_file : SegFile, optional (default=None)
        The file returned by get_science_segments containing the science
        segments and the associated segment_summary. This will be used for
        the segment_summary test and is required if, and only if, performing
        that test.
    tags : list of string, optional (default=None)
        Use this to specify tags. This can be used if this module is being
        called more than once to give call specific configuration (by setting
        options in [workflow-datafind-${TAG}] rather than
        [workflow-datafind]). This is also used to tag the Files returned by
        the class to uniqueify the Files and uniqueify the actual filename.
        FIXME: Filenames may not be unique with current codes!

    Returns
    --------
    datafindOuts : OutGroupList
        List of all the datafind output files for use later in the pipeline.
    sci_avlble_file : SegFile
        SegFile containing the analysable time after checks in the datafind
        module are applied to the input segment list. For production runs
        this is expected to be equal to the input segment list.
    scienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances
        This contains the times that the workflow is expected to analyse. If
        the updateSegmentTimes kwarg is given this will be updated to reflect
        any instances of missing data.
    sci_avlble_name : string
        The name with which the analysable time is stored in the
        sci_avlble_file.
""" if tags is None: tags = [] logging.info("Entering datafind module") make_analysis_dir(outputDir) cp = workflow.cp # Parse for options in ini file datafind_method = cp.get_opt_tags("workflow-datafind", "datafind-method", tags) if cp.has_option_tags("workflow-datafind", "datafind-check-segment-gaps", tags): checkSegmentGaps = cp.get_opt_tags("workflow-datafind", "datafind-check-segment-gaps", tags) else: checkSegmentGaps = "no_test" if cp.has_option_tags("workflow-datafind", "datafind-check-frames-exist", tags): checkFramesExist = cp.get_opt_tags("workflow-datafind", "datafind-check-frames-exist", tags) else: checkFramesExist = "no_test" if cp.has_option_tags("workflow-datafind", "datafind-check-segment-summary", tags): checkSegmentSummary = cp.get_opt_tags( "workflow-datafind", "datafind-check-segment-summary", tags) else: checkSegmentSummary = "no_test" logging.info("Starting datafind with setup_datafind_runtime_generated") if datafind_method == "AT_RUNTIME_MULTIPLE_CACHES": datafindcaches, datafindouts = \ setup_datafind_runtime_cache_multi_calls_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafind_method == "AT_RUNTIME_SINGLE_CACHES": datafindcaches, datafindouts = \ setup_datafind_runtime_cache_single_call_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafind_method == "AT_RUNTIME_MULTIPLE_FRAMES": datafindcaches, datafindouts = \ setup_datafind_runtime_frames_multi_calls_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafind_method == "AT_RUNTIME_SINGLE_FRAMES": datafindcaches, datafindouts = \ setup_datafind_runtime_frames_single_call_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafind_method == "AT_RUNTIME_FAKE_DATA": pass elif datafind_method == "FROM_PREGENERATED_LCF_FILES": ifos = scienceSegs.keys() datafindcaches, datafindouts = \ setup_datafind_from_pregenerated_lcf_files(cp, ifos, outputDir, tags=tags) else: msg = """Entry datafind-method in [workflow-datafind] does not have " expected value. Valid values are AT_RUNTIME_MULTIPLE_FRAMES, AT_RUNTIME_SINGLE_FRAMES AT_RUNTIME_MULTIPLE_CACHES, AT_RUNTIME_SINGLE_CACHES, FROM_PREGENERATED_LCF_FILES, or AT_RUNTIME_FAKE_DATA. Consult the documentation for more info.""" raise ValueError(msg) using_backup_server = False if datafind_method == "AT_RUNTIME_MULTIPLE_FRAMES" or \ datafind_method == "AT_RUNTIME_SINGLE_FRAMES": if cp.has_option_tags("workflow-datafind", "datafind-backup-datafind-server", tags): using_backup_server = True backup_server = cp.get_opt_tags("workflow-datafind", "datafind-backup-datafind-server", tags) cp_new = copy.deepcopy(cp) cp_new.set("workflow-datafind", "datafind-ligo-datafind-server", backup_server) cp_new.set('datafind', 'urltype', 'gsiftp') backup_datafindcaches, backup_datafindouts =\ setup_datafind_runtime_frames_single_call_perifo(cp_new, scienceSegs, outputDir, tags=tags) backup_datafindouts = datafind_keep_unique_backups(\ backup_datafindouts, datafindouts) datafindcaches.extend(backup_datafindcaches) datafindouts.extend(backup_datafindouts) logging.info("setup_datafind_runtime_generated completed") # If we don't have frame files covering all times we can update the science # segments. 
    if checkSegmentGaps in ['warn', 'update_times', 'raise_error']:
        logging.info("Checking science segments against datafind output....")
        newScienceSegs = get_science_segs_from_datafind_outs(datafindcaches)
        logging.info("New segments calculated from data find output.....")
        missingData = False
        for ifo in scienceSegs.keys():
            # If no science segments in input then do nothing
            if not scienceSegs[ifo]:
                msg = "No science segments are present for ifo %s, " % (ifo)
                msg += "the segment metadata indicates there is no analyzable"
                msg += " strain data between the selected GPS start and end "
                msg += "times."
                logging.warning(msg)
                continue
            if not newScienceSegs.has_key(ifo):
                msg = "No data frames were found corresponding to the science "
                msg += "segments for ifo %s" % (ifo)
                logging.error(msg)
                missingData = True
                if checkSegmentGaps == 'update_times':
                    scienceSegs[ifo] = segments.segmentlist()
                continue
            missing = scienceSegs[ifo] - newScienceSegs[ifo]
            if abs(missing):
                msg = "From ifo %s we are missing frames covering:" % (ifo)
                msg += "\n%s" % "\n".join(map(str, missing))
                missingData = True
                logging.error(msg)
                if checkSegmentGaps == 'update_times':
                    # Remove missing time, so that we can carry on if desired
                    logging.info("Updating science segments for ifo %s."
                                 % (ifo))
                    scienceSegs[ifo] = scienceSegs[ifo] - missing
        if checkSegmentGaps == 'raise_error' and missingData:
            raise ValueError("Workflow cannot find needed data, exiting.")
        logging.info("Done checking, any discrepancies are reported above.")
    elif checkSegmentGaps == 'no_test':
        pass
    else:
        errMsg = "checkSegmentGaps kwarg must take a value from 'no_test', "
        errMsg += "'warn', 'update_times' or 'raise_error'."
        raise ValueError(errMsg)

    # Do all of the frame files that were returned actually exist?
    if checkFramesExist in ['warn', 'update_times', 'raise_error']:
        logging.info("Verifying that all frames exist on disk.")
        missingFrSegs, missingFrames = \
            get_missing_segs_from_frame_file_cache(datafindcaches)
        missingFlag = False
        for ifo in missingFrames.keys():
            # If no data in the input then do nothing
            if not scienceSegs[ifo]:
                continue
            # If using a backup server, does the frame exist remotely?
            if using_backup_server:
                # WARNING: This will be slow, but hopefully it will not occur
                #          for too many frames. This could be optimized if
                #          it becomes necessary.
                new_list = []
                for frame in missingFrames[ifo]:
                    for dfout in datafindouts:
                        dfout_pfns = list(dfout.pfns)
                        dfout_urls = [a.url for a in dfout_pfns]
                        if frame.url in dfout_urls:
                            pfn = dfout_pfns[dfout_urls.index(frame.url)]
                            dfout.removePFN(pfn)
                            if len(dfout.pfns) == 0:
                                new_list.append(frame)
                            else:
                                msg = "Frame %s not found locally. " \
                                      % (frame.url,)
                                msg += "Replacing with remote url(s) %s." \
                                       % (str([a.url for a in dfout.pfns]),)
                                logging.info(msg)
                            break
                    else:
                        new_list.append(frame)
                missingFrames[ifo] = new_list
            if missingFrames[ifo]:
                msg = "From ifo %s we are missing the following frames:" \
                      % (ifo)
                msg += '\n'.join([a.url for a in missingFrames[ifo]])
                missingFlag = True
                logging.error(msg)
            if checkFramesExist == 'update_times':
                # Remove missing times, so that we can carry on if desired
                logging.info("Updating science times for ifo %s." % (ifo))
                scienceSegs[ifo] = scienceSegs[ifo] - missingFrSegs[ifo]
        if checkFramesExist == 'raise_error' and missingFlag:
            raise ValueError("Workflow cannot find all frames, exiting.")
        logging.info("Finished checking frames.")
    elif checkFramesExist == 'no_test':
        pass
    else:
        errMsg = "checkFramesExist kwarg must take a value from 'no_test', "
        errMsg += "'warn', 'update_times' or 'raise_error'."
        raise ValueError(errMsg)

    # Check if there are cases where frames exist, but no entry in the
    # segment summary table is present.
    if checkSegmentSummary in ['warn', 'raise_error']:
        logging.info("Checking the segment summary table against frames.")
        dfScienceSegs = get_science_segs_from_datafind_outs(datafindcaches)
        missingFlag = False
        # NOTE: Should this be overrideable in the config file?
        sci_seg_name = "SCIENCE"
        if seg_file is None:
            err_msg = "You must provide the science segments SegFile object "
            err_msg += "if using the datafind-check-segment-summary option."
            raise ValueError(err_msg)
        if seg_file.seg_summ_dict is None:
            err_msg = "The provided science segments SegFile object must "
            err_msg += "contain a valid segment_summary table if using the "
            err_msg += "datafind-check-segment-summary option."
            raise ValueError(err_msg)
        seg_summary_times = seg_file.seg_summ_dict
        for ifo in dfScienceSegs.keys():
            curr_seg_summ_times = seg_summary_times[ifo + ":" + sci_seg_name]
            missing = (dfScienceSegs[ifo] & seg_file.valid_segments)
            missing.coalesce()
            missing = missing - curr_seg_summ_times
            missing.coalesce()
            scienceButNotFrame = scienceSegs[ifo] - dfScienceSegs[ifo]
            scienceButNotFrame.coalesce()
            missing2 = scienceSegs[ifo] - scienceButNotFrame
            missing2.coalesce()
            missing2 = missing2 - curr_seg_summ_times
            missing2.coalesce()
            if abs(missing):
                msg = "From ifo %s the following times have frames, " % (ifo)
                msg += "but are not covered in the segment summary table."
                msg += "\n%s" % "\n".join(map(str, missing))
                logging.error(msg)
                missingFlag = True
            if abs(missing2):
                msg = "From ifo %s the following times have frames, " % (ifo)
                msg += "are science, and are not covered in the segment "
                msg += "summary table."
                msg += "\n%s" % "\n".join(map(str, missing2))
                logging.error(msg)
                missingFlag = True
        if checkSegmentSummary == 'raise_error' and missingFlag:
            errMsg = "Segment_summary discrepancy detected, exiting."
            raise ValueError(errMsg)
    elif checkSegmentSummary == 'no_test':
        pass
    else:
        errMsg = "checkSegmentSummary kwarg must take a value from "
        errMsg += "'no_test', 'warn', or 'raise_error'."
        raise ValueError(errMsg)

    # Now need to create the file for SCIENCE_AVAILABLE
    sci_avlble_dict = segments.segmentlistdict()
    # NOTE: Should this be overrideable in the config file?
    sci_avlble_name = "SCIENCE_AVAILABLE"
    for ifo in scienceSegs.keys():
        sci_avlble_dict[ifo + ':' + sci_avlble_name] = scienceSegs[ifo]

    sci_avlble_file = SegFile.from_segment_list_dict(
        'SCIENCE_AVAILABLE', sci_avlble_dict, ifo_list=scienceSegs.keys(),
        valid_segment=workflow.analysis_time, extension='.xml', tags=tags,
        directory=outputDir)

    logging.info("Leaving datafind module")
    if datafind_method == "AT_RUNTIME_FAKE_DATA":
        datafindouts = None
    else:
        datafindouts = FileList(datafindouts)

    return datafindouts, sci_avlble_file, scienceSegs, sci_avlble_name
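# All three datafind-check-* options above reduce to the same segment
# arithmetic: subtract what datafind covered from what the science segments
# requested.  A minimal sketch of the 'update_times' behaviour, assuming
# pycbc_glue.segments is importable; the GPS times are invented:
from pycbc_glue import segments

science = segments.segmentlist([segments.segment(1000000000, 1000002000)])
covered = segments.segmentlist([segments.segment(1000000000, 1000001500)])

missing = science - covered
if abs(missing):  # abs() of a segmentlist is its total livetime in seconds
    # 'warn' would only log this, 'raise_error' would abort; 'update_times'
    # shrinks the analysable time to what is actually available:
    science = science - missing

print science  # [segment(1000000000, 1000001500)]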
def find_frame_urls(self, site, frametype, gpsstart, gpsend,
                    match=None, urltype=None, on_gaps="warn"):
    """Find the frame files for the given type in the [start, end) interval.

    @param site: single-character name of site to match
    @param frametype: name of frametype to match
    @param gpsstart: integer GPS start time of query
    @param gpsend: integer GPS end time of query
    @param match: regular expression to match against
    @param urltype: file scheme to search for (e.g. 'file')
    @param on_gaps: what to do when the requested frame isn't found,
        one of:
            - C{'warn'} (default): print a warning,
            - C{'error'}: raise an L{RuntimeError}, or
            - C{'ignore'}: do nothing

    @type site: L{str}
    @type frametype: L{str}
    @type gpsstart: L{int}
    @type gpsend: L{int}
    @type match: L{str}
    @type urltype: L{str}
    @type on_gaps: L{str}

    @returns: L{Cache<pycbc_glue.lal.Cache>}

    @raises RuntimeError: if gaps are found and C{on_gaps='error'}
    """
    if on_gaps not in ("warn", "error", "ignore"):
        raise ValueError("on_gaps must be 'warn', 'error', or 'ignore'.")
    url = ("%s/gwf/%s/%s/%s,%s"
           % (_url_prefix, site, frametype, gpsstart, gpsend))
    # if a URL type is specified append it to the path
    if urltype:
        url += "/%s" % urltype
    # request JSON output
    url += ".json"
    # append a regex if input
    if match:
        url += "?match=%s" % match
    # make query
    response = self._requestresponse("GET", url)
    urllist = decode(response.read())
    out = lal.Cache([lal.CacheEntry.from_T050017(x,
                         coltype=self.LIGOTimeGPSType) for x in urllist])
    if on_gaps == "ignore":
        return out
    else:
        span = segments.segment(gpsstart, gpsend)
        seglist = segments.segmentlist(e.segment for e in out).coalesce()
        missing = (segments.segmentlist([span]) - seglist).coalesce()
        if span in seglist:
            return out
        else:
            msg = "Missing segments: \n%s" % "\n".join(map(str, missing))
            if on_gaps == "warn":
                sys.stderr.write("%s\n" % msg)
                return out
            else:
                raise RuntimeError(msg)
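# A usage sketch for the method above.  GWDataFindHTTPConnection is the
# datafind connection class this method is typically bound to in the
# pycbc_glue.datafind module; the site, frametype and GPS times below are
# illustrative placeholders, and the bare constructor assumes a datafind
# server is resolvable (e.g. via the LIGO_DATAFIND_SERVER environment
# variable):
from pycbc_glue.datafind import GWDataFindHTTPConnection

conn = GWDataFindHTTPConnection()
cache = conn.find_frame_urls('L', 'L1_HOFT_C00',      # site, frametype
                             1000000000, 1000001000,  # [start, end)
                             urltype='file',          # local paths only
                             on_gaps='error')         # raise on coverage gaps
for entry in cache:
    print entry.path  # one local frame file per cache entry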
def vote(seglists, n):
    """
    Given a sequence of segmentlists, returns the intervals during
    which at least n of them intersect.  The input segmentlists must be
    coalesced, the output is coalesced.

    Example:

    >>> from pycbc_glue.segments import *
    >>> w = segmentlist([segment(0, 15)])
    >>> x = segmentlist([segment(5, 20)])
    >>> y = segmentlist([segment(10, 25)])
    >>> z = segmentlist([segment(15, 30)])
    >>> vote((w, x, y, z), 3)
    [segment(10, 20)]

    The sequence of segmentlists is only iterated over once, and the
    segmentlists within it are only iterated over once; they can all be
    generators.

    If there are a total of N segments in M segment lists and the final
    result has L segments the algorithm is O(N M) + O(L).
    """
    # check for no-op
    if n < 1:
        return segments.segmentlist()

    # digest the segmentlists into an ordered sequence of off-on and
    # on-off transitions with the vote count for each transition
    # FIXME: this generator is declared locally for now, is it useful
    # as a stand-alone generator?
    def pop_min(l):
        # remove and return the smallest value from a list
        val = min(l)
        for i in xrange(len(l) - 1, -1, -1):
            if l[i] is val:
                return l.pop(i)
        assert False  # cannot get here

    def vote_generator(seglists):
        queue = []
        for seglist in seglists:
            segiter = iter(seglist)
            try:
                seg = segiter.next()
            except StopIteration:
                continue
            # put them in so that the smallest boundary is
            # closest to the end of the list
            queue.append((seg[1], -1, segiter))
            queue.append((seg[0], +1, None))
        if not queue:
            return
        queue.sort(reverse=True)
        bound = queue[-1][0]
        votes = 0
        while queue:
            this_bound, delta, segiter = pop_min(queue)
            if this_bound == bound:
                votes += delta
            else:
                yield bound, votes
                bound = this_bound
                votes = delta
            if segiter is not None:
                try:
                    seg = segiter.next()
                except StopIteration:
                    continue
                queue.append((seg[1], -1, segiter))
                queue.append((seg[0], +1, None))
        yield bound, votes

    # compute the cumulative sum of votes, and assemble a segmentlist
    # from the intervals when the vote count is equal to or greater
    # than n
    result = segments.segmentlist()
    votes = 0
    for bound, delta in vote_generator(seglists):
        if delta > 0 and n - delta <= votes < n:
            start = bound
        elif delta < 0 and n <= votes < n - delta:
            result.append(segments.segment(start, bound))
            del start  # detect stops that aren't preceded by starts
        votes += delta
    assert votes == 0  # detect failed cumulative sum

    return result
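# A usage sketch for vote(): computing multi-detector coincident time from
# per-detector observing segments.  The segments import matches the doctest
# above; the detector names and GPS values are invented:
from pycbc_glue import segments

h1 = segments.segmentlist([segments.segment(0, 100)])
l1 = segments.segmentlist([segments.segment(50, 150)])
v1 = segments.segmentlist([segments.segment(90, 200)])

# times when at least two of the three detectors were observing
double_time = vote([h1, l1, v1], 2)
print double_time  # [segment(50, 150)]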