def subdivide(seg, length, min_len=0):
    """
    Subdivide a segment into smaller segments based on a given length.
    Enforce a given minimum length at the end, if necessary. If the
    remainder segment is smaller than the minimum length, then the last
    two segments will span the remainder plus penultimate segment, with
    the span divided evenly between the two.

    Input segment: (0, 10] subdivide 3 min 2
    Output segment(s): (0, 3], (3, 6], (6, 8], (8, 10]
    """
    assert length >= min_len
    if abs(seg) < min_len:
        return segmentlist([])
    if abs(seg) <= length:
        return segmentlist([seg])

    subsegl = segmentlist([])
    for i in range(int(float(abs(seg)) / length)):
        st = seg[0]
        subsegl.append(segment(st + length * i, st + length * (i + 1)))

    # Make an attempt to subdivide evenly.
    if float(abs(seg)) % length <= min_len:
        s1 = subsegl.pop()
        rem_len = float(abs(s1)) + (float(abs(seg)) % length)
        s2 = segment(seg[1] - rem_len / 2, seg[1])
        s1 = segment(s1[0], seg[1] - rem_len / 2)
        subsegl.append(s1)
        subsegl.append(s2)
    else:
        subsegl.append(segment(subsegl[-1][1], seg[1]))

    return subsegl
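# A minimal usage sketch for subdivide(), mirroring the docstring example.
# It assumes subdivide() is available alongside the segment/segmentlist
# classes it uses (glue.segments is an assumption about the import path).
from glue.segments import segment, segmentlist

full_span = segment(0, 10)
pieces = subdivide(full_span, 3, min_len=2)
print(pieces)
# Expected per the docstring:
# [segment(0, 3), segment(3, 6), segment(6, 8), segment(8, 10)]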
def setup_psd_calculate(workflow, frame_files, ifo, segments,
                        segment_name, out_dir, tags=None):
    make_analysis_dir(out_dir)
    tags = [] if not tags else tags
    if workflow.cp.has_option_tags('workflow-psd',
                                   'parallelization-factor', tags=tags):
        num_parts = int(workflow.cp.get_opt_tags('workflow-psd',
                                                 'parallelization-factor',
                                                 tags=tags))
    else:
        num_parts = 1

    # get rid of duplicate segments which happen when splitting the bank
    segments = segmentlist(frozenset(segments))

    segment_lists = list(chunks(segments, num_parts))

    psd_files = FileList([])
    for i, segs in enumerate(segment_lists):
        seg_file = SegFile.from_segment_list('%s_%s' % (segment_name, i),
                                             segmentlist(segs), segment_name, ifo,
                                             valid_segment=workflow.analysis_time,
                                             extension='xml', directory=out_dir)
        psd_files += [make_psd_file(workflow, frame_files, seg_file,
                                    segment_name, out_dir,
                                    tags=tags + ['PART%s' % i])]

    if num_parts > 1:
        return merge_psds(workflow, psd_files, ifo, out_dir, tags=tags)
    else:
        return psd_files[0]
def setup_psd_calculate(workflow, frame_files, ifo, segments,
                        segment_name, out_dir, gate_files=None, tags=None):
    make_analysis_dir(out_dir)
    tags = [] if not tags else tags
    if workflow.cp.has_option_tags('workflow-psd',
                                   'parallelization-factor', tags=tags):
        num_parts = int(workflow.cp.get_opt_tags('workflow-psd',
                                                 'parallelization-factor',
                                                 tags=tags))
    else:
        num_parts = 1

    # get rid of duplicate segments which happen when splitting the bank
    segments = segmentlist(frozenset(segments))

    segment_lists = list(chunks(segments, num_parts))

    psd_files = FileList([])
    for i, segs in enumerate(segment_lists):
        seg_file = segments_to_file(segmentlist(segs),
                                    out_dir + '/%s-INSPIRAL_DATA-%s.xml' % (ifo, i),
                                    'INSPIRAL_DATA', ifo=ifo)
        psd_files += [make_psd_file(workflow, frame_files, seg_file,
                                    segment_name, out_dir,
                                    gate_files=gate_files,
                                    tags=tags + ['PART%s' % i])]

    if num_parts > 1:
        return merge_psds(workflow, psd_files, ifo, out_dir, tags=tags)
    else:
        return psd_files[0]
def run_query_segments(doc, proc_id, engine, gps_start_time, gps_end_time,
                       included_segments_string, excluded_segments_string=None,
                       write_segments=True, start_pad=0, end_pad=0):
    """Runs a segment query.  This was originally part of ligolw_query_segments,
    but now is also used by ligolw_segments_from_cats.

    The write_segments option is provided so callers can coalesce segments
    obtained over several invocations (as segments_from_cats does).
    """

    if write_segments:
        all_ifos = {}

        for ifo, segment_name, version in split_segment_ids(included_segments_string.split(",")):
            all_ifos[ifo] = True

        new_seg_def_id = add_to_segment_definer(doc, proc_id, "".join(all_ifos.keys()), "result", 0)
        add_to_segment_summary(doc, proc_id, new_seg_def_id, [[gps_start_time, gps_end_time]])

    result = segmentlist([])

    for ifo, segment_name, version in split_segment_ids(included_segments_string.split(",")):
        sum_segments, seg_segments = build_segment_list(engine, gps_start_time, gps_end_time,
                                                        ifo, segment_name, version,
                                                        start_pad, end_pad)
        seg_def_id = add_to_segment_definer(doc, proc_id, ifo, segment_name, version)
        add_to_segment_summary(doc, proc_id, seg_def_id, sum_segments)

        # and accumulate segments
        result |= seg_segments

    # Excluded segments are not required
    if excluded_segments_string:
        excluded_segments = segmentlist([])

        for ifo, segment_name, version in split_segment_ids(excluded_segments_string.split(",")):
            sum_segments, seg_segments = build_segment_list(engine, gps_start_time, gps_end_time,
                                                            ifo, segment_name, version)
            excluded_segments |= seg_segments

        result = result - excluded_segments

    result.coalesce()

    # Add the segments
    if write_segments:
        add_to_segment(doc, proc_id, new_seg_def_id, result)

    return result
def generated_vdb_ascii(json_dict, filepath):
    #res_dict=json.loads(json_str)
    res_dict = json_dict
    active_list = res_dict['active']
    active_segments = segments.segmentlist([segments.segment(x[0], x[1])
                                            for x in active_list])
    active_segments.coalesce()
    known_list = res_dict['known']
    known_segments = segments.segmentlist([segments.segment(x[0], x[1])
                                           for x in known_list])
    known_segments.coalesce()
    query_start = res_dict['query_information']['start']
    query_stop = res_dict['query_information']['end']
    if query_start != 0 and query_stop != 0:
        requested_span = segments.segmentlist([segments.segment(query_start, query_stop)])
    else:
        requested_span = segments.segmentlist([segments.segment(0, 9999999999)])
    active_segments_string = ',1 \n'.join([str(i[0]) + "," + str(i[1]) for i in active_segments]) + ",1 \n"
    unknown_segments = requested_span - known_segments
    unknown_segments_string = ',-1 \n'.join([str(i[0]) + "," + str(i[1]) for i in unknown_segments]) + ",-1 \n"
    known_not_active_segments = known_segments - active_segments
    known_not_active_segments_string = ',0 \n'.join([str(i[0]) + "," + str(i[1]) for i in known_not_active_segments]) + ",0 \n"
    output_fileh = open(filepath, 'a')
    query_info_string = json.dumps(res_dict['query_information'], indent=1)
    output_fileh.writelines(query_info_string)
    output_fileh.write('\n')
    output_fileh.writelines(active_segments_string)
    output_fileh.writelines(unknown_segments_string)
    output_fileh.writelines(known_not_active_segments_string)
    output_fileh.close()
    return filepath
def inj_seg(self, exclude_coinc_flags=None):
    """
    Returns a segmentlist that is the union of all excitation, segdb and
    bitmasked channels.
    """
    if exclude_coinc_flags is None:
        exclude_coinc_flags = []
    tmp_list = segments.segmentlist([])
    for key in self.exc_dict.keys():
        if key[3:] not in exclude_coinc_flags:
            tmp_list.extend(self.exc_dict[key])
    for key in self.seg_dict.keys():
        if key[3:] not in exclude_coinc_flags:
            tmp_list.extend(self.seg_dict[key])
    for key in self.bitmask_dict.keys():
        if key[3:] not in exclude_coinc_flags:
            tmp_list.extend(self.bitmask_dict[key])
    if self.schedule_time:
        seg = segments.segment(self.schedule_time, self.schedule_time + 1)
        seg_list = segments.segmentlist([seg])
        tmp_list.extend(seg_list)
    for time in self.gracedb_time:
        seg = segments.segment(time, time + 1)
        seg_list = segments.segmentlist([seg])
        tmp_list.extend(seg_list)
    return tmp_list
def generated_vdb_ascii(json_str, filepath):
    res_dict = json.loads(json_str)
    active_list = res_dict["active"]
    active_segments = segments.segmentlist([segments.segment(x[0], x[1])
                                            for x in active_list])
    known_list = res_dict["known"]
    known_segments = segments.segmentlist([segments.segment(x[0], x[1])
                                           for x in known_list])
    query_start = res_dict["query_information"]["start"]
    query_stop = res_dict["query_information"]["end"]
    if query_start != 0 and query_stop != 0:
        requested_span = segments.segmentlist([segments.segment(query_start, query_stop)])
    else:
        requested_span = segments.segmentlist([segments.segment(0, 9999999999)])
    active_segments_string = ",1 \n".join([str(i[0]) + "," + str(i[1]) for i in active_segments]) + ",1 \n"
    unknown_segments = requested_span - known_segments
    unknown_segments_string = ",-1 \n".join([str(i[0]) + "," + str(i[1]) for i in unknown_segments]) + ",-1 \n"
    known_not_active_segments = known_segments - active_segments
    known_not_active_segments_string = (
        ",0 \n".join([str(i[0]) + "," + str(i[1]) for i in known_not_active_segments]) + ",0 \n"
    )
    output_fileh = open(filepath, "w+")
    query_info_string = json.dumps(res_dict["query_information"], indent=1)
    output_fileh.writelines(query_info_string)
    output_fileh.write("\n")
    output_fileh.writelines(active_segments_string)
    output_fileh.writelines(unknown_segments_string)
    output_fileh.writelines(known_not_active_segments_string)
    output_fileh.close()
    return filepath
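# A small, self-contained sketch of the segment arithmetic that the two
# generated_vdb_ascii() variants rely on, assuming glue.segments is
# available; the interval values below are invented for illustration.
from glue import segments

requested_span = segments.segmentlist([segments.segment(0, 100)])
known = segments.segmentlist([segments.segment(0, 40), segments.segment(60, 100)])
active = segments.segmentlist([segments.segment(10, 20)])

unknown = requested_span - known       # times with no known state: [segment(40, 60)]
known_not_active = known - active      # known but flag off: [segment(0, 10), segment(20, 40), segment(60, 100)]
print(unknown)
print(known_not_active)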
def fetch(self, channel, start, end):
    """
    Retrieve data, caching file locations and the files themselves.
    """
    seg = segment(start, end)

    if not self._query(channel, start, end):
        raise ValueError("%s not found in cache" %
            repr(segmentlist([seg]) - self._remotecoverage))

    # Need to cache files locally
    # Note: seg *will* be in self._cachecoverage if self.scratchdir is None.
    if seg not in self._cachecoverage:
        for f, s in zip(self._remotefiles, self._remotesegs):
            if seg.intersects(s) and s not in self._cachecoverage:
                dest = os.path.join(self._scratchdir, os.path.split(f)[-1])
                if self._verbose:
                    print "Copying %s -->\n %s." % (f, dest)
                shutil.copy(f, dest)
                ind = bisect_right(self._cachedsegs, s)
                self._cachedfiles.insert(ind, dest)
                self._cachedsegs.insert(ind, s)
                self._cachecoverage |= segmentlist([s])
        assert seg in self._cachecoverage

    # Finally, return the cached data
    return self._fetch(channel, start, end)
def __init__(self, active = (), valid = (), instruments = (), name = None,
             version = None, comment = None):
    """
    Initialize a new LigolwSegmentList instance.  active and valid are
    sequences that will be cast to segments.segmentlist objects.  They
    can be generator expressions.  The "active" sequence is what is
    usually thought of as the segment list, the "valid" sequence
    identifies the intervals of time for which the segment list's state
    is defined.
    """
    # if we've only been passed an argument for active, see if
    # it's an object with the same attributes as ourselves and
    # if so initialize ourself as a copy of it.
    if not valid and not instruments and name is None and version is None and comment is None:
        try:
            self.valid = segments.segmentlist(active.valid)
            self.active = segments.segmentlist(active.active)
            self.instruments = set(active.instruments)
            self.name = active.name
            self.version = active.version
            self.comment = active.comment
            return
        except AttributeError:
            pass
    # we had more than one argument or it didn't have the
    # correct attributes, so do a normal initialization.  make
    # copies of mutable objects to avoid confusion
    self.valid = segments.segmentlist(valid)
    self.active = segments.segmentlist(active)
    self.instruments = set(instruments)
    self.name = name
    self.version = version
    self.comment = comment
def __init__(self, ifo, name, version):
    self.known = segments.segmentlist([])
    self.active = segments.segmentlist([])
    # self.metadata={}
    self.flagDict = {}
    self.ifo = ifo
    self.name = name
    self.version = version
def __init__(self, instrument):
    self.fig, self.axes = SnglBurstUtils.make_burst_plot("%s Confidence" % instrument,
                                                         "Coincident Event Rate (Hz)")
    self.instrument = instrument
    self.foreground = []
    self.background = []
    self.foreground_segs = segments.segmentlist()
    self.background_segs = segments.segmentlist()
    self.axes.loglog()
def fromsegmentxml(file, dict=False, id=None):
    """
    Read a glue.segments.segmentlist from the file object file containing an
    xml segment table.

    Arguments:

      file : file object
        file object for segment xml file

    Keyword Arguments:

      dict : [ True | False ]
        returns a glue.segments.segmentlistdict containing coalesced
        glue.segments.segmentlists keyed by seg_def.name for each entry in the
        contained segment_def_table. Default False

      id : int
        returns a glue.segments.segmentlist object containing only those
        segments matching the given segment_def_id integer
    """

    # load xmldocument and SegmentDefTable and SegmentTables
    xmldoc, digest = utils.load_fileobj(file, gz=file.name.endswith(".gz"))
    seg_def_table = table.get_table(xmldoc, lsctables.SegmentDefTable.tableName)
    seg_table = table.get_table(xmldoc, lsctables.SegmentTable.tableName)

    if dict:
        segs = segments.segmentlistdict()
    else:
        segs = segments.segmentlist()

    seg_id = {}
    for seg_def in seg_def_table:
        seg_id[int(seg_def.segment_def_id)] = str(seg_def.name)
        if dict:
            segs[str(seg_def.name)] = segments.segmentlist()

    for seg in seg_table:
        if dict:
            segs[seg_id[int(seg.segment_def_id)]]\
                .append(segments.segment(seg.start_time, seg.end_time))
            continue
        if id and int(seg.segment_def_id) == id:
            segs.append(segments.segment(seg.start_time, seg.end_time))
            continue
        segs.append(segments.segment(seg.start_time, seg.end_time))

    if dict:
        for seg_name in seg_id.values():
            segs[seg_name] = segs[seg_name].coalesce()
    else:
        segs = segs.coalesce()

    xmldoc.unlink()

    return segs
def coalesceResultDictionary(result_dict):
    out_result_dict = result_dict
    active_seg_python_list = [seg.segment(i[0], i[1]) for i in result_dict[0]['active']]
    active_seg_list = seg.segmentlist(active_seg_python_list)
    active_seg_list.coalesce()
    out_result_dict[0]['active'] = active_seg_list
    known_seg_python_list = [seg.segment(i[0], i[1]) for i in result_dict[0]['known']]
    known_seg_list = seg.segmentlist(known_seg_python_list)
    known_seg_list.coalesce()
    out_result_dict[0]['known'] = known_seg_list
    return out_result_dict
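# A quick illustration of what the coalesce() calls above do, assuming
# glue.segments; the input intervals are invented for the example.
from glue import segments

raw = segments.segmentlist([segments.segment(0, 5),
                            segments.segment(3, 8),
                            segments.segment(10, 12)])
raw.coalesce()
print(raw)   # overlapping/adjacent intervals merge: [segment(0, 8), segment(10, 12)]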
def test_optimized_query(engine):
    res = segmentdb_utils.query_segments(engine, 'segment_summary',
                                         [('H1', 'DMT-TESTSEG_2', 1, 924900000, 924900016, 0, 0),
                                          ('H1', 'DMT-TESTSEG_3', 1, 924900000, 924900016, 0, 0)])

    if res[0] != segmentlist([segment(924900000, 924900010)]):
        return False

    if res[1] != segmentlist([segment(924900008, 924900016)]):
        return False

    return True
def add_cache(self, cache_entries):
    """
    Add information from some cache entries.
    """
    newentries = [entry for entry in cache_entries
                  if entry.path not in self._remotefiles]
    newfiles = [entry.path for entry in newentries]
    newsegs = segmentlist([entry.segment for entry in newentries])
    self._remotefiles.extend(newfiles)
    self._remotesegs.extend(newsegs)
    self._remotecoverage |= segmentlist(newsegs)
    self._remotecoverage.coalesce()
def test_basic_seg_summary(engine):
    res = segmentdb_utils.query_segments(engine, 'segment_summary',
                                         [('H1', 'DMT-TESTSEG_1', 1, 924900000, 924900016, 0, 0)])

    if res != [segmentlist([segment(924900000, 924900016)])]:
        return False

    res = segmentdb_utils.query_segments(engine, 'segment_summary',
                                         [('H1', 'DMT-TESTSEG_1', 2, 924900000, 924900016, 0, 0)])

    if res != [segmentlist([segment(924900008, 924900010)])]:
        return False

    return True
def get_manually(gps_start_time, gps_end_time): db_location = os.environ["S6_SEGMENT_SERVER"] segment_connection = segmentdb_utils.setup_database(db_location) engine = query_engine.LdbdQueryEngine(segment_connection) # 1. Get v1 science segments sql = "SELECT segment.start_time, segment.end_time " sql += "FROM segment_definer, segment " sql += "WHERE segment.segment_def_id = segment_definer.segment_def_id " sql += "AND segment_definer.ifos = 'H1' " sql += "AND segment.segment_def_cdb = segment_definer.creator_db " sql += "AND segment_definer.name = 'DMT-SCIENCE' " sql += "AND segment_definer.version = 1 " sql += "AND NOT (%s > segment.end_time OR segment.start_time > %s)" % (gps_start_time, gps_end_time) v1_science_segments = segmentlist([segment(row[0], row[1]) for row in engine.query(sql)]).coalesce() # 2. Get v2 science summaries sql = "SELECT segment_summary.start_time, segment_summary.end_time " sql += "FROM segment_definer, segment_summary " sql += "WHERE segment_summary.segment_def_id = segment_definer.segment_def_id " sql += "AND segment_definer.ifos = 'H1' " sql += "AND segment_summary.segment_def_cdb = segment_definer.creator_db " sql += "AND segment_definer.name = 'DMT-SCIENCE' " sql += "AND segment_definer.version = 2 " sql += "AND NOT (%s > segment_summary.end_time OR segment_summary.start_time > %s)" % (gps_start_time, gps_end_time) v2_science_summaries = segmentlist([segment(row[0], row[1]) for row in engine.query(sql)]).coalesce() # 1. Get v2 science segments sql = "SELECT segment.start_time, segment.end_time " sql += "FROM segment_definer, segment " sql += "WHERE segment.segment_def_id = segment_definer.segment_def_id " sql += "AND segment_definer.ifos = 'H1' " sql += "AND segment.segment_def_cdb = segment_definer.creator_db " sql += "AND segment_definer.name = 'DMT-SCIENCE' " sql += "AND segment_definer.version = 2 " sql += "AND NOT (%s > segment.end_time OR segment.start_time > %s)" % (gps_start_time, gps_end_time) v2_science_segments = segmentlist([segment(row[0], row[1]) for row in engine.query(sql)]).coalesce() result = (v1_science_segments - v2_science_summaries) + v2_science_segments result.coalesce() result &= segmentlist([segment(gps_start_time, gps_end_time)]) return result
def find_frame_urls(self, site, frametype, gpsstart, gpsend, match=None, urltype=None, on_gaps="warn"): """Query the LDR host for the most all frame files of the given site and frametype in the given [gpsstart, gpsend) interval. Use urltype to restrict th returned frames to the given scheme (e.g. "file"). If on_gaps="error" is given, raises RuntimeError if there are gaps in the found frame list, otherwise prints warning if "warn" is given or nothing if "ignore" is given. Use match to return only those frames matching the given regular expression. Returns glue.lal.Cache. """ if on_gaps not in ("warn", "error", "ignore"): raise ValueError("on_gaps must be 'warn', 'error', or 'ignore'.") url = ("%s/gwf/%s/%s/%s,%s" % (_url_prefix, site, frametype, gpsstart, gpsend)) # if a URL type is specified append it to the path if urltype: url += "/%s" % urltype # request JSON output url += ".json" # append a regex if input if match: url += "?match=%s" % match # make query response = self._requestresponse("GET", url) urllist = decode(response.read()) out = lal.Cache([lal.CacheEntry.from_T050017(x, coltype=self.LIGOTimeGPSType) for x in urllist]) if on_gaps == "ignore": return out else: span = segments.segment(gpsstart, gpsend) seglist = segments.segmentlist(e.segment for e in out).coalesce() missing = (segments.segmentlist([span]) - seglist).coalesce() if span in seglist: return out else: msg = "Missing segments: \n%s" % "\n".join(map(str, missing)) if on_gaps=="warn": sys.stderr.write("%s\n" % msg) return out else: raise RuntimeError(msg)
def build_segment_list_one( engine, gps_start_time, gps_end_time, ifo, segment_name, version=None, start_pad=0, end_pad=0 ): """Builds a list of segments satisfying the given criteria """ seg_result = segmentlist([]) sum_result = segmentlist([]) # Is there any way to get segment and segement summary in one query? # Maybe some sort of outer join where we keep track of which segment # summaries we've already seen. sql = "SELECT segment_summary.start_time, segment_summary.end_time " sql += "FROM segment_definer, segment_summary " sql += "WHERE segment_summary.segment_def_id = segment_definer.segment_def_id " sql += "AND segment_definer.ifos = '%s' " % ifo if engine.__class__ == query_engine.LdbdQueryEngine: sql += "AND segment_summary.segment_def_cdb = segment_definer.creator_db " sql += "AND segment_definer.name = '%s' " % segment_name sql += "AND segment_definer.version = %s " % version sql += "AND NOT (%s > segment_summary.end_time OR segment_summary.start_time > %s)" % (gps_start_time, gps_end_time) rows = engine.query(sql) for sum_start_time, sum_end_time in rows: sum_start_time = (sum_start_time < gps_start_time) and gps_start_time or sum_start_time sum_end_time = (sum_end_time > gps_end_time) and gps_end_time or sum_end_time sum_result |= segmentlist([segment(sum_start_time, sum_end_time)]) # We can't use queries paramaterized with ? since the ldbd protocol doesn't support it... sql = "SELECT segment.start_time + %d, segment.end_time + %d " % (start_pad, end_pad) sql += "FROM segment, segment_definer " sql += "WHERE segment.segment_def_id = segment_definer.segment_def_id " if engine.__class__ == query_engine.LdbdQueryEngine: sql += "AND segment.segment_def_cdb = segment_definer.creator_db " sql += "AND segment_definer.ifos = '%s' " % ifo sql += "AND segment_definer.name = '%s' " % segment_name sql += "AND segment_definer.version = %s " % version sql += "AND NOT (%s > segment.end_time OR segment.start_time > %s)" % (gps_start_time, gps_end_time) rows = engine.query(sql) for seg_start_time, seg_end_time in rows: seg_start_time = (seg_start_time < gps_start_time) and gps_start_time or seg_start_time seg_end_time = (seg_end_time > gps_end_time) and gps_end_time or seg_end_time seg_result |= segmentlist([segment(seg_start_time, seg_end_time)]) engine.close() return sum_result, seg_result
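# The `(x < a) and a or x` expressions in build_segment_list_one() are an
# old Python boolean trick for clipping returned rows to the queried GPS
# window.  This standalone helper is only an illustration of that idiom
# (it is not part of the function above); the sample values are invented.
def clamp_to_window(start, end, gps_start_time, gps_end_time):
    """Clip a segment's endpoints to the [gps_start_time, gps_end_time] query window."""
    return max(start, gps_start_time), min(end, gps_end_time)

print(clamp_to_window(5, 50, 10, 40))   # -> (10, 40)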
def __init__(self, ifo, name, version, hackDec11=False):
    self.known = segments.segmentlist([])
    self.active = segments.segmentlist([])
    # self.metadata={}
    self.flagDict = {}
    self.ifo = ifo
    self.name = name
    self.version = version
    self.temp_process_ids = {}  # Used to hold the data
                                # associated with a process_id
    if hackDec11:
        self.insert_history = {}
    else:
        self.insert_history = []  # holds the process_metadatas and insertion_metadatas
        # Note that this assumes that proper dictionaries are appended to this list
def get_science_segs_from_datafind_outs(datafindcaches):
    """
    This function will calculate the science segments that are covered in
    the OutGroupList containing the frame files returned by various
    calls to the datafind server. This can then be used to check whether this
    list covers what it is expected to cover.

    Parameters
    ----------
    datafindcaches : OutGroupList
        List of all the datafind output files.

    Returns
    --------
    newScienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances
        The times covered by the frames found in datafindOuts.
    """
    newScienceSegs = {}
    for cache in datafindcaches:
        if len(cache) > 0:
            groupSegs = segments.segmentlist(e.segment for e in cache).coalesce()
            ifo = cache.ifo
            if not newScienceSegs.has_key(ifo):
                newScienceSegs[ifo] = groupSegs
            else:
                newScienceSegs[ifo].extend(groupSegs)
                newScienceSegs[ifo].coalesce()
    return newScienceSegs
def find_segments(doc, key, use_segment_table = True):
    key_pieces = key.split(':')
    while len(key_pieces) < 3:
        key_pieces.append('*')

    filter_func = lambda x: (str(x.ifos) == key_pieces[0] and
                             (str(x.name) == key_pieces[1] or key_pieces[1] == '*') and
                             (str(x.version) == key_pieces[2] or key_pieces[2] == '*'))

    # Find all segment definers matching the criteria
    seg_def_table = lsctables.SegmentDefTable.get_table(doc)
    seg_defs = filter(filter_func, seg_def_table)
    seg_def_ids = map(lambda x: str(x.segment_def_id), seg_defs)

    # Find all segments belonging to those definers
    if use_segment_table:
        seg_table = lsctables.SegmentTable.get_table(doc)
        seg_entries = filter(lambda x: str(x.segment_def_id) in seg_def_ids, seg_table)
    else:
        seg_sum_table = lsctables.SegmentSumTable.get_table(doc)
        seg_entries = filter(lambda x: str(x.segment_def_id) in seg_def_ids, seg_sum_table)

    # Combine into a segmentlist
    ret = segmentlist(map(lambda x: segment(x.start_time, x.end_time), seg_entries))

    ret.coalesce()

    return ret
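# A hedged usage sketch for find_segments(), assuming the glue.ligolw stack;
# the file name and flag key are placeholders, not taken from the source.
# Newer glue releases may also require an explicit contenthandler argument
# to load_filename.
from glue.ligolw import utils as ligolw_utils

xmldoc = ligolw_utils.load_filename('SCIENCE_SEGMENTS.xml')   # hypothetical file
science = find_segments(xmldoc, 'H1:DMT-SCIENCE:*', use_segment_table=True)
print(science)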
def expand_version_number(engine, segdef):
    ifo, name, version, start_time, end_time, start_pad, end_pad = segdef

    if version != '*':
        return [segdef]

    # Start looking at the full interval
    intervals = segmentlist([segment(start_time, end_time)])

    # Find the maximum version number
    sql = "SELECT max(version) FROM segment_definer "
    sql += "WHERE segment_definer.ifos = '%s' " % ifo
    sql += "AND segment_definer.name = '%s' " % name

    rows = engine.query(sql)
    try:
        version = len(rows[0]) and rows[0][0] or 1
    except:
        version = None

    results = []

    while version > 0:
        for interval in intervals:
            segs = query_segments(engine, 'segment_summary',
                                  [(ifo, name, version, interval[0], interval[1], 0, 0)])

            for seg in segs[0]:
                results.append((ifo, name, version, seg[0], seg[1], 0, 0))

        intervals.coalesce()
        intervals -= segs[0]

        version -= 1

    return results
def get_by_name(self, name, clip_to_valid = False):
    """
    Retrieve the active segmentlists whose name equals name.
    The result is a segmentlistdict indexed by instrument.  All
    segmentlist objects within it will be copies of the contents of this
    object, modifications will not affect the contents of this object.

    If clip_to_valid is True then the segmentlists will be intersected
    with their respective intervals of validity, otherwise they will be
    the verbatim active segments.

    NOTE:  the intersection operation required by clip_to_valid will
    yield undefined results unless the active and valid segmentlist
    objects are coalesced.
    """
    result = segments.segmentlistdict()

    for seglist in self:
        if seglist.name != name:
            continue
        segs = seglist.active
        if clip_to_valid:
            # do not use in-place intersection
            segs = segs & seglist.valid
        for instrument in seglist.instruments:
            if instrument in result:
                raise ValueError("multiple '%s' segmentlists for instrument '%s'" % (name, instrument))
            result[instrument] = segments.segmentlist(segs)

    if not result:
        raise KeyError("no segmentlists named '%s'" % name)

    return result
def find_times(self, site, frametype, gpsstart=None, gpsend=None):
    """Query the LDR for times for which frames are available

    Use gpsstart and gpsend to restrict the returned times to
    this semiopen interval.

    @returns: L{segmentlist<glue.segments.segmentlist>}

    @param site: single-character name of site to match
    @param frametype: name of frametype to match
    @param gpsstart: integer GPS start time of query
    @param gpsend: integer GPS end time of query
    @type site: L{str}
    @type frametype: L{str}
    @type gpsstart: L{int}
    @type gpsend: L{int}
    """
    if gpsstart and gpsend:
        url = ("%s/gwf/%s/%s/segments/%s,%s.json"
               % (_url_prefix, site, frametype, gpsstart, gpsend))
    else:
        url = ("%s/gwf/%s/%s/segments.json"
               % (_url_prefix, site, frametype))

    response = self._requestresponse("GET", url)
    segmentlist = decode(response.read())
    return segments.segmentlist(map(segments.segment, segmentlist))
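# A hedged usage sketch: find_times() above appears to be a method of a
# datafind HTTP client (glue.datafind.GWDataFindHTTPConnection is an
# assumption, since the class itself is not shown here).  The host, site,
# frametype and GPS times are placeholders and require a reachable server.
from glue import datafind

connection = datafind.GWDataFindHTTPConnection(host='datafind.ligo.org')  # hypothetical host
available = connection.find_times('H', 'H1_R', gpsstart=1126250000, gpsend=1126260000)
print(available)   # glue.segments.segmentlist of times with frames on disk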
def segmentlistdict(self): """ A segmentlistdict object describing the instruments and time spanned by this CacheEntry. A new object is constructed each time this attribute is accessed (segments are immutable so there is no reason to try to share a reference to the CacheEntry's internal segment; modifications of one would not be reflected in the other anyway). Example: >>> c = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml") >>> c.segmentlistdict['H1'] [segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))] The \"observatory\" column of the cache entry, which is frequently used to store instrument names, is parsed into instrument names for the dictionary keys using the same rules as glue.ligolw.lsctables.instrumentsproperty.get(). Example: >>> c = CacheEntry("H1H2, S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1H2-815901601-576.xml") >>> c.segmentlistdict['H1H2'] [segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))] """ # the import has to be done here to break the cyclic # dependancy from glue.ligolw.lsctables import instrumentsproperty instruments = instrumentsproperty.get(self.observatory) or (None,) return segments.segmentlistdict((instrument, segments.segmentlist(self.segment is not None and [self.segment] or [])) for instrument in instruments)
def get_slide_coincs_from_cache(cachefile, pattern, match, verb, coinc_stat): full_coinc_table = [] cache = cachefile.sieve(description=pattern, exact_match=match) found, missed = cache.checkfilesexist() files = found.pfnlist() if not len(files): print >>sys.stderr, "cache contains no files with " + pattern + " description" return None # split the time slide files into 105 groups to aid with I/O num_files=len(files) #Changed by Tristan Miller as a memory fix #groups_of_files = split_seq(files,105) groups_of_files = split_seq(files,50) for filegroup in groups_of_files: if filegroup: # extract the coinc table coinc_table = SnglInspiralUtils.ReadSnglInspiralFromFiles(filegroup, mangle_event_id=False, verbose=verb, non_lsc_tables_ok=False) segDict = SearchSummaryUtils.GetSegListFromSearchSummaries(filegroup) rings = segments.segmentlist(iterutils.flatten(segDict.values())) rings.sort() for k,ring in enumerate(rings): rings[k] = segments.segment(rings[k][0], rings[k][1] + 10**(-9)) shift_vector = {"H1": 0, "H2": 0, "L1": 5, "V1": 5} if coinc_table: SnglInspiralUtils.slideTriggersOnRingWithVector(coinc_table, shift_vector, rings) full_coinc_table.extend(CoincInspiralUtils.coincInspiralTable(coinc_table,coinc_stat)) return full_coinc_table
def convert_json_list_to_segmentlist(jsonlist):
    """
    Helper function used to convert json list of lists type object to a
    segmentlist object
    """
    segment_list = segments.segmentlist([segments.segment(x[0], x[1])
                                         for x in jsonlist])
    return segment_list
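# A minimal sketch of converting a JSON-style [[start, stop], ...] payload
# into a coalesced segmentlist, assuming glue.segments; the GPS values are
# invented for illustration.
from glue import segments

payload = [[1126250000, 1126251000], [1126250500, 1126252000]]
seglist = convert_json_list_to_segmentlist(payload)
seglist.coalesce()
print(seglist)   # [segment(1126250000, 1126252000)]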
def __init__(self, active = (), valid = (), instruments = set(), name = None,
             version = None, comment = None):
    """
    Initialize a new LigolwSegmentList instance.  active and valid are
    sequences that will be cast to segments.segmentlist objects.  They
    can be generator expressions.  The "active" sequence is what is
    usually thought of as the segment list, the "valid" sequence
    identifies the intervals of time for which the segment list's state
    is defined.
    """
    self.valid = segments.segmentlist(valid)
    self.active = segments.segmentlist(active)
    self.instruments = instruments
    self.name = name
    self.version = version
    self.comment = comment
def tosegmentxml(file, segs):
    """
    Write the glue.segments.segmentlist object segs to file object file in xml
    format with appropriate tables.
    """

    # generate empty document
    xmldoc = ligolw.Document()
    xmldoc.appendChild(ligolw.LIGO_LW())
    xmldoc.childNodes[-1].appendChild(lsctables.New(lsctables.ProcessTable))
    xmldoc.childNodes[-1].appendChild(lsctables.New(lsctables.ProcessParamsTable))

    # append process to table
    process = ligolw_process.append_process(xmldoc,
                                            program='pylal.dq.dqSegmentUtils',
                                            version=__version__,
                                            cvs_repository='lscsoft',
                                            cvs_entry_time=__date__)

    gpssegs = segments.segmentlist()
    for seg in segs:
        gpssegs.append(segments.segment(LIGOTimeGPS(seg[0]), LIGOTimeGPS(seg[1])))

    # append segs and seg definer
    segments_tables = ligolw_segments.LigolwSegments(xmldoc)
    segments_tables.add(ligolw_segments.LigolwSegmentList(active=gpssegs))
    # finalise
    segments_tables.coalesce()
    segments_tables.optimize()
    segments_tables.finalize(process)
    ligolw_process.set_process_end_time(process)

    # write file
    utils.write_fileobj(xmldoc, file, gz=False)
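# A hedged usage sketch for tosegmentxml(): write an invented segment list to
# an XML file.  It assumes the glue/pylal imports used inside the function
# are available in the calling module; the output path is a placeholder.
from glue import segments

segs = segments.segmentlist([segments.segment(1126250000, 1126251000)])
with open('example_segments.xml', 'w') as xml_file:   # hypothetical output path
    tosegmentxml(xml_file, segs)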
ifo_times = "".join(grb_ifolist) if offSourceSegment is None: print( "Warning: insufficient multi-IFO data to construct an off-source segment for GRB %s; skipping" % grb.event_number_grb, file=sys.stderr) continue elif opts.verbose: print("Sufficient off-source data has been found in", ifo_times, "time.") # write out the segment list to a segwizard file offsource_segfile = idirectory + "/offSourceSeg.txt" segmentsUtils.tosegwizard(open(offsource_segfile, "w"), segments.segmentlist([offSourceSegment])) onsource_segfile = idirectory + "/onSourceSeg.txt" segmentsUtils.tosegwizard(file(onsource_segfile, "w"), segments.segmentlist([onSourceSegment])) segLen = abs(onSourceSegment) bufferSegment = segments.segment( onSourceSegment[0]-opts.number_buffer_left*segLen,\ onSourceSegment[1]+opts.number_buffer_right*segLen) buffer_segfile = idirectory + "/bufferSeg.txt" segmentsUtils.tosegwizard(file(buffer_segfile, "w"), segments.segmentlist([bufferSegment])) if opts.verbose: print("on-source segment: ", onSourceSegment) print("off-source segment: ", offSourceSegment) ############################################################################
############################################################################## # get the pad and chunk lengths from the values in the ini file paddata = int(cp.get('data', 'pad-data')) n = int(cp.get('data', 'segment-length')) s = int(cp.get('data', 'number-of-segments')) r = int(cp.get('data', 'sample-rate')) o = int(cp.get('inspiral', 'segment-overlap')) length = (n * s - (s - 1) * o) / r overlap = o / r minsciseg = length + 2 * paddata ############################################################################## # Based on the start and end time, generate a list of epochs to # analyze. An entire hipe dag will be run for each of these epochs. search_epochs = segments.segmentlist() istart = opts.start_time while (istart < opts.end_time): iend = istart + opts.interval if iend > opts.end_time: iend = opts.end_time search_epochs.append(segments.segment(istart, iend)) istart += opts.interval # FIXME: the writing out of the segments should be done at the end so # that successfully generated dags, etc can be maintained from run to # run segmentsUtils.tosegwizard(file("multi_hipe_selectedsegs.txt", 'w'), search_epochs) ############################################################################## # Read in all the segment lists
def find_frame_urls(self, site, frametype, gpsstart, gpsend, match=None, urltype=None, on_gaps="warn"): """Find the framefiles for the given type in the [start, end) interval frame @param site: single-character name of site to match @param frametype: name of frametype to match @param gpsstart: integer GPS start time of query @param gpsend: integer GPS end time of query @param match: regular expression to match against @param urltype: file scheme to search for (e.g. 'file') @param on_gaps: what to do when the requested frame isn't found, one of: - C{'warn'} (default): print a warning, - C{'error'}: raise an L{RuntimeError}, or - C{'ignore'}: do nothing @type site: L{str} @type frametype: L{str} @type gpsstart: L{int} @type gpsend: L{int} @type match: L{str} @type urltype: L{str} @type on_gaps: L{str} @returns: L{Cache<glue.lal.Cache>} @raises RuntimeError: if gaps are found and C{on_gaps='error'} """ if on_gaps not in ("warn", "error", "ignore"): raise ValueError("on_gaps must be 'warn', 'error', or 'ignore'.") url = ("%s/gwf/%s/%s/%s,%s" % (_url_prefix, site, frametype, gpsstart, gpsend)) # if a URL type is specified append it to the path if urltype: url += "/%s" % urltype # request JSON output url += ".json" # append a regex if input if match: url += "?match=%s" % match # make query response = self._requestresponse("GET", url) urllist = decode(response.read()) out = lal.Cache([ lal.CacheEntry.from_T050017(x, coltype=self.LIGOTimeGPSType) for x in urllist ]) if on_gaps == "ignore": return out else: span = segments.segment(gpsstart, gpsend) seglist = segments.segmentlist(e.segment for e in out).coalesce() missing = (segments.segmentlist([span]) - seglist).coalesce() if span in seglist: return out else: msg = "Missing segments: \n%s" % "\n".join(map(str, missing)) if on_gaps == "warn": sys.stderr.write("%s\n" % msg) return out else: raise RuntimeError(msg)
def __init__(self, ifo):
    self.fig, self.axes = SnglBurstUtils.make_burst_plot("GPS Time (s)", "Frequency (Hz)")
    self.ifo = ifo
    self.nevents = 0
    self.seglist = segments.segmentlist()
def plotdutycycle(segdict, outfile, binlength=3600, keys=None, t0=None,\ showmean=False, **kwargs): """ Plot the percentage duty cycle each flag in the given glue.segments.segmentlistdict, binned over the given duration. """ # get time limits xlim = kwargs.pop("xlim", None) if xlim is None: try: extents = [seg.extent() for seg in segdict.values()] start = min(s[0] for s in extents) end = max(s[1] for s in extents) except ValueError: start = 0 end = 1 xlim = start, end else: start, end = xlim # get unit for plot unit, timestr = plotutils.time_axis_unit(end - start) # set xlabel and renomalize time xlabel = kwargs.pop("xlabel", None) if not xlabel: if not t0: t0 = start unit, timestr = plotutils.time_axis_unit(end - start) t0 = LIGOTimeGPS(float(t0)) if t0.nanoseconds == 0: xlabel = datetime.datetime(*date.XLALGPSToUTC(t0)[:6])\ .strftime("%B %d %Y, %H:%M:%S %ZUTC") xlabel = "Time (%s) since %s (%s)" % (timestr, xlabel, int(t0)) else: xlabel = datetime.datetime(*date.XLALGPSToUTC(\ LIGOTimeGPS(t0.seconds))[:6])\ .strftime("%B %d %Y, %H:%M:%S %ZUTC") xlabel = "Time (%s) since %s (%s)"\ % (timestr, xlabel.replace(" UTC", ".%.3s UTC" % t0.nanoseconds),\ t0) t0 = float(t0) xlim[0] = (start - t0) / unit xlim[1] = (end - t0) / unit ylabel = kwargs.pop("ylabel", "") title = kwargs.pop("title", "") subtitle = kwargs.pop("subtitle", "") # get other parameters loc = kwargs.pop("loc", 0) legalpha = kwargs.pop("alpha", 0.8) labels_inset = kwargs.pop("labels_inset", False) bbox_inches = kwargs.pop("bbox_inches", "tight") hidden_colorbar = kwargs.pop("hidden_colorbar", False) # escape underscores for latex text if not keys: keys = segdict.keys() if pylab.rcParams["text.usetex"]: newdict = segments.segmentlistdict() for i, key in enumerate(keys): newkey = re.sub('(?<!\\\\)_', '\_', key) keys[i] = newkey newdict[newkey] = segdict[key] segdict = newdict # # generate duty cycle info # # generate bins binlength = float(binlength) if int(end - start) % binlength == 0: numbins = int(end - start) / binlength else: numbins = float(end - start) // binlength + 1 bins = numpy.arange(float(start), float(end), binlength) + binlength / 2 duty = dict((key, numpy.zeros(numbins)) for key in keys) bs = float(start) for i in range(numbins): be = float(bs + binlength) seg = segments.segmentlist([segments.segment(bs, be)]) for key in keys: duty[key][i] = float(abs(segdict[key] & seg)) / abs(seg) * 100 bs += binlength if showmean: mean = dict((key, numpy.zeros(numbins)) for key in keys) for key in keys: mean[key] = [duty[key][:i + 1].mean() for i in range(numbins)] # # generate plot # bins = (bins - t0) / unit plot = plotutils.BarPlot(xlabel, ylabel, title, subtitle) for i, key in enumerate(keys): if showmean: thislabel = plotutils.display_name( key) + ' (%.2f\%%)' % (mean[key][-1]) else: thislabel = plotutils.display_name(key) plot.add_content(bins, duty[key], label=thislabel,\ alpha=0.8, width=binlength/unit) plot.finalize(loc=loc, alpha=legalpha) # add running mean if showmean: for i, key in enumerate(keys): print i, key plot.ax.plot(bins, mean[key], linestyle='--') plot.ax.get_legend().get_frame().set_alpha(0.5) # add colorbar if hidden_colorbar: plotutils.add_colorbar(plot.ax, visible=False) # set limits plot.ax.autoscale_view(tight=True, scalex=True) if xlim: plot.ax.set_xlim(map(float, xlim)) plot.ax.set_ylim(0, 100) # set grid plot.ax.grid(True, which="both") plotutils.set_time_ticks(plot.ax) plotutils.set_minor_ticks(plot.ax, x=False) # save figure plot.savefig(outfile, bbox_inches=bbox_inches,\ 
bbox_extra_artists=plot.ax.texts)
def grab_segments(start, end, flag,\ segment_url='https://segdb.ligo.caltech.edu',\ segment_summary=False): """ Returns a segmentlist containing the segments during which the given flag was active in the given period. Arguments: start : int GPS start time end : int GPS end time flag : string 'IFO:NAME:VERSION' format string Keyword arguments: segment_url : string url of segment database to query, default https://segdb.ligo.caltech.edu segment_summary : [ True | False ] also return the glue.segments.segmentlist defining the valid span of the returned segments """ # set times start = int(math.floor(start)) end = int(math.ceil(end)) # set query engine connection = segmentdb_utils.setup_database(segment_url) engine = query_engine.LdbdQueryEngine(connection) # format flag name if isinstance(flag, basestring): flags = flag.split(',') else: flags = flag segdefs = [] for f in flags: spec = f.split(':') if len(spec) < 2 or len(spec) > 3: raise AttributeError, "Included segements must be of the form "+\ "ifo:name:version or ifo:name:*" ifo = spec[0] name = spec[1] if len(spec) is 3 and spec[2] is not '*': version = int(spec[2]) if version < 1: raise AttributeError, "Segment version numbers must be greater than zero" else: version = '*' # expand segment definer segdefs += segmentdb_utils.expand_version_number(engine, (ifo, name, version, \ start, end, 0, 0)) # query database and return segs = segmentdb_utils.query_segments(engine, 'segment', segdefs) segs = [s.coalesce() for s in segs] if segment_summary: segsums = segmentdb_utils.query_segments(engine, 'segment_summary', segdefs) #segsums = reduce(operator.or_, segsums).coalesce() segsums = [s.coalesce() for s in segsums] segsummap = [segments.segmentlist() for f in flags] for segdef, segsum in zip(segdefs, segsums): try: fidx = flags.index(':'.join(map(str, segdef[:3]))) except ValueError: fidx = flags.index(':'.join(segdef[:2])) segsummap[fidx].extend(segsum) if flag == flags[0]: return segs[0], segsummap[0] else: return segs, segsummap if flag == flags[0]: return segs[0] else: return segs
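# A hedged usage sketch for grab_segments().  It needs network access to the
# segment database named in segment_url, so treat it as illustrative only;
# the GPS times and flag name are placeholders.
active, known = grab_segments(968654552, 968654652,
                              'H1:DMT-SCIENCE:1',
                              segment_url='https://segdb.ligo.caltech.edu',
                              segment_summary=True)
print(active)   # segments during which the flag was active
print(known)    # span over which the flag was defined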
def __init__(self, *files, **kwargs): sample = kwargs.get('sample', 'foreground') # Open the files and split them into coinc files, bank files, psds, # and triggers. key = itemgetter(0) files = [_hdf_file(f) for f in files] files = sorted([(_classify_hdf_file(f, sample), f) for f in files], key=key) files = { key: list(v[1] for v in value) for key, value in groupby(files, key) } try: coinc_file, = files['coincs'] except (KeyError, ValueError): raise ValueError('You must provide exactly one coinc file.') try: bank_file, = files['bank'] except (KeyError, ValueError): raise ValueError( 'You must provide exactly one template bank file.') try: psd_files = files['psds'] except KeyError: raise ValueError('You must provide PSD files.') try: trigger_files = files['triggers'] except KeyError: raise ValueError('You must provide trigger files.') self._bank = bank_file key_prefix = 'detector_' detector_nums, self._ifos = zip(*sorted( (int(key[len(key_prefix):]), value) for key, value in coinc_file.attrs.items() if key.startswith(key_prefix))) coinc_group = coinc_file[sample] self._timeslide_interval = coinc_file.attrs.get( 'timeslide_interval', 0) self._template_ids = coinc_group['template_id'] self._timeslide_ids = coinc_group.get('timeslide_id', np.zeros(len(self))) self._trigger_ids = [ coinc_group['trigger_id{}'.format(detector_num)] for detector_num in detector_nums ] triggers = {} for f in trigger_files: (ifo, group), = f.items() triggers[ifo] = [ group['snr'], group['coa_phase'], group['end_time'] ] self._triggers = tuple(triggers[ifo] for ifo in self._ifos) psdseglistdict = {} for psd_file in psd_files: (ifo, group), = psd_file.items() psd = [group['psds'][str(i)] for i in range(len(group['psds']))] psdseglistdict[ifo] = segmentlist( _psd_segment(*segargs) for segargs in zip( psd, group['start_time'], group['end_time'])) self._psds = [psdseglistdict[ifo] for ifo in self._ifos]
def InsertMultipleDQXMLFileThreaded(filenames, logger, server='http://slwebtest.virgo.infn.it', hackDec11=True, debug=True, threads=1, testing_options={}): """ Inserts multiple dqxml files of data into the DQSEGDB. - filenames is a list of string filenames for DQXML files. - hackDec11 is used to turn off good features that the server doesn't yet support. returns True if it completes sucessfully - options is a dictionary including (optionally):offset(int),synchronize(time in 'HH:MM' format (string)) """ logger.info( "Beginning call to InsertMultipleDQXMLFileThreaded. This message last updated April 14 2015, Ciao da Italia!" ) from threading import Thread from Queue import Queue import sys # Make a call to server+'/dq': protocol = server.split(':')[0] serverfqdn = server.split('/')[-1] apiResult = queryAPIVersion(protocol, serverfqdn, False) # If the API change results in a backwards incompatibility, handle it here with a flag that affects behavior below if apiResult >= "2.1.0": # S6 style comments are needed new_comments = True else: # Older server, so don't want to supply extra comments... new_comments = False if apiResult >= "2.1.15": # Alteration to insertion_metadata from uri to comment to accomodate s6 data conversion use_new_insertion_metadata = True else: use_new_insertion_metadata = False if 'offset' in testing_options: offset = int(testing_options['offset']) else: offset = 0 if 'synchronize' in testing_options: synchronize = testing_options['synchronize'] xmlparser = pyRXP.Parser() lwtparser = ldbd.LIGOLwParser() flag_versions = {} # flag_versions, filename, server, hackDec11, debug are current variables # This next bunch of code is specific to a given file: if len(filenames) < 1: print "Empty file list sent to InsertMultipleDQXMLFileThreaded" raise ValueError for filename in filenames: segment_md = setupSegment_md(filename, xmlparser, lwtparser, debug) # segment_md, flag_versions, filename, server, hackDec11, debug are current variables flag_versions_numbered = {} for j in range(len(segment_md.table['segment_definer']['stream'])): flag_versions_numbered[j] = {} for i, entry in enumerate( segment_md.table['segment_definer']['orderedcol']): #print j,entry,segment_md.table['segment_definer']['stream'][j][i] flag_versions_numbered[j][entry] = segment_md.table[ 'segment_definer']['stream'][j][i] # parse process table and make a dict that corresponds with each # process, where the keys for the dict are like "process:process_id:1" # so that we can match # these to the flag_versions from the segment definer in the next # section # Note: Wherever temp_ preceeds a name, it is generally an identifier # field from the dqxml, that is only good for the single dqxml file # being parsed process_dict = {} # Going to assign process table streams to process_dict with a key # matching process_id (process:process_id:0 for example) for j in range(len(segment_md.table['process']['stream'])): process_id_index = segment_md.table['process']['orderedcol'].index( 'process_id') temp_process_id = segment_md.table['process']['stream'][j][ process_id_index] # Now we're going to assign elements to process_dict[process_id] process_dict[temp_process_id] = {} for i, entry in enumerate( segment_md.table['process']['orderedcol']): #print j,entry,segment_md.table['process']['stream'][j][i] process_dict[temp_process_id][entry] = segment_md.table[ 'process']['stream'][j][i] # Note that the segment_md.table['process']['stream'][0] looks like this: #0 program SegGener #0 version 6831 #0 cvs_repository 
https://redoubt.ligo-wa.caltech.edu/ #0 svn/gds/trunk/Monitors/SegGener/SegGener.cc #0 cvs_entry_time 1055611021 #0 comment Segment generation from an OSC condition #0 node l1gds2 #0 username [email protected] #0 unix_procid 24286 #0 start_time 1065916603 #0 end_time 1070395521 #0 process_id process:process_id:0 #0 ifos L0L1 # So now I have all of that info stored by the process_id keys # Eventually I have to map these elements to the process_metadata # style.. maybe I can do that now: process_dict[temp_process_id]['process_metadata'] = {} if hackDec11: process_dict[temp_process_id]['process_metadata'][ 'process_start_time'] = process_dict[temp_process_id][ 'start_time'] else: # This is for the newer server APIs: (April 24 2015 we checked it (it probably changed before ER6 finally)) process_dict[temp_process_id]['process_metadata'][ 'process_start_timestamp'] = process_dict[temp_process_id][ 'start_time'] if new_comments: process_dict[temp_process_id][ 'process_comment'] = process_dict[temp_process_id][ 'comment'] process_dict[temp_process_id]['process_metadata'][ 'uid'] = process_dict[temp_process_id]['username'] process_dict[temp_process_id]['process_metadata']['args'] = [ ] ### Fix!!! dqxml has no args??? process_dict[temp_process_id]['process_metadata'][ 'pid'] = process_dict[temp_process_id]['unix_procid'] process_dict[temp_process_id]['process_metadata'][ 'name'] = process_dict[temp_process_id]['program'] process_dict[temp_process_id]['process_metadata'][ 'fqdn'] = process_dict[temp_process_id][ 'node'] ### Fix!!! Improvement: not really fqdn, just the node name # So now I have process_dict[temp_process_id]['process_metadata'] for each # process_id, and can add it to a flag version when it uses it; really I # should group it with the segment summary info because that has the # insertion_metadata start and stop time ### Fix!!! Get the args from the *other* process table... yikes ### Double check what is done below works! 
# First pass: #if debug: # import pdb # pdb.set_trace() temp_process_params_process_id = None try: len(segment_md.table['process_params']['stream']) except: logger.info("No process_params table for file: %s" % filename) else: for j in range(len(segment_md.table['process_params']['stream'])): process_id_index = segment_md.table['process_params'][ 'orderedcol'].index('process_id') temp_process_params_process_id = segment_md.table[ 'process_params']['stream'][j][process_id_index] # This next bit looks a bit strange, but the goal is to pull off only the param and value from each row of the process_params table, and then put them into the process_metadata # Thus we loop through the columns in each row and toss out everything but the param and value entries, and then outside the for loop, append them to the args list for i, entry in enumerate( segment_md.table['process_params']['orderedcol']): if entry == "param": temp_param = str( segment_md.table['process_params']['stream'][j][i]) if entry == "value": temp_value = str( segment_md.table['process_params']['stream'][j][i]) process_dict[temp_process_params_process_id][ 'process_metadata']['args'].append(str(temp_param)) process_dict[temp_process_params_process_id][ 'process_metadata']['args'].append(str(temp_value)) #if debug: # import pdb # pdb.set_trace() temp_id_to_flag_version = {} for i in flag_versions_numbered.keys(): ifo = flag_versions_numbered[i]['ifos'] name = flag_versions_numbered[i]['name'] version = flag_versions_numbered[i]['version'] if (ifo, name, version) not in flag_versions.keys(): if new_comments == True: flag_versions[(ifo, name, version)] = InsertFlagVersion( ifo, name, version) else: flag_versions[(ifo, name, version)] = InsertFlagVersionOld( ifo, name, version) if new_comments: flag_versions[(ifo, name, version)].flag_description = str( flag_versions_numbered[i]['comment'] ) # old segment_definer comment = new flag_description # OUTDATED PLACEHOLDER: flag_versions[(ifo,name,version)].version_comment=str(flag_versions_numbered[i]['comment']) else: flag_versions[(ifo, name, version)].flag_comment = str( flag_versions_numbered[i]['comment']) flag_versions[(ifo, name, version)].version_comment = str( flag_versions_numbered[i]['comment']) flag_versions[( ifo, name, version)].temporary_definer_id = flag_versions_numbered[i][ 'segment_def_id'] flag_versions[( ifo, name, version )].temporary_process_id = flag_versions_numbered[i]['process_id'] # Populate reverse lookup dictionary: temp_id_to_flag_version[flag_versions[( ifo, name, version)].temporary_definer_id] = (ifo, name, version) # ways to solve the metadata problem: # Associate each insertion_metadata block with a process, then group # them and take the min insert_data_start and max insert_data_stop # parse segment_summary table and associate known segments with # flag_versions above: ## Note this next line is needed for looping over multiple files for i in flag_versions.keys(): flag_versions[i].temp_process_ids = {} for j in range(len(segment_md.table['segment_summary']['stream'])): #flag_versions_numbered[j] = {} seg_def_index = segment_md.table['segment_summary'][ 'orderedcol'].index('segment_def_id') #print "associated seg_def_id is: "+ segment_md.table['segment_summary']['stream'][j][seg_def_index] (ifo, name, version ) = temp_id_to_flag_version[segment_md.table['segment_summary'] ['stream'][j][seg_def_index]] seg_sum_index = segment_md.table['segment_summary'][ 'orderedcol'].index('segment_sum_id') # Unneeded: 
#flag_versions[(ifo,name,version)].temporary_segment_sum_id = segment_md.table['segment_summary']['stream'][j][seg_sum_index] start_time_index = segment_md.table['segment_summary'][ 'orderedcol'].index('start_time') end_time_index = segment_md.table['segment_summary'][ 'orderedcol'].index('end_time') start_time = segment_md.table['segment_summary']['stream'][j][ start_time_index] + offset end_time = segment_md.table['segment_summary']['stream'][j][ end_time_index] + offset comment_index = segment_md.table['segment_summary'][ 'orderedcol'].index('comment') seg_sum_comment = segment_md.table['segment_summary']['stream'][j][ comment_index] new_seg_summary = segments.segmentlist( [segments.segment(start_time, end_time)]) flag_versions[(ifo, name, version)].appendKnown(new_seg_summary) # Now I need to build up the insertion_metadata dictionary for this # summary: # Now I need to associate the right process with the known # segments here, and put the start and end time into the # insertion_metadata part of the # insert_history dict # Plan for processes and affected data: # Loop through segment summaries # If we haven't seen the associated process before, create it: # First, append the temp_process_id to temp_process_ids # Then, each temp_process_ids entry is a dictionary, where the one # element is start_affected time, and the other is end_affected # time, and later we will combine this with the correct # process_metadata dictionary process_id_index = segment_md.table['segment_summary'][ 'orderedcol'].index('process_id') temp_process_id = segment_md.table['segment_summary']['stream'][j][ process_id_index] if temp_process_id in flag_versions[( ifo, name, version)].temp_process_ids.keys(): # We don't need to append this process metadata, as it already # exists We do need to extend the affected data start and stop # to match if start_time < flag_versions[( ifo, name, version )].temp_process_ids[temp_process_id]['insert_data_start']: flag_versions[(ifo, name, version)].temp_process_ids[ temp_process_id]['insert_data_start'] = start_time if end_time > flag_versions[( ifo, name, version )].temp_process_ids[temp_process_id]['insert_data_stop']: flag_versions[(ifo, name, version)].temp_process_ids[ temp_process_id]['insert_data_stop'] = end_time else: # Need to make the dictionary entry for this process_id if seg_sum_comment != None: flag_versions[(ifo, name, version)].provenance_url = seg_sum_comment else: flag_versions[(ifo, name, version)].provenance_url = '' flag_versions[( ifo, name, version)].temp_process_ids[temp_process_id] = {} flag_versions[(ifo, name, version)].temp_process_ids[ temp_process_id]['insert_data_start'] = start_time flag_versions[(ifo, name, version)].temp_process_ids[ temp_process_id]['insert_data_stop'] = end_time # Now, I need to append an insert_history element to the flag_versions # for this ifo,name, version, as I have the correct insertion_metadata # and the correct # process_metadata (from the process_dict earlier if debug: t1 = time.time() for i in flag_versions.keys(): for pid in flag_versions[i].temp_process_ids.keys(): start = flag_versions[i].temp_process_ids[pid][ 'insert_data_start'] stop = flag_versions[i].temp_process_ids[pid][ 'insert_data_stop'] if new_comments: flag_versions[i].flag_version_comment = process_dict[pid][ 'process_comment'] insert_history_dict = {} try: insert_history_dict['process_metadata'] = process_dict[ pid]['process_metadata'] except: raise # import pdb # pdb.set_trace() insert_history_dict['insertion_metadata'] = {} 
insert_history_dict['insertion_metadata'][ 'insert_data_stop'] = stop insert_history_dict['insertion_metadata'][ 'insert_data_start'] = start ifo = flag_versions[i].ifo version = flag_versions[i].version name = flag_versions[i].name if use_new_insertion_metadata == True: insert_history_dict['insertion_metadata'][ 'comment'] = '/dq/' + '/'.join( [str(ifo), str(name), str(version)] ) # FIX make dq a constant string in case we ever change it else: insert_history_dict['insertion_metadata'][ 'uri'] = '/dq/' + '/'.join( [str(ifo), str(name), str(version)] ) # FIX make dq a constant string in case we ever change it #print ifo,name,version insert_history_dict['insertion_metadata'][ 'timestamp'] = _UTCToGPS(time.gmtime()) insert_history_dict['insertion_metadata'][ 'auth_user'] = process.get_username() #if hackDec11: # # note that this only uses one insert_history...despite # all that hard work to get the list right... # # so this might break something... # flag_versions[i].insert_history=insert_history_dict #else: # flag_versions[i].insert_history.append(insert_history_dict) flag_versions[i].insert_history.append(insert_history_dict) # parse segment table and associate known segments with flag_versions # above: try: for j in range(len(segment_md.table['segment']['stream'])): #flag_versions_numbered[j] = {} seg_def_index = segment_md.table['segment'][ 'orderedcol'].index('segment_def_id') #print "associated seg_def_id is: "+ # segment_md.table['segment']['stream'][j][seg_def_index] (ifo, name, version) = temp_id_to_flag_version[ segment_md.table['segment']['stream'][j][seg_def_index]] #seg_sum_index = segment_md.table['segment']['orderedcol'].index('segment_sum_id') start_time_index = segment_md.table['segment'][ 'orderedcol'].index('start_time') end_time_index = segment_md.table['segment'][ 'orderedcol'].index('end_time') start_time = segment_md.table['segment']['stream'][j][ start_time_index] + offset end_time = segment_md.table['segment']['stream'][j][ end_time_index] + offset new_seg = segments.segmentlist( [segments.segment(start_time, end_time)]) flag_versions[(ifo, name, version)].appendActive(new_seg) except KeyError: logger.info("No segment table for this file: %s" % filename) if debug: print "No segment table for this file: %s" % filename except: print "Unexpected error:", sys.exc_info()[0] raise for i in flag_versions.keys(): flag_versions[i].coalesceInsertHistory() if threads > 1: # Call this after the loop over files, and we should be good to go concurrent = min(threads, len(i)) # Fix!!! why did I do len(i) ??? q = Queue(concurrent * 2) # Fix!!! 
Improvement: remove hardcoded concurrency for i in range(concurrent): t = Thread(target=threadedPatchWithFailCases, args=[q, server, debug, logger]) t.daemon = True t.start() for i in flag_versions.values(): i.buildFlagDictFromInsertVersion() #i.flagDict url = i.buildURL(server) if debug: print url logger.debug("json.dumps(i.flagDict):") logger.debug("%s" % json.dumps(i.flagDict)) #if hackDec11: # if len(i.active)==0: # print "No segments for this url" # continue q.put(i) q.join() else: for i in flag_versions.values(): i.buildFlagDictFromInsertVersion() #i.flagDict url = i.buildURL(server) if debug: logger.debug("Url for the following data: %s" % url) #print url logger.debug("json.dumps(i.flagDict):") logger.debug("%s" % json.dumps(i.flagDict)) #if hackDec11: # if len(i.active)==0: # print "No segments for this url" # continue patchWithFailCases(i, url, debug, logger, testing_options) if debug: logger.debug( "If we made it this far, no errors were encountered in the inserts." ) #print "If we made it this far, no errors were encountered in the inserts." ### Fix!!! Improvement: Should be more careful about error handling here. if debug: t2 = time.time() logger.debug("Time elapsed for file %s = %d." % (filename, t2 - t1)) #print "Time elapsed for file %s = %d." % (filename,t2-t1) return True
def fromsegmentxml(xml_file, return_dict=False, select_seg_def_id=None):
    """
    Read a glue.segments.segmentlist from the file object file containing an
    xml segment table.

    Parameters
    ----------
    xml_file : file object
        file object for segment xml file
    return_dict : boolean, optional (default = False)
        returns a glue.segments.segmentlistdict containing coalesced
        glue.segments.segmentlists keyed by seg_def.name for each entry in
        the contained segment_def_table.
    select_seg_def_id : int, optional (default = None)
        returns a glue.segments.segmentlist object containing only those
        segments matching the given segment_def_id integer

    Returns
    -------
    segs : glue.segments.segmentlist instance
        The segment list contained in the file.
    """

    # load XML with SegmentDefTable and SegmentTable
    xmldoc, digest = utils.load_fileobj(xml_file,
                                        gz=xml_file.name.endswith(".gz"),
                                        contenthandler=ContentHandler)
    seg_def_table = table.get_table(xmldoc, lsctables.SegmentDefTable.tableName)
    seg_table = table.get_table(xmldoc, lsctables.SegmentTable.tableName)

    if return_dict:
        segs = segments.segmentlistdict()
    else:
        segs = segments.segmentlist()

    seg_id = {}
    for seg_def in seg_def_table:
        # encode ifo, channel name and version
        full_channel_name = ':'.join([str(seg_def.ifos),
                                      str(seg_def.name),
                                      str(seg_def.version)])
        seg_id[int(seg_def.segment_def_id)] = full_channel_name
        if return_dict:
            segs[full_channel_name] = segments.segmentlist()

    for seg in seg_table:
        seg_obj = segments.segment(
            lal.LIGOTimeGPS(seg.start_time, seg.start_time_ns),
            lal.LIGOTimeGPS(seg.end_time, seg.end_time_ns))

        if return_dict:
            segs[seg_id[int(seg.segment_def_id)]].append(seg_obj)
        elif select_seg_def_id is not None:
            if int(seg.segment_def_id) == select_seg_def_id:
                segs.append(seg_obj)
        else:
            segs.append(seg_obj)

    if return_dict:
        for seg_name in seg_id.values():
            segs[seg_name] = segs[seg_name].coalesce()
    else:
        segs = segs.coalesce()

    xmldoc.unlink()

    return segs
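# Hedged usage sketch for fromsegmentxml() above.  The file name
# "H1-SCIENCE_SEGMENTS.xml" is hypothetical; any segment XML file containing
# segment_definer and segment tables should work, and the function must
# already be importable or defined in scope.
#
#     with open("H1-SCIENCE_SEGMENTS.xml", "rb") as xml_file:
#         seg_dict = fromsegmentxml(xml_file, return_dict=True)
#     for name, seglist in seg_dict.items():
#         print name, abs(seglist)  # total livetime per IFO:NAME:VERSION key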
def __init__(self, channel_name, seglist=None, description="", bitmask=""):
    seglist = segments.segmentlist([]) if seglist is None else seglist
    self.channel_name = channel_name
    self.segmentlist = seglist
    self.description = description
    self.bitmask = bitmask
def make_grb_segments_plot(wkflow, science_segs, trigger_time, trigger_name, out_dir, coherent_seg=None, fail_criterion=None): ifos = wkflow.ifos if len(science_segs.keys()) == 0: extent = segments.segment(int(wkflow.cp.get("workflow", "start-time")), int(wkflow.cp.get("workflow", "end-time"))) else: pltpad = [science_segs.extent_all()[1] - trigger_time, trigger_time - science_segs.extent_all()[0]] extent = segments.segmentlist([science_segs.extent_all(), segments.segment(trigger_time - pltpad[0], trigger_time + pltpad[1])]).extent() ifo_colors = {} for ifo in ifos: ifo_colors[ifo] = ifo_color(ifo) if ifo not in science_segs.keys(): science_segs[ifo] = segments.segmentlist([]) # Make plot fig, subs = plt.subplots(len(ifos), sharey=True) plt.xticks(rotation=20, ha='right') plt.subplots_adjust(bottom=0.15) for sub, ifo in zip(subs, ifos): for seg in science_segs[ifo]: sub.add_patch(Rectangle((seg[0], 0.1), abs(seg), 0.8, facecolor=ifo_colors[ifo], edgecolor='none')) if coherent_seg: if len(science_segs[ifo]) > 0 and \ coherent_seg in science_segs[ifo]: sub.plot([trigger_time, trigger_time], [0, 1], '-', c='orange') sub.add_patch(Rectangle((coherent_seg[0], 0), abs(coherent_seg), 1, alpha=0.5, facecolor='orange', edgecolor='none')) else: sub.plot([trigger_time, trigger_time], [0, 1], ':', c='orange') sub.plot([coherent_seg[0], coherent_seg[0]], [0, 1], '--', c='orange', alpha=0.5) sub.plot([coherent_seg[1], coherent_seg[1]], [0, 1], '--', c='orange', alpha=0.5) else: sub.plot([trigger_time, trigger_time], [0, 1], ':k') if fail_criterion: if len(science_segs[ifo]) > 0: style_str = '--' else: style_str = '-' sub.plot([fail_criterion[0], fail_criterion[0]], [0, 1], style_str, c='black', alpha=0.5) sub.plot([fail_criterion[1], fail_criterion[1]], [0, 1], style_str, c='black', alpha=0.5) sub.set_frame_on(False) sub.set_yticks([]) sub.set_ylabel(ifo, rotation=45) sub.set_xlim([float(extent[0]), float(extent[1])]) sub.get_xaxis().get_major_formatter().set_useOffset(False) sub.get_xaxis().get_major_formatter().set_scientific(False) sub.get_xaxis().tick_bottom() if not sub is subs[-1]: sub.get_xaxis().set_ticks([]) sub.get_xaxis().set_ticklabels([]) else: sub.tick_params(labelsize=10, pad=1) xmin, xmax = fig.axes[-1].get_xaxis().get_view_interval() ymin, ymax = fig.axes[-1].get_yaxis().get_view_interval() fig.axes[-1].add_artist(Line2D((xmin, xmax), (ymin, ymin), color='black', linewidth=2)) fig.axes[-1].set_xlabel('GPS Time') fig.axes[0].set_title('Science Segments for GRB%s' % trigger_name) fig.subplots_adjust(hspace=0) plot_name = 'GRB%s_segments.png' % trigger_name plot_url = 'file://localhost%s/%s' % (out_dir, plot_name) fig.savefig('%s/%s' % (out_dir, plot_name)) return [ifos, plot_name, extent, plot_url]
# name the DQ flag
optic = channel[0].split('_')[2]
flag_name = 'L1:DCH-EQ_%s_GT_%s:1' % (optic, thresh)

# grab all observing (or whatever is defined) time
active = DataQualityFlag.query_dqsegdb(args.science, args.start,
                                       args.end).active

# grab only data for the STS channel in observing time
data = get_timeseries_dict(channel, active, frametype='L1_M')

# find times above threshold
time = [j.times[j > thresh] for j in data[channel[0]]]
times = numpy.concatenate(time)

# put all times above threshold into segments
segs = segments.segmentlist()
segs.extend([segments.segment(int(t.value), int(t.value) + args.stride)
             for t in times])
segs = segs.coalesce()

# set up the xml file by making lists of the start and end times of the flag
start_time = [t[0] for t in segs]
end_time = [t[1] for t in segs]

# put into a dq flag object
flag = DataQualityFlag(flag_name, active=zip(start_time, end_time),
                       known=[[args.start, args.end]])

# write flag to xml
flag.write(segment_file)
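# A minimal, self-contained sketch of the "times above threshold -> coalesced
# segments" step above, assuming glue is installed; the times and stride are
# illustrative, not taken from any real channel.
from glue import segments

example_times = [100.0, 101.0, 102.5, 200.0]
example_stride = 2
example_segs = segments.segmentlist(
    segments.segment(int(t), int(t) + example_stride) for t in example_times)
example_segs = example_segs.coalesce()
# example_segs is now [segment(100, 104), segment(200, 202)]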
def query_segments(engine, table, segdefs): # each segdef is a list containing: # ifo, name, version, start_time, end_time, start_pad, end_pad # The trivial case: if there's nothing to do, return no time if len(segdefs) == 0: return [segmentlist([])] # # For the sake of efficiency we query the database for all the segdefs at once # This constructs a clause that looks for one # def make_clause(table, segdef): ifo, name, version, start_time, end_time, start_pad, end_pad = segdef sql = " (segment_definer.ifos = '%s' " % ifo sql += "AND segment_definer.name = '%s' " % name sql += "AND segment_definer.version = %s " % version sql += "AND NOT (%d > %s.end_time OR %s.start_time > %d)) " % ( start_time, table, table, end_time) return sql clauses = [make_clause(table, segdef) for segdef in segdefs] sql = 'SELECT segment_definer.ifos, segment_definer.name, segment_definer.version, ' sql += ' %s.start_time, %s.end_time ' % (table, table) sql += ' FROM segment_definer, %s ' % table sql += ' WHERE %s.segment_def_id = segment_definer.segment_def_id AND ' % table if engine.__class__ == query_engine.LdbdQueryEngine: sql += " %s.segment_def_cdb = segment_definer.creator_db AND " % table sql += '( ' + ' OR '.join(clauses) + ' )' rows = engine.query(sql) # # The result of a query will be rows of the form # ifo, name, version, start_time, end_time # # We want to associate each returned row with the segdef it belongs to so that # we can apply the correct padding. # # If segdefs were uniquely spcified by (ifo, name, version) this would # be easy, but it may happen that we're looking for the same segment definer # at multiple disjoint times. In particular this can happen if the user # didn't specify a version number; in that case we might have version 2 # of some flag defined over multiple disjoint segment_definers. # results = [] for segdef in segdefs: ifo, name, version, start_time, end_time, start_pad, end_pad = segdef search_span = segment(start_time, end_time) search_span_list = segmentlist([search_span]) # See whether the row belongs to the current segdef. Name, ifo and version must match # and the padded segment must overlap with the range of the segdef. def matches(row): return (row[0].strip() == ifo and row[1] == name and int(row[2]) == int(version) and search_span.intersects( segment(row[3] + start_pad, row[4] + start_pad))) # Add the padding. Segments may extend beyond the time of interest, chop off the excess. def pad_and_truncate(row_start, row_end): tmp = segmentlist( [segment(row_start + start_pad, row_end + end_pad)]) # No coalesce needed as a list with a single segment is already coalesced tmp &= search_span_list # The intersection is guaranteed to be non-empty if the row passed match() # PR 2969: The above comment is incorrect. Negative padding may cause # an empty intersection. if len(tmp) == 0: return segment(0, 0) else: return tmp[0] # Build a segment list from the returned segments, padded and trunctated. The segments will # not necessarily be disjoint, if the padding crosses gaps. They are also not gauranteed to # be in order, since there's no ORDER BY in the query. So the list needs to be coalesced # before arithmatic can be done with it. result = segmentlist([ pad_and_truncate(row[3], row[4]) for row in rows if matches(row) ]).coalesce() # This is not needed: since each of the segments are constrained to be within the search # span the whole list must be as well. # result &= search_span_list results.append(result) return results
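# Hedged sketch of calling query_segments() above.  The segdef tuples follow
# the (ifo, name, version, start_time, end_time, start_pad, end_pad) layout
# that the function unpacks; the flag names, GPS times and the engine are
# illustrative assumptions, so the actual call is left commented out.
example_segdefs = [
    ('H1', 'DMT-SCIENCE', 1, 931035615, 931122015, 0, 0),
    ('L1', 'DMT-SCIENCE', 1, 931035615, 931122015, 8, -8),
]
# engine = query_engine.LdbdQueryEngine(some_open_connection)  # hypothetical
# h1_science, l1_science = query_segments(engine, 'segment', example_segdefs)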
def build_segment_list_one(engine, gps_start_time, gps_end_time, ifo, segment_name, version=None, start_pad=0, end_pad=0): """Builds a list of segments satisfying the given criteria """ seg_result = segmentlist([]) sum_result = segmentlist([]) # Is there any way to get segment and segement summary in one query? # Maybe some sort of outer join where we keep track of which segment # summaries we've already seen. sql = "SELECT segment_summary.start_time, segment_summary.end_time " sql += "FROM segment_definer, segment_summary " sql += "WHERE segment_summary.segment_def_id = segment_definer.segment_def_id " sql += "AND segment_definer.ifos = '%s' " % ifo if engine.__class__ == query_engine.LdbdQueryEngine: sql += "AND segment_summary.segment_def_cdb = segment_definer.creator_db " sql += "AND segment_definer.name = '%s' " % segment_name sql += "AND segment_definer.version = %s " % version sql += "AND NOT (%s > segment_summary.end_time OR segment_summary.start_time > %s)" % ( gps_start_time, gps_end_time) rows = engine.query(sql) for sum_start_time, sum_end_time in rows: sum_start_time = (sum_start_time < gps_start_time) and gps_start_time or sum_start_time sum_end_time = (sum_end_time > gps_end_time) and gps_end_time or sum_end_time sum_result |= segmentlist([segment(sum_start_time, sum_end_time)]) # We can't use queries paramaterized with ? since the ldbd protocol doesn't support it... sql = "SELECT segment.start_time + %d, segment.end_time + %d " % ( start_pad, end_pad) sql += "FROM segment, segment_definer " sql += "WHERE segment.segment_def_id = segment_definer.segment_def_id " if engine.__class__ == query_engine.LdbdQueryEngine: sql += "AND segment.segment_def_cdb = segment_definer.creator_db " sql += "AND segment_definer.ifos = '%s' " % ifo sql += "AND segment_definer.name = '%s' " % segment_name sql += "AND segment_definer.version = %s " % version sql += "AND NOT (%s > segment.end_time OR segment.start_time > %s)" % ( gps_start_time, gps_end_time) rows = engine.query(sql) for seg_start_time, seg_end_time in rows: seg_start_time = (seg_start_time < gps_start_time) and gps_start_time or seg_start_time seg_end_time = (seg_end_time > gps_end_time) and gps_end_time or seg_end_time seg_result |= segmentlist([segment(seg_start_time, seg_end_time)]) engine.close() return sum_result, seg_result
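# The "(a < b) and b or a" expressions in build_segment_list_one() above are
# an old-style clamp of the returned times to the query window; a minimal
# equivalent with max()/min(), using illustrative GPS values.
gps_start_time, gps_end_time = 931035615, 931122015
sum_start_time, sum_end_time = 931035000, 931123000
sum_start_time = max(sum_start_time, gps_start_time)  # same as the and/or form
sum_end_time = min(sum_end_time, gps_end_time)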
def start_end_to_segments(start, end):
    return segmentlist([segment(s, e) for s, e in zip(start, end)])
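# A minimal worked example of start_end_to_segments() above, assuming segment
# and segmentlist are the glue.segments types imported by this module; the
# start/end values are illustrative.
example_segs = segmentlist([segment(s, e)
                            for s, e in zip([0, 10, 20], [5, 15, 25])])
# example_segs == [segment(0, 5), segment(10, 15), segment(20, 25)]
# abs(example_segs) == 15, the total time covered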
def run_query_segments(doc, proc_id, engine, gps_start_time, gps_end_time, included_segments_string, excluded_segments_string=None, write_segments=True, start_pad=0, end_pad=0): """Runs a segment query. This was originally part of ligolw_query_segments, but now is also used by ligolw_segments_from_cats. The write_segments option is provided so callers can coalesce segments obtained over sever invocations (as segments_from_cats does). """ if write_segments: all_ifos = {} for ifo, segment_name, version in split_segment_ids( included_segments_string.split(',')): all_ifos[ifo] = True new_seg_def_id = add_to_segment_definer(doc, proc_id, ''.join(list(all_ifos.keys())), 'result', 0) add_to_segment_summary(doc, proc_id, new_seg_def_id, [[gps_start_time, gps_end_time]]) result = segmentlist([]) for ifo, segment_name, version in split_segment_ids( included_segments_string.split(',')): sum_segments, seg_segments = build_segment_list( engine, gps_start_time, gps_end_time, ifo, segment_name, version, start_pad, end_pad) seg_def_id = add_to_segment_definer(doc, proc_id, ifo, segment_name, version) add_to_segment_summary(doc, proc_id, seg_def_id, sum_segments) # and accumulate segments result |= seg_segments # Excluded segments are not required if excluded_segments_string: excluded_segments = segmentlist([]) for ifo, segment_name, version in split_segment_ids( excluded_segments_string.split(',')): sum_segments, seg_segments = build_segment_list( engine, gps_start_time, gps_end_time, ifo, segment_name, version) excluded_segments |= seg_segments result = result - excluded_segments result.coalesce() # Add the segments if write_segments: add_to_segment(doc, proc_id, new_seg_def_id, result) return result
def ReadMultiInspiralTimeSlidesFromFiles(fileList,generate_output_tables=False): """ Read time-slid multiInspiral tables from a list of files @param fileList: list of input files """ if not fileList: return multiInspiralTable(), None multis = None timeSlides = [] segmentDict = {} for thisFile in fileList: doc = utils.load_filename(thisFile, gz=(thisFile or "stdin").endswith(".gz"), contenthandler = lsctables.use_in(ligolw.LIGOLWContentHandler)) # Extract the time slide table timeSlideTable = lsctables.TimeSlideTable.get_table(doc) slideMapping = {} currSlides = {} # NOTE: I think some of this is duplicated in the glue definition of the # time slide table. Probably should move over to that for slide in timeSlideTable: currID = int(slide.time_slide_id) if currID not in currSlides.keys(): currSlides[currID] = {} currSlides[currID][slide.instrument] = slide.offset elif slide.instrument not in currSlides[currID].keys(): currSlides[currID][slide.instrument] = slide.offset for slideID,offsetDict in currSlides.items(): try: # Is the slide already in the list and where? offsetIndex = timeSlides.index(offsetDict) slideMapping[slideID] = offsetIndex except ValueError: # If not then add it timeSlides.append(offsetDict) slideMapping[slideID] = len(timeSlides) - 1 # Get the mapping table segmentMap = {} timeSlideMapTable = lsctables.TimeSlideSegmentMapTable.get_table(doc) for entry in timeSlideMapTable: segmentMap[int(entry.segment_def_id)] = int(entry.time_slide_id) # Extract the segment table segmentTable = lsctables.SegmentTable.get_table(doc) for entry in segmentTable: currSlidId = segmentMap[int(entry.segment_def_id)] currSeg = entry.get() if not segmentDict.has_key(slideMapping[currSlidId]): segmentDict[slideMapping[currSlidId]] = segments.segmentlist() segmentDict[slideMapping[currSlidId]].append(currSeg) segmentDict[slideMapping[currSlidId]].coalesce() # extract the multi inspiral table try: multiInspiralTable = lsctables.MultiInspiralTable.get_table(doc) # Remap the time slide IDs for multi in multiInspiralTable: newID = slideMapping[int(multi.time_slide_id)] multi.time_slide_id = ilwd.ilwdchar(\ "time_slide:time_slide_id:%d" % (newID)) if multis: multis.extend(multiInspiralTable) else: multis = multiInspiralTable # except: multiInspiralTable = None except: raise if not generate_output_tables: return multis,timeSlides,segmentDict else: # Make a new time slide table timeSlideTab = lsctables.New(lsctables.TimeSlideTable) for slideID,offsetDict in enumerate(timeSlides): for instrument in offsetDict.keys(): currTimeSlide = lsctables.TimeSlide() currTimeSlide.instrument = instrument currTimeSlide.offset = offsetDict[instrument] currTimeSlide.time_slide_id = ilwd.ilwdchar(\ "time_slide:time_slide_id:%d" % (slideID)) currTimeSlide.process_id = ilwd.ilwdchar(\ "process:process_id:%d" % (0)) timeSlideTab.append(currTimeSlide) # Make a new mapping table timeSlideSegMapTab = lsctables.New(lsctables.TimeSlideSegmentMapTable) for i in range(len(timeSlides)): currMapEntry = lsctables.TimeSlideSegmentMap() currMapEntry.time_slide_id = ilwd.ilwdchar(\ "time_slide:time_slide_id:%d" % (i)) currMapEntry.segment_def_id = ilwd.ilwdchar(\ "segment_def:segment_def_id:%d" % (i)) timeSlideSegMapTab.append(currMapEntry) # Make a new segment table newSegmentTable = lsctables.New(lsctables.SegmentTable) segmentIDCount = 0 for i in range(len(timeSlides)): currSegList = segmentDict[i] for seg in currSegList: currSegment = lsctables.Segment() currSegment.segment_id = ilwd.ilwdchar(\ "segment:segment_id:%d" %(segmentIDCount)) 
segmentIDCount += 1 currSegment.segment_def_id = ilwd.ilwdchar(\ "segment_def:segment_def_id:%d" % (i)) currSegment.process_id = ilwd.ilwdchar(\ "process:process_id:%d" % (0)) currSegment.set(seg) currSegment.creator_db = -1 currSegment.segment_def_cdb = -1 newSegmentTable.append(currSegment) return multis,timeSlides,segmentDict,timeSlideTab,newSegmentTable,\ timeSlideSegMapTab
from glue import segments
from glue.ligolw import ligolw, lsctables, table, utils
from glue.ligolw.utils import segments as ligolw_segments
from glue.segmentdb import query_engine, segmentdb_utils
from glue.ligolw.utils import process as ligolw_process

from pylal.dq.dqTriggerUtils import def_get_time

from scipy.stats import poisson

LIGOTimeGPS = lsctables.LIGOTimeGPS

# Some boilerplate to make segmentlists picklable
import copy_reg
copy_reg.pickle(type(segments.segment(0, 1)),
                lambda x: (segments.segment, (x[0], x[1])))
copy_reg.pickle(type(segments.segmentlist([])),
                lambda x: (segments.segmentlist, ([y for y in x],)))

from glue import git_version

__author__ = "Andrew P Lundgren <*****@*****.**>, Duncan Macleod <*****@*****.**>"
__version__ = "git id %s" % git_version.id
__date__ = git_version.date

"""
This module provides useful segment and veto tools for data quality
investigations.
"""

# ==============================================================================
# Function to load segments from an xml file
# ==============================================================================
    ])
    return fig


fig = newfig(options.segment)
axes = fig.gca()

rate.filter_array(trigger_rate.array,
                  rate.gaussian_window(bins_per_filterwidth))
axes.plot(trigger_rate.centres()[0], trigger_rate.at_centres())
axes.set_xlim(list(options.segment))
axes.grid(True)
for seg in ~seglist & segments.segmentlist([options.segment]):
    axes.axvspan(seg[0], seg[1], facecolor="k", alpha=0.2)
axes.set_title("%s Excess Power Trigger Rate vs. Time\n"
               "(%d Triggers, %g s Moving Average)"
               % (options.instrument, num_triggers, options.window))

ticks = make_xticks(options.segment)
axes.set_xticks(ticks[0])
axes.set_xticklabels(ticks[1], horizontalalignment="right", fontsize=10,
                     rotation=10)
axes.set_xlabel("UTC")
#axes.yticks(fontsize = 10)
axes.set_ylabel("Trigger Rate (Hz)")
def setup_analysislogging(workflow, segs_list, insps, args, output_dir, program_name="workflow", tags=[]): """ This module sets up the analysis logging xml file that contains the following information: * Command line arguments that the code was run with * Segment list of times marked as SCIENCE * Segment list of times marked as SCIENCE and "OK" ie. not CAT_1 vetoed * Segment list of times marked as SCIENCE_OK and present on the cluster * The times that will be analysed by the matched-filter jobs Parameters ----------- workflow : pycbc.workflow.core.Workflow The Workflow instance. segs_list : pycbc.workflow.core.FileList A list of Files containing the information needed to generate the segments above. For segments generated at run time the associated segmentlist is a property of this object. insps : pycbc.workflow.core.FileList The output files from the matched-filtering module. Used to identify what times have been analysed in this workflow. output_dir : path Directory to output any files to. program_name : string (optional, default = "workflow") The program name to stick in the process/process_params tables. tags : list (optional, default = []) If given restrict to considering inspiral and segment files that are tagged with all tags in this list. """ logging.info("Entering analysis logging module.") make_analysis_dir(output_dir) # Construct the summary XML file outdoc = ligolw.Document() outdoc.appendChild(ligolw.LIGO_LW()) # Add process and process_params tables proc_id = process.register_to_xmldoc(outdoc, program_name, vars(args) ).process_id # Now add the various segment lists to this file summ_segs = segmentlist([workflow.analysis_time]) # If tags is given filter by tags if tags: for tag in tags: segs_list = segs_list.find_output_with_tag(tag) insps = insps.find_output_with_tag(tag) for ifo in workflow.ifos: # Lets get the segment lists we need seg_ifo_files = segs_list.find_output_with_ifo(ifo) # SCIENCE sci_seg_file = seg_ifo_files.find_output_with_tag('SCIENCE') if len(sci_seg_file) == 1: sci_seg_file = sci_seg_file[0] sci_segs = sci_seg_file.segmentList sci_def_id = segmentdb_utils.add_to_segment_definer(outdoc, proc_id, ifo, "CBC_WORKFLOW_SCIENCE", 0) segmentdb_utils.add_to_segment(outdoc, proc_id, sci_def_id, sci_segs) segmentdb_utils.add_to_segment_summary(outdoc, proc_id, sci_def_id, summ_segs, comment='') elif sci_seg_file: # FIXME: While the segment module is still fractured (#127) this # may not work. Please update when #127 is resolved pass #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \ # %(len(sci_seg_file), ifo, 'SCIENCE') #raise ValueError(err_msg) # SCIENCE_OK sci_ok_seg_file = seg_ifo_files.find_output_with_tag('SCIENCE_OK') if len(sci_ok_seg_file) == 1: sci_ok_seg_file = sci_ok_seg_file[0] sci_ok_segs = sci_ok_seg_file.segmentList sci_ok_def_id = segmentdb_utils.add_to_segment_definer(outdoc, proc_id, ifo, "CBC_WORKFLOW_SCIENCE_OK", 0) segmentdb_utils.add_to_segment(outdoc, proc_id, sci_ok_def_id, sci_ok_segs) segmentdb_utils.add_to_segment_summary(outdoc, proc_id, sci_ok_def_id, summ_segs, comment='') elif sci_ok_seg_file: # FIXME: While the segment module is still fractured (#127) this # may not work. Please update when #127 is resolved pass #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." 
\ # %(len(sci_ok_seg_file), ifo, 'SCIENCE_OK') #raise ValueError(err_msg) # SCIENCE_AVAILABLE sci_available_seg_file = seg_ifo_files.find_output_with_tag(\ 'SCIENCE_AVAILABLE') if len(sci_available_seg_file) == 1: sci_available_seg_file = sci_available_seg_file[0] sci_available_segs = sci_available_seg_file.segmentList sci_available_def_id = segmentdb_utils.add_to_segment_definer(\ outdoc, proc_id, ifo, "CBC_WORKFLOW_SCIENCE_AVAILABLE", 0) segmentdb_utils.add_to_segment(outdoc, proc_id, sci_available_def_id, sci_available_segs) segmentdb_utils.add_to_segment_summary(outdoc, proc_id, sci_available_def_id, summ_segs, comment='') elif sci_available_seg_file: # FIXME: While the segment module is still fractured (#127) this # may not work. Please update when #127 is resolved pass #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \ # %(len(sci_available_seg_file), ifo, 'SCIENCE_AVAILABLE') #raise ValueError(err_msg) # ANALYSABLE - This one needs to come from inspiral outs ifo_insps = insps.find_output_with_ifo(ifo) analysable_segs = ifo_insps.get_times_covered_by_files() analysable_def_id = segmentdb_utils.add_to_segment_definer(outdoc, proc_id, ifo, "CBC_WORKFLOW_ANALYSABLE", 0) segmentdb_utils.add_to_segment(outdoc, proc_id, analysable_def_id, analysable_segs) segmentdb_utils.add_to_segment_summary(outdoc, proc_id, analysable_def_id, summ_segs, comment='') summ_file = File(workflow.ifos, "WORKFLOW_SUMMARY", workflow.analysis_time, extension=".xml", directory=output_dir) summ_file.PFN(summ_file.storage_path, site='local') utils.write_filename(outdoc, summ_file.storage_path) return FileList([summ_file])
def calculate_combined_result(includedJSON, excludedJSON, startTime, endTime, ifo): """ Calculate the result of the union of the active times for the included flag less the intersection of that result with the union of the excluded flags Inputs are 2 lists of python dictionaries representing the JSON (already have run json.loads() on the JSON), a start time, and end time, and the ifo name (it does not make sense to include/exclude across multiple ifos) Parameters ---------- startTime : `int` Ex: 999999999 endTime : `int` Ex: 999999999 ifo : `string` Ex: 'L1' """ total_active_list = segments.segmentlist([]) for flag in includedJSON: #result=json.loads(flag) #flagDict=result['flags'][0] active_list = flag['active'] active_segments = segments.segmentlist( [segments.segment(x[0], x[1]) for x in active_list]) total_active_list = total_active_list + active_segments total_active_list.coalesce() for flag in excludedJSON: #result=json.loads(flag) #flagDict=result['flags'][0] active_list = flag['active'] active_segments = segments.segmentlist( [segments.segment(x[0], x[1]) for x in active_list]) total_active_list = total_active_list - active_segments total_active_list.coalesce() # Now, total_active_list contains a segmentlist object with segments spanning the expected result # includedJSON and excludedJSON contain lists of JSON text blobs (not parsed with json.loads yet) ## Note: About known segments for the result: We just report the start and end time of the period queried! If you wanted to report the actual validity of multiple segments, it's somewhat undefined if the excluded ones and/or some of the included flags aren't known about for a time when the included ones are; Technically since exclusion trumps all inclusions, if an excluded segment is known and active at any given time, the result is known for that time explicitly. result_known_segment_list = segments.segmentlist( [segments.segment(startTime, endTime)]) ## Now we have to build the JSON for this flag # JSON flag objects looks like this (each is a dictionary!): # { # "ifo" : "ifo", # "name" : "flag", # "version" : n, # "comment" : "description", # "provenance_url" : "aLog URL", # "deactivated" : false|true, # "active_indicates_ifo_badness" : true|false|null, # // known segments returned for both /active and /known URIs, no segments are returned for the /metadata or /report/flags queries # // aka S6 summary segments # "known" : [ [ts,te], [ts,te], ... ] # // active segments returned only for /active URI: # "active" : [ [ts,te], [ts,te], ... ] # // \textcolor{red}{Comment: or "segment" : [ [ts,te,value], [ts,te,value], ...] (where value can be -1,0 or +1)} # // inactive == (known - active) # // unknown == (all_time - known) # }, ## Make the json-ready flag dictionary for the combined result: ifo = ifo # replicating old behavoir from ligolw_segment_query # Note: This just uses the ifo of the last excluded flag! name = 'RESULT' version = 1 known_segments = result_known_segment_list active_segments = total_active_list result_flag = jsonhelper.buildFlagDict(ifo, name, version, known_segments, active_segments) return result_flag
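# Hedged example input for calculate_combined_result() above: two included
# flags and one excluded flag, already parsed with json.loads().  The GPS
# values are illustrative, and the call is left commented out because it also
# relies on jsonhelper.buildFlagDict from this module.
example_included = [
    {'active': [[100, 200], [300, 400]]},
    {'active': [[150, 250]]},
]
example_excluded = [
    {'active': [[180, 320]]},
]
# result_flag = calculate_combined_result(example_included, example_excluded,
#                                         100, 400, 'L1')
# The combined active time is then [100, 180) union [320, 400).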
dag.add_node(df)

# modify the start and end time by pad seconds
log_fh.write("gps_start_time = %d\n" % gps_start_time)
log_fh.write("gps_end_time = %d\n" % gps_end_time)

# Don't need to do these, since we'll pad each segment
# gps_start_time += pad
# gps_end_time -= pad
# log_fh.write("gps_start_time + pad = %d\n" % gps_start_time)
# log_fh.write("gps_end_time - pad = %d\n" % gps_end_time)

# Determine the times to analyze
job_segs = segments.segmentlist()
analysis_segs = segments.segmentlist()

for seg in available_segments:
    seg_start = seg[0] + pad
    seg_end = seg[1] - pad

    if seg_end - seg_start < length:
        # log and skip segments too short to hold a single job
        log_fh.write("Segment from %d to %d is too short, skipping\n"
                     % (seg_start, seg_end))
        continue

    while seg_start + length < seg_end:
        job_segs.append(segments.segment(seg_start, seg_start + length))
        analysis_segs.append(segments.segment(seg_start + overlap / 2,
                                              seg_start + length - overlap / 2))
        seg_start += length - overlap
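# A minimal, self-contained sketch of the job tiling loop above, assuming glue
# is installed; the length, overlap and pad values are illustrative only.
from glue import segments

ex_length, ex_overlap, ex_pad = 256, 64, 8
ex_seg = segments.segment(1000, 2000)
ex_start, ex_end = ex_seg[0] + ex_pad, ex_seg[1] - ex_pad
ex_job_segs = segments.segmentlist()
while ex_start + ex_length < ex_end:
    ex_job_segs.append(segments.segment(ex_start, ex_start + ex_length))
    ex_start += ex_length - ex_overlap
# ex_job_segs tiles (1008, 1992) into 256 s jobs overlapping by 64 s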
def compute_thinca_livetime(on_instruments, off_instruments, rings, vetoseglistdict, offsetvectors): """ @on_instruments is an iterable of the instruments that must be on. @off_instruments is an iterable of the instruments that must be off. on_instruments and off_instruments must be disjoint. @rings is a list of segments defining the analysis ring boundaries. They can overlap, and do not need to be ordered. @vetoseglistdict is a coalesced glue.segments.segmentlistdict object providing the veto segments for whatever instruments have vetoes defined for them. This can include veto lists for instruments other than those listed in on_ and off_instruments (extra veto lists will be ignored), and it need not provide lists for all instruments (instruments for which there are no veto segment lists are assumed to be on at all times). @offsetvectors is an iterable of dictionaries of instrument-offset pairs. Each dictionary must contain entries for all instruments in the union of on_instruments and off_instruments (it is allowed to name others as well, but they will be ignored). An example of one dictionary of instrument-offset pairs: {"H1": 0.0, "H2": 5.0, "L1": 10.0}. The return value is a float giving the livetime in seconds. """ # local copies so they can be modified and iterated over more than once # (in case generator expressions have been passed in) on_instruments = set(on_instruments) off_instruments = set(off_instruments) # check that the on and off instruments are disjoint if on_instruments & off_instruments: raise ValueError, "on_instruments and off_instruments not disjoint" # instruments that are not vetoed are assumed to be on on_instruments &= set(vetoseglistdict.keys()) # performance aid: only need offsets for instruments whose state is # important all_instruments = on_instruments | off_instruments offsetvectors = tuple( dict((key, value) for key, value in offsetvector.items() if key in all_instruments) for offsetvector in offsetvectors) # performance aid: if there are no offset vectors to consider, the # livetime is trivial if not offsetvectors: return [] # check that each offset vector provides values for all instruments of # interest for offsetvector in offsetvectors: if not set(offsetvector.keys()).issuperset(all_instruments): raise ValueError, "incomplete offset vector %s; missing instrument(s) %s" % ( repr(offsetvector), ", ".join(all_instruments - set(offsetvector.keys()))) # initialize the livetime sums live_time = [0.0] * len(offsetvectors) # the livetime is trivial if an instrument that must be off is never # vetoed if not set(vetoseglistdict.keys()).issuperset(off_instruments): return live_time # performance aid: don't need veto segment lists for instruments whose # state is unimportant, nor veto segments that don't intersect the rings coalesced_rings = segments.segmentlist(rings).coalesce() vetoseglistdict = segments.segmentlistdict( (key, segments.segmentlist(seg for seg in seglist if coalesced_rings.intersects_segment(seg))) for key, seglist in vetoseglistdict.items() if key in all_instruments) # tot up the time when exactly the instruments that must be on are on for ring in rings: # don't do this in loops ring = segments.segmentlist([ring]) # performance aid: this is done in the loop, inside # slideSegListDictOnRing(), but we can make that go faster by doing it # here first clipped_vetoseglistdict = segments.segmentlistdict( (key, seglist & ring) for key, seglist in vetoseglistdict.items()) # performance aid: if an instrument that must be vetoed is never # vetoed in this 
ring, the livetime is zero if not all(clipped_vetoseglistdict[key] for key in off_instruments): continue # iterate over offset vectors for n, offsetvector in enumerate(offsetvectors): # apply the offset vector to the vetoes, wrapping around the ring slidvetoes = slideSegListDictOnRing(ring[0], clipped_vetoseglistdict, offsetvector) # slidvetoes = times when instruments are vetoed, # slidvetoes.union(on_instruments) = times when an instrument that # must be on is vetoed # # ~slidvetoes = times when instruments are not vetoed, # (~slidvetoes).union(off_instruments) = times when an instrument # that must be off is not vetoed live_time[n] += float( abs(ring - slidvetoes.union(on_instruments) - (~slidvetoes).union(off_instruments))) # done return live_time
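# Hedged sketch of the inputs compute_thinca_livetime() above expects,
# assuming glue is installed; the rings, vetoes and offsets are illustrative.
# The call itself is left commented out because it also needs
# slideSegListDictOnRing from this module.
from glue import segments

example_rings = [segments.segment(0, 1000), segments.segment(1000, 2000)]
example_vetoes = segments.segmentlistdict({
    "H1": segments.segmentlist([segments.segment(100, 150)]),
    "L1": segments.segmentlist([segments.segment(400, 500)]),
})
example_offsets = [{"H1": 0.0, "L1": 0.0}, {"H1": 0.0, "L1": 5.0}]
# live = compute_thinca_livetime(["H1", "L1"], [], example_rings,
#                                example_vetoes, example_offsets)
# live would be a list of livetimes in seconds, one entry per offset vector.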
def getSegments(seglistin, interval):
    seglistout = segments.segmentlist([s for s in seglistin
                                       if s[1] > interval[0]
                                       and s[0] < interval[1]])
    return seglistout
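# A minimal worked example of getSegments() above, assuming glue is installed.
# Note that segments overlapping the interval are kept whole, not truncated.
from glue import segments

example_in = segments.segmentlist([segments.segment(0, 10),
                                   segments.segment(20, 30),
                                   segments.segment(40, 50)])
example_out = getSegments(example_in, segments.segment(5, 25))
# example_out == [segment(0, 10), segment(20, 30)]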
def calculate_versionless_result(jsonResults, startTime, endTime, ifo_input=None): """ Construct output segments lists from multiple JSON objects. The jsonResults input is a list of json ojbects and are expected to be in order of decreasing versions. """ debug = False active_results = {} segment_known_results = {} affected_results = {} total_active_list = segments.segmentlist([]) total_query_time = segments.segmentlist( [segments.segment(startTime, endTime)]) total_known_list = segments.segmentlist([]) for resultin in jsonResults: #result=json.loads(resultin) result = resultin # old : flagDict=result['flags'][0] # Our queries above each return 1 flag version = int(result['version']) deactivated_state = result['metadata']['deactivated'] if str(deactivated_state) in ["False", "false"]: known_list = result['known'] known_segments = segments.segmentlist([ segments.segment(x[0], x[1]) for x in known_list ]) # make a segment list object to do arithmetic known_segments.coalesce() segment_known_results[version] = known_segments active_list = result['active'] active_segments = segments.segmentlist([ segments.segment(x[0], x[1]) for x in active_list ]) # make a segment list object to do arithmetic active_segments.coalesce() active_results[version] = active_segments if debug: print "Active results for version %d" % version print active_results[version] # Now I have 2 dictionaries of known and active segments with versions as keys in case I want/need them later... # This next step might seem a bit confusing: # We need to take the active segments for this version only during times that were not known by higher segment versions # Thus we need to take the intersection (& operator) of the unknown segments across all higher versions with the known segments for this version, then take the intersection of that result with the active segments for this version, and then add that to the current list of total active segments... phew: total_active_list |= (total_query_time - total_known_list ) & known_segments & active_segments if debug: import pdb print "Running pdb to see what is in total_active_list" pdb.set_trace() total_active_list.coalesce() # The S6 clients want to know about the range of times affected by a given version explicitly, so those are calculated here: affected_results[version] = (total_query_time - total_known_list) & known_segments # Note that the order matters here! we use the total_known_list from the previous iteration of the loop step to figure out which active segments to use in this iteration of the loop, so the above line must come before the next total_known_list |= known_segments total_known_list.coalesce() if ifo_input == None: if len(jsonResults) == 0: import exceptions exceptions.RuntimeError( "No versions for flag in versionless query") else: ifo = result['ifo'] else: #Only use ifo_input if we can't extract the ifo from the json result (usually because json result is empty) try: ifo = result['ifo'] except: ifo = ifo_input name = 'RESULT' # Fix!!! Executive decision to make this clear that this is not a specific IFO:FLAG:VERSION resource, but rather a contrived result version = 1 # Fix!!! 
Executive decision to make this match what old clients expect # I would prefer that this is more clear that this is not a specific IFO:FLAG:VERSION resource, but rather a contrived result, possibly by making it version 0 total_active_list.coalesce() total_known_list.coalesce() result_flag = jsonhelper.buildFlagDict(ifo, name, version, total_known_list, total_active_list) return result_flag, affected_results
def setup_datafind_workflow(workflow, scienceSegs, outputDir, seg_file=None, tags=None): """ Setup datafind section of the workflow. This section is responsible for generating, or setting up the workflow to generate, a list of files that record the location of the frame files needed to perform the analysis. There could be multiple options here, the datafind jobs could be done at run time or could be put into a dag. The subsequent jobs will know what was done here from the OutFileList containing the datafind jobs (and the Dagman nodes if appropriate. For now the only implemented option is to generate the datafind files at runtime. This module can also check if the frameFiles actually exist, check whether the obtained segments line up with the original ones and update the science segments to reflect missing data files. Parameters ---------- workflow: pycbc.workflow.core.Workflow The workflow class that stores the jobs that will be run. scienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances This contains the times that the workflow is expected to analyse. outputDir : path All output files written by datafind processes will be written to this directory. seg_file : SegFile, optional (default=None) The file returned by get_science_segments containing the science segments and the associated segment_summary. This will be used for the segment_summary test and is required if, and only if, performing that test. tags : list of string, optional (default=None) Use this to specify tags. This can be used if this module is being called more than once to give call specific configuration (by setting options in [workflow-datafind-${TAG}] rather than [workflow-datafind]). This is also used to tag the Files returned by the class to uniqueify the Files and uniqueify the actual filename. FIXME: Filenames may not be unique with current codes! Returns -------- datafindOuts : OutGroupList List of all the datafind output files for use later in the pipeline. sci_avlble_file : SegFile SegFile containing the analysable time after checks in the datafind module are applied to the input segment list. For production runs this is expected to be equal to the input segment list. scienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances This contains the times that the workflow is expected to analyse. If the updateSegmentTimes kwarg is given this will be updated to reflect any instances of missing data. sci_avlble_name : string The name with which the analysable time is stored in the sci_avlble_file. 
""" if tags == None: tags = [] logging.info("Entering datafind module") make_analysis_dir(outputDir) cp = workflow.cp # Parse for options in ini file datafindMethod = cp.get_opt_tags("workflow-datafind", "datafind-method", tags) if cp.has_option_tags("workflow-datafind", "datafind-check-segment-gaps", tags): checkSegmentGaps = cp.get_opt_tags("workflow-datafind", "datafind-check-segment-gaps", tags) else: checkSegmentGaps = "no_test" if cp.has_option_tags("workflow-datafind", "datafind-check-frames-exist", tags): checkFramesExist = cp.get_opt_tags("workflow-datafind", "datafind-check-frames-exist", tags) else: checkFramesExist = "no_test" if cp.has_option_tags("workflow-datafind", "datafind-check-segment-summary", tags): checkSegmentSummary = cp.get_opt_tags( "workflow-datafind", "datafind-check-segment-summary", tags) else: checkSegmentSummary = "no_test" logging.info("Starting datafind with setup_datafind_runtime_generated") if datafindMethod == "AT_RUNTIME_MULTIPLE_CACHES": datafindcaches, datafindouts = \ setup_datafind_runtime_cache_multi_calls_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafindMethod == "AT_RUNTIME_SINGLE_CACHES": datafindcaches, datafindouts = \ setup_datafind_runtime_cache_single_call_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafindMethod == "AT_RUNTIME_MULTIPLE_FRAMES": datafindcaches, datafindouts = \ setup_datafind_runtime_frames_multi_calls_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafindMethod == "AT_RUNTIME_SINGLE_FRAMES": datafindcaches, datafindouts = \ setup_datafind_runtime_frames_single_call_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafindMethod == "FROM_PREGENERATED_LCF_FILES": ifos = scienceSegs.keys() datafindcaches, datafindouts = \ setup_datafind_from_pregenerated_lcf_files(cp, ifos, outputDir, tags=tags) else: msg = "Entry datafind-method in [workflow-datafind] does not have " msg += "expected value. Valid values are " msg += "AT_RUNTIME_MULTIPLE_FRAMES, AT_RUNTIME_SINGLE_FRAMES " msg += "AT_RUNTIME_MULTIPLE_CACHES or AT_RUNTIME_SINGLE_CACHES. " msg += "Consult the documentation for more info." raise ValueError(msg) using_backup_server = False if datafindMethod == "AT_RUNTIME_MULTIPLE_FRAMES" or \ datafindMethod == "AT_RUNTIME_SINGLE_FRAMES": if cp.has_option_tags("workflow-datafind", "datafind-backup-datafind-server", tags): using_backup_server = True backup_server = cp.get_opt_tags("workflow-datafind", "datafind-backup-datafind-server", tags) cp_new = copy.deepcopy(cp) cp_new.set("workflow-datafind", "datafind-ligo-datafind-server", backup_server) cp_new.set('datafind', 'urltype', 'gsiftp') backup_datafindcaches, backup_datafindouts =\ setup_datafind_runtime_frames_single_call_perifo(cp_new, scienceSegs, outputDir, tags=tags) backup_datafindouts = datafind_keep_unique_backups(\ backup_datafindouts, datafindouts) datafindcaches.extend(backup_datafindcaches) datafindouts.extend(backup_datafindouts) logging.info("setup_datafind_runtime_generated completed") # If we don't have frame files covering all times we can update the science # segments. 
if checkSegmentGaps in ['warn', 'update_times', 'raise_error']: logging.info("Checking science segments against datafind output....") newScienceSegs = get_science_segs_from_datafind_outs(datafindcaches) logging.info("Datafind segments calculated.....") missingData = False msg = "Any errors directly following this message refer to times that" msg += " the segment server says are science, but datafind cannot find" msg += "frames for:" logging.info(msg) for ifo in scienceSegs.keys(): # If no data in the input then do nothing if not scienceSegs[ifo]: msg = "No input science segments for ifo %s " % (ifo) msg += "so, surprisingly, no data has been found. " msg += "Was this expected?" logging.warning(msg) continue if not newScienceSegs.has_key(ifo): msg = "IFO %s's science segments " % (ifo) msg += "are completely missing." logging.error(msg) missingData = True if checkSegmentGaps == 'update_times': scienceSegs[ifo] = segments.segmentlist() continue missing = scienceSegs[ifo] - newScienceSegs[ifo] if abs(missing): msg = "From ifo %s we are missing frames covering:" % (ifo) msg += "\n%s" % "\n".join(map(str, missing)) missingData = True logging.error(msg) if checkSegmentGaps == 'update_times': # Remove missing time, so that we can carry on if desired logging.info("Updating science times for ifo %s." % (ifo)) scienceSegs[ifo] = scienceSegs[ifo] - missing if checkSegmentGaps == 'raise_error' and missingData: raise ValueError("Workflow cannot find needed data, exiting.") logging.info("Done checking, any discrepancies are reported above.") elif checkSegmentGaps == 'no_test': # Do nothing pass else: errMsg = "checkSegmentGaps kwArg must take a value from 'no_test', " errMsg += "'warn', 'update_times' or 'raise_error'." raise ValueError(errMsg) # Do all of the frame files that were returned actually exist? if checkFramesExist in ['warn', 'update_times', 'raise_error']: logging.info("Verifying that all frames exist on disk.") missingFrSegs, missingFrames = \ get_missing_segs_from_frame_file_cache(datafindcaches) missingFlag = False for ifo in missingFrames.keys(): # If no data in the input then do nothing if not scienceSegs[ifo]: continue # If using a backup server, does the frame exist remotely? if using_backup_server: # WARNING: This will be slow, but hopefully it will not occur # for too many frames. This could be optimized if # it becomes necessary. new_list = [] for frame in missingFrames[ifo]: for dfout in datafindouts: dfout_pfns = list(dfout.pfns) dfout_urls = [a.url for a in dfout_pfns] if frame.url in dfout_urls: pfn = dfout_pfns[dfout_urls.index(frame.url)] dfout.removePFN(pfn) if len(dfout.pfns) == 0: new_list.append(frame) else: msg = "Frame %s not found locally. "\ %(frame.url,) msg += "Replacing with remote url(s) " msg += "%s." \ %(str([a.url for a in dfout.pfns]),) logging.info(msg) break else: new_list.append(frame) missingFrames[ifo] = new_list if missingFrames[ifo]: msg = "From ifo %s we are missing the following frames:" % ( ifo) msg += '\n'.join([a.url for a in missingFrames[ifo]]) missingFlag = True logging.error(msg) if checkFramesExist == 'update_times': # Remove missing times, so that we can carry on if desired logging.info("Updating science times for ifo %s." 
% (ifo)) scienceSegs[ifo] = scienceSegs[ifo] - missingFrSegs[ifo] if checkFramesExist == 'raise_error' and missingFlag: raise ValueError("Workflow cannot find all frames, exiting.") logging.info("Finished checking frames.") elif checkFramesExist == 'no_test': # Do nothing pass else: errMsg = "checkFramesExist kwArg must take a value from 'no_test', " errMsg += "'warn', 'update_times' or 'raise_error'." raise ValueError(errMsg) # Check if there are cases where frames exist, but no entry in the segment # summary table are present. if checkSegmentSummary in ['warn', 'raise_error']: logging.info("Checking the segment summary table against frames.") dfScienceSegs = get_science_segs_from_datafind_outs(datafindcaches) missingFlag = False # NOTE: Should this be overrideable in the config file? sci_seg_name = "SCIENCE" if seg_file is None: err_msg = "You must provide the science segments SegFile object " err_msg += "if using the datafind-check-segment-summary option." raise ValueError(err_msg) if seg_file.seg_summ_dict is None: err_msg = "The provided science segments SegFile object must " err_msg += "contain a valid segment_summary table if using the " err_msg += "datafind-check-segment-summary option." raise ValueError(err_msg) seg_summary_times = seg_file.seg_summ_dict for ifo in dfScienceSegs.keys(): curr_seg_summ_times = seg_summary_times[ifo + ":" + sci_seg_name] missing = (dfScienceSegs[ifo] & seg_file.valid_segments) missing.coalesce() missing = missing - curr_seg_summ_times missing.coalesce() scienceButNotFrame = scienceSegs[ifo] - dfScienceSegs[ifo] scienceButNotFrame.coalesce() missing2 = scienceSegs[ifo] - scienceButNotFrame missing2.coalesce() missing2 = missing2 - curr_seg_summ_times missing2.coalesce() if abs(missing): msg = "From ifo %s the following times have frames, " % (ifo) msg += "but are not covered in the segment summary table." msg += "\n%s" % "\n".join(map(str, missing)) logging.error(msg) missingFlag = True if abs(missing2): msg = "From ifo %s the following times have frames, " % (ifo) msg += "are science, and are not covered in the segment " msg += "summary table." msg += "\n%s" % "\n".join(map(str, missing2)) logging.error(msg) missingFlag = True if checkSegmentSummary == 'raise_error' and missingFlag: errMsg = "Segment_summary discrepancy detected, exiting." raise ValueError(errMsg) elif checkSegmentSummary == 'no_test': # Do nothing pass else: errMsg = "checkSegmentSummary kwArg must take a value from 'no_test', " errMsg += "'warn', or 'raise_error'." raise ValueError(errMsg) # Now need to create the file for SCIENCE_AVAILABLE sci_avlble_dict = segments.segmentlistdict() # NOTE: Should this be overrideable in the config file? sci_avlble_name = "SCIENCE_AVAILABLE" for ifo in scienceSegs.keys(): sci_avlble_dict[ifo + ':' + sci_avlble_name] = scienceSegs[ifo] sci_avlble_file = SegFile.from_segment_list_dict( 'SCIENCE_AVAILABLE', sci_avlble_dict, ifo_list=scienceSegs.keys(), valid_segment=workflow.analysis_time, extension='.xml', tags=tags, directory=outputDir) logging.info("Leaving datafind module") return FileList( datafindouts), sci_avlble_file, scienceSegs, sci_avlble_name
try:
    bit_mask = int(value)
except ValueError:
    bit_mask = int(value, 16)  # try base 16 in case it's hex

#
# Are we looking outside the range rather than inside?
#
if min_threshold is not None and max_threshold is not None:
    invert = min_threshold >= max_threshold
else:
    invert = False

if opts.verbose:
    print "Inverted? %s" % str(invert)

seglist = segmentlist([])
for path in cache.pfnlist():
    #
    # Read data
    #
    data, start, _, dt, _, _ = Fr.frgetvect1d(path, channel)

    #
    # Apply conditions and transform samples to segments
    #
    if equals is not None:
        seglist.extend(dqsegs.equality_data_to_seglist(data, start, dt,
                                                       equality=equals))
    if bit_mask is not None:
        seglist.extend(dqsegs.mask_data_to_seglist(data, start, dt,
                                                   mask_on=bit_mask))
    else:
        seglist.extend(dqsegs.threshold_data_to_seglist(
            data, start, dt, min_threshold=min_threshold,
            max_threshold=max_threshold, invert=invert))
#!/usr/bin/env python

import operator
import unittest

import numpy as np
np.seterr(all="raise")

from glue.segments import segment, segmentlist
from pylal import metaarray

TimeSeries_metadata = {
    "name": "test",
    "dt": 0.1,
    "segments": segmentlist([segment(1, 5)]),
    "comments": []
}

class test_TimeSeries(unittest.TestCase):
    def test_identity(self):
        """
        See that the TimeSeries wrapping doesn't touch array data
        """
        arr = np.arange(100, dtype=np.float32)
        spec = metaarray.TimeSeries(arr, TimeSeries_metadata)
        self.assertTrue((arr == spec.A).all())

        arr = np.arange(100, dtype=np.float64)
        spec = metaarray.TimeSeries(arr, TimeSeries_metadata)
        self.assertTrue((arr == spec.A).all())

        arr = np.ones(100, dtype=np.bool8)
# FIXME: This should be imported from pycbc.
DYN_RANGE_FAC = 5.9029581035870565e+20

class psd_segment(segment):
    def __new__(cls, psd, *args):
        return segment.__new__(cls, *args)

    def __init__(self, psd, *args):
        self.psd = psd

psdseglistdict = {}
for psd_file in opts.psd_files:
    (ifo, group), = h5py.File(psd_file, 'r').items()
    psd = [group['psds'][str(i)] for i in range(len(group['psds']))]
    psdseglistdict[ifo] = segmentlist(
        psd_segment(*segargs) for segargs in
        zip(psd, group['start_time'], group['end_time']))

def reference_psd_for_sngl(sngl):
    psd = psdseglistdict[sngl.ifo]
    try:
        psd = psd[psd.find(sngl.get_end())].psd
    except ValueError:
        raise ValueError(
            'No PSD found for detector {0} at GPS time {1}'.format(
                sngl.ifo, sngl.get_end()))

    flow = psd.file.attrs['low_frequency_cutoff']
    df = psd.attrs['delta_f']
    kmin = int(flow / df)
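# A minimal sketch of the psd_segment lookup pattern used above, assuming the
# segment/segmentlist names in scope are the glue.segments types.  The "psd"
# payload here is a placeholder string; in the code above it is an h5py
# dataset.
example_psds = segmentlist([psd_segment("psd-A", 0, 100),
                            psd_segment("psd-B", 100, 200)])
# segmentlist.find() returns the index of the segment containing a given time
assert example_psds[example_psds.find(150)].psd == "psd-B"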
def frominjectionfile(file, type, ifo=None, start=None, end=None): """ Read generic injection file object file containing injections of the given type string. Returns an 'Sim' lsctable of the corresponding type. Arguments: file : file object type : [ "inspiral" | "burst" | "ringdown" ] Keyword arguments: ifo : [ "G1" | "H1" | "H2" | "L1" | "V1" ] """ # read type type = type.lower() # read injection xml xml = re.compile('(xml$|xml.gz$)') if re.search(xml,file.name): xmldoc,digest = utils.load_fileobj(file) injtable = table.get_table(xmldoc,'sim_%s:table' % (type)) # read injection txt else: cchar = re.compile('[#%<!()_\[\]{}:;\'\"]+') #== construct new Sim{Burst,Inspiral,Ringdown}Table injtable = lsctables.New(lsctables.__dict__['Sim%sTable' % (type.title())]) if type=='inspiral': columns = ['geocent_end_time.geocent_end_time_ns',\ 'h_end_time.h_end_time_ns',\ 'l_end_time.l_end_time_ns',\ 'v_end_time.v_end_time_ns',\ 'distance'] for line in file.readlines(): if re.match(cchar,line): continue # set up siminspiral object inj = lsctables.SimInspiral() # split data sep = re.compile('[\s,=]+') data = sep.split(line) # set attributes inj.geocent_end_time = int(data[0].split('.')[0]) inj.geocent_end_time_ns = int(data[0].split('.')[1]) inj.h_end_time = int(data[1].split('.')[0]) inj.h_end_time_ns = int(data[1].split('.')[1]) inj.l_end_time = int(data[2].split('.')[0]) inj.l_end_time_ns = int(data[2].split('.')[1]) inj.v_end_time = int(data[3].split('.')[0]) inj.v_end_time_ns = int(data[3].split('.')[1]) inj.distance = float(data[4]) injtable.append(inj) if type=='burst': if file.readlines()[0].startswith('filestart'): # if given parsed burst file file.seek(0) snrcol = { 'G1':23, 'H1':19, 'L1':21, 'V1':25 } for line in file.readlines(): inj = lsctables.SimBurst() # split data sep = re.compile('[\s,=]+') data = sep.split(line) # set attributes # gps time if 'burstgps' in data: idx = data.index('burstgps')+1 geocent = LIGOTimeGPS(data[idx]) inj.time_geocent_gps = geocent.seconds inj.time_geocent_gps_ns = geocent.nanoseconds else: continue #inj.waveform = data[4] #inj.waveform_number = int(data[5]) # frequency if 'freq' in data: idx = data.index('freq')+1 inj.frequency = float(data[idx]) else: continue # SNR a.k.a. 
amplitude if ifo and 'snr%s' % ifo in data: idx = data.index('snr%s' % ifo)+1 inj.amplitude = float(data[idx]) elif 'rmsSNR' in data: idx = data.index('rmsSNR')+1 inj.amplitude = float(data[idx]) else: continue if 'phi' in data: idx = data.index('phi' )+1 inj.ra = float(data[idx])*24/(2*math.pi) if 'theta' in data: idx = data.index('theta' )+1 inj.ra = 90-(float(data[idx])*180/math.pi) if ifo and 'hrss%s' % ifo in data: idx = data.index('hrss%s' % ifo)+1 inj.hrss = float(data[idx]) elif 'hrss' in data: idx = data.index('hrss')+1 inj.hrss = float(data[idx]) # extra columns to be added when I know how #inj.q = 0 #inj.q = float(data[11]) #h_delay = LIGOTimeGPS(data[41]) #inj.h_peak_time = inj.time_geocent_gps+h_delay.seconds #inj.h_peak_time_ns = inj.time_geocent_gps_ns+h_delay.nanoseconds #l_delay = LIGOTimeGPS(data[43]) #inj.l_peak_time = inj.time_geocent_gps+l_delay.seconds #inj.l_peak_time_ns = inj.time_geocent_gps_ns+l_delay.nanoseconds #v_delay = LIGOTimeGPS(data[43]) #inj.v_peak_time = inj.time_geocent_gps+v_delay.seconds #inj.v_peak_time_ns = inj.time_geocent_gps_ns+v_delay.nanoseconds injtable.append(inj) else: # if given parsed burst file file.seek(0) for line in file.readlines(): inj = lsctables.SimBurst() # split data sep = re.compile('[\s,]+') data = sep.split(line) # set attributes geocent = LIGOTimeGPS(data[0]) inj.time_geocent_gps = geocent.seconds inj.time_geocent_gps_ns = geocent.nanoseconds injtable.append(inj) injections = table.new_from_template(injtable) if not start: start = 0 if not end: end = 9999999999 span = segments.segmentlist([ segments.segment(start, end) ]) get_time = dqTriggerUtils.def_get_time(injections.tableName) injections.extend(inj for inj in injtable if get_time(inj) in span) return injections
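# Hedged usage sketch for frominjectionfile() above.  The file name is
# hypothetical and must contain either a sim_inspiral XML table or the parsed
# text format handled by the function.
#
#     with open("HL-INJECTIONS_1-871147814-86400.xml") as injfile:
#         sims = frominjectionfile(injfile, "inspiral",
#                                  start=871147814, end=871234214)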