def load_cache(xmldoc, cache, sieve_pattern, exact_match=False, verbose=False):
    """
    Return a parsed and ligolw_added XML document from the files matched by
    sieve_pattern in the given cache.

    NOTE: the ``xmldoc`` argument is ignored — a fresh ligolw.Document is
    always built and returned; the parameter is kept for call-site
    compatibility.
    """
    subcache = cache.sieve(description=sieve_pattern, exact_match=exact_match)
    found, missed = subcache.checkfilesexist()
    if not found:
        # Python 2/3-compatible warning (was a py2-only print statement);
        # output is unchanged: same message, newline-terminated, on stderr.
        sys.stderr.write("warning: no files found for pattern %s\n"
                         % sieve_pattern)

    # turn on event_id mangling (module-global state on SnglInspiralTable)
    old_id = lsctables.SnglInspiralTable.next_id
    lsctables.SnglInspiralTable.next_id = SnglInspiralID_old(0)

    urls = [c.url for c in found]
    try:
        try:
            xmldoc = ligolw_add.ligolw_add(ligolw.Document(), urls,
                                           verbose=verbose)
        except ligolw.ElementError:
            # FIXME: backwards compatibility for int_8s SnglInspiralTable
            # event_ids — retry the parse with the legacy column type.
            lsctables.SnglInspiralTable.validcolumns["event_id"] = "int_8s"
            lsctables.SnglInspiralID = int
            xmldoc = ligolw_add.ligolw_add(ligolw.Document(), urls,
                                           verbose=verbose)
    finally:
        # turn off event_id mangling even if parsing raised, so the global
        # table class is never left in the mangled state
        lsctables.SnglInspiralTable.next_id = old_id
    return xmldoc
def load_cache(xmldoc, cache, sieve_pattern, exact_match=False, verbose=False):
    """
    Return a parsed and ligolw_added XML document from the files matched by
    sieve_pattern in the given cache.

    NOTE(review): the ``xmldoc`` argument is never read — it is rebound to a
    fresh Document below; presumably kept for call-site compatibility.
    """
    subcache = cache.sieve(description=sieve_pattern, exact_match=exact_match)
    found, missed = subcache.checkfilesexist()
    if len(found) == 0:
        # py2-only print statement; writes a warning to stderr
        print >>sys.stderr, "warning: no files found for pattern %s" \
            % sieve_pattern

    # turn on event_id mangling — mutates module-global class state, so this
    # function is not safe to call concurrently
    old_id = lsctables.SnglInspiralTable.next_id
    lsctables.SnglInspiralTable.next_id = SnglInspiralID_old(0)

    # reduce memory footprint at the expense of speed
    # table.RowBuilder = table.InterningRowBuilder

    urls = [c.url for c in found]
    try:
        xmldoc = ligolw_add.ligolw_add(ligolw.Document(), urls, verbose=verbose)
    except ligolw.ElementError:
        # FIXME: backwards compatibility for int_8s SnglInspiralTable
        # event_ids — switch the column type to the legacy int_8s and retry
        lsctables.SnglInspiralTable.validcolumns["event_id"] = "int_8s"
        lsctables.SnglInspiralID = int
        xmldoc = ligolw_add.ligolw_add(ligolw.Document(), urls, verbose=verbose)

    # turn off event_id mangling
    # NOTE(review): not reached if the retry above raises, leaving next_id
    # mangled globally
    lsctables.SnglInspiralTable.next_id = old_id
    return xmldoc
def run_segment_operation(outdoc, filenames, segments, use_segment_table, operation, result_name = 'RESULT', preserve = True):
    """
    Performs an operation (intersect or union) across a set of segments.
    That is, given a set of files each with segment definers DMT-FLAG1,
    DMT-FLAG2 etc and a list of segments DMT-FLAG1,DMT-FLAG1 this returns

         RESULT = (table 1's DMT-FLAG1 union table 2's DMT-FLAG1 union ...)
                   operation
                  (table 1's DMT-FLAG2 union table 2's DMT-FLAG2 union ...)
                   operation
                   etc

    The combined segment list is written into ``outdoc`` under a new
    segment definer named ``result_name``, and ``(outdoc, total livetime)``
    is returned.
    """
    # process_id of the first (and presumably only) process row in outdoc
    proc_id = lsctables.ProcessTable.get_table(outdoc)[0].process_id

    # preserve=True merges the input files into outdoc itself; otherwise
    # they are loaded into a throwaway document
    if preserve:
        indoc = ligolw_add.ligolw_add(outdoc, filenames)
    else:
        indoc = ligolw_add.ligolw_add(ligolw.Document(), filenames)

    # Start with a segment covering all of time, then
    # intersect with each of the fields of interest
    keys = segments.split(',')

    if operation == INTERSECT:
        # identity for &= is the all-of-time segment list
        sgmntlist = glue.segments.segmentlist([glue.segments.segment(-glue.segments.infinity(), glue.segments.infinity())])

        for key in keys:
            sgmntlist &= find_segments(indoc, key, use_segment_table)

    elif operation == UNION:
        # identity for |= is the empty segment list
        sgmntlist = glue.segments.segmentlist([])

        for key in keys:
            sgmntlist |= find_segments(indoc, key, use_segment_table)
    elif operation == DIFF:
        # subtract every later key's segments from the first key's
        sgmntlist = find_segments(indoc, keys[0], use_segment_table)

        for key in keys[1:]:
            sgmntlist -= find_segments(indoc, key, use_segment_table)
    else:
        raise NameError("%s is not a known operation (intersect, union or diff)" % operation)

    # Add a segment definer and segments
    seg_def_id = add_to_segment_definer(outdoc, proc_id, '', result_name, 1)

    # segment table vs segment_summary table, per caller's choice
    if use_segment_table:
        add_to_segment(outdoc, proc_id, seg_def_id, sgmntlist)
    else:
        add_to_segment_summary(outdoc, proc_id, seg_def_id, sgmntlist)

    # abs() of a segmentlist is its total duration (livetime)
    return outdoc, abs(sgmntlist)
def run_segment_operation(outdoc, filenames, segments, use_segment_table, operation, result_name = 'RESULT', preserve = True):
    """
    Performs an operation (intersect or union) across a set of segments.
    That is, given a set of files each with segment definers DMT-FLAG1,
    DMT-FLAG2 etc and a list of segments DMT-FLAG1,DMT-FLAG1 this returns

         RESULT = (table 1's DMT-FLAG1 union table 2's DMT-FLAG1 union ...)
                   operation
                  (table 1's DMT-FLAG2 union table 2's DMT-FLAG2 union ...)
                   operation
                   etc

    The result is written into ``outdoc`` under a segment definer named
    ``result_name`` and ``(outdoc, total livetime)`` is returned.
    """
    # process_id of the first process row in outdoc
    proc_id = table.get_table(outdoc, lsctables.ProcessTable.tableName)[0].process_id

    # preserve=True merges the inputs into outdoc itself
    if preserve:
        indoc = ligolw_add.ligolw_add(outdoc, filenames)
    else:
        indoc = ligolw_add.ligolw_add(ligolw.Document(), filenames)

    # Start with a segment covering all of time, then
    # intersect with each of the fields of interest
    keys = segments.split(',')

    if operation == INTERSECT:
        # all-of-time list is the identity element for intersection
        sgmntlist = glue.segments.segmentlist([glue.segments.segment(-glue.segments.infinity(), glue.segments.infinity())])

        for key in keys:
            sgmntlist &= find_segments(indoc, key, use_segment_table)

    elif operation == UNION:
        # empty list is the identity element for union
        sgmntlist = glue.segments.segmentlist([])

        for key in keys:
            sgmntlist |= find_segments(indoc, key, use_segment_table)
    elif operation == DIFF:
        # subtract every later key's segments from the first key's
        sgmntlist = find_segments(indoc, keys[0], use_segment_table)

        for key in keys[1:]:
            sgmntlist -= find_segments(indoc, key, use_segment_table)
    else:
        raise NameError("%s is not a known operation (intersect, union or diff)" % operation)

    # Add a segment definer and segments
    seg_def_id = add_to_segment_definer(outdoc, proc_id, '', result_name, 1)

    if use_segment_table:
        add_to_segment(outdoc, proc_id, seg_def_id, sgmntlist)
    else:
        add_to_segment_summary(outdoc, proc_id, seg_def_id, sgmntlist)

    # abs() of a segmentlist is its total duration (livetime)
    return outdoc, abs(sgmntlist)
def load_external_triggers(filename):
    """
    Parse ``filename`` and return its external-trigger table.

    Returns the single ExtTriggersTable found in the document, or None
    (after printing a warning to stderr) when the document contains no
    such table.
    """
    doc = ligolw_add.ligolw_add(ligolw.Document(), [filename])
    ext_trigger_tables = lsctables.getTablesByType(doc, lsctables.ExtTriggersTable)
    if ext_trigger_tables is None:
        # py2/py3-compatible warning; previously this fell through to a
        # bare ``return ext_triggers`` and raised UnboundLocalError
        sys.stderr.write("No tables named external_trigger:table found in "
                         + filename + "\n")
        return None
    assert len(ext_trigger_tables) == 1  # ligolw_add should merge them
    return ext_trigger_tables[0]
def load_external_triggers(filename):
    """
    Parse ``filename`` and return its external-trigger table.

    Returns the single table named ``external_trigger:table`` found in
    the document, or None (after printing a warning to stderr) when the
    document contains no such table.
    """
    doc = ligolw_add.ligolw_add(ligolw.Document(), [filename])
    ext_trigger_tables = lsctables.table.getTablesByName(doc, lsctables.ExtTriggersTable.tableName)
    if ext_trigger_tables is None:
        # py2/py3-compatible warning; previously this fell through to a
        # bare ``return ext_triggers`` and raised UnboundLocalError
        sys.stderr.write("No tables named external_trigger:table found in "
                         + filename + "\n")
        return None
    assert len(ext_trigger_tables) == 1  # ligolw_add should merge them
    return ext_trigger_tables[0]
def read_ligolw(source, contenthandler=None, verbose=False,
                non_lsc_tables_ok=True):
    """Read one or more LIGO_LW format files into a single document.

    Parameters
    ----------
    source : `str`, `file`, `list`
        one or more open files or file paths to read
    contenthandler : `~xml.sax.handler.ContentHandler`, optional
        content handler used to parse document
    verbose : `bool`, optional
        be verbose when reading files, default: `False`
    non_lsc_tables_ok : `bool`, optional
        if `False` error on unrecognised tables in documents,
        default: `True`

    Returns
    -------
    xmldoc : :class:`~glue.ligolw.ligolw.Document`
        the document object as parsed from the file(s)
    """
    from glue.ligolw.ligolw import (Document, LIGOLWContentHandler)
    from glue.ligolw import types
    from glue.ligolw.lsctables import use_in
    from glue.ligolw.utils.ligolw_add import ligolw_add

    # temporarily point ToPyType entries at the matching numpy dtypes;
    # snapshot the originals so they can be restored afterwards
    saved_topytype = types.ToPyType.copy()
    for llw_type, numpy_name in types.ToNumPyType.items():
        if llw_type in types.ToPyType:
            types.ToPyType[llw_type] = numpy.dtype(numpy_name).type

    # fall back to the standard handler (with lsctables support) when the
    # caller supplied none
    handler = contenthandler
    if handler is None:
        handler = use_in(LIGOLWContentHandler)

    try:
        # parse every file into one Document
        return ligolw_add(Document(), file_list(source),
                          contenthandler=handler, verbose=verbose,
                          non_lsc_tables_ok=non_lsc_tables_ok)
    finally:
        # undo the ToPyType patch whether or not parsing succeeded
        types.ToPyType = saved_topytype
def gettriggers(plotdesc):
    """
    Load burst triggers and injections for ``plotdesc``'s instrument and
    segment, optionally cluster the triggers, and return the pair
    ``(bursttable, simtable)`` trimmed to the plot's trigger segment.
    """
    doc = ligolw_add.ligolw_add(ligolw.Document(), CacheURLs(
        eventdisplay.cache[plotdesc.instrument], plotdesc.segment),
        verbose=False, non_lsc_tables_ok=False)
    # best-effort lookups: each table may be absent from the document, in
    # which case fall back to an empty value (bare excepts kept from the
    # original best-effort design)
    try:
        plotdesc.seglist = table.get_table(
            doc, lsctables.SearchSummaryTable.tableName).get_outlist().coalesce()
    except:
        plotdesc.seglist = segments.segmentlist()
    try:
        bursttable = table.get_table(doc, lsctables.SnglBurstTable.tableName)
    except:
        bursttable = lsctables.New(lsctables.SnglBurstTable)
    try:
        simtable = table.get_table(doc, lsctables.SimBurstTable.tableName)
    except:
        # fixed copy-paste bug: the fallback previously built a
        # SnglBurstTable here, yielding a wrong-typed (though empty) table
        simtable = lsctables.New(lsctables.SimBurstTable)

    # cluster
    if plotdesc.cluster:
        ligolw_bucluster.ClusterSnglBurstTable(
            bursttable, SnglBurstUtils.CompareSnglBurstByPeakTimeAndFreq,
            ligolw_bucluster.SnglBurstCluster,
            SnglBurstUtils.CompareSnglBurstByPeakTime)

    # remove triggers and injections that lie outside the required segment
    bursttable.filterRows(
        lambda row: row.get_peak() in plotdesc.trig_segment())
    simtable.filterRows(
        lambda row: row.get_time_geocent() in plotdesc.trig_segment())

    return bursttable, simtable
def gettriggers(plotdesc):
    """
    Load burst triggers and injections for ``plotdesc``'s instrument and
    segment, optionally cluster the triggers, and return the pair
    ``(bursttable, simtable)`` trimmed to the plot's trigger segment.
    """
    doc = ligolw_add.ligolw_add(ligolw.Document(), CacheURLs(eventdisplay.cache[plotdesc.instrument], plotdesc.segment), verbose = False, non_lsc_tables_ok = False)
    # best-effort lookups: each table may be absent, in which case fall
    # back to an empty value (bare excepts swallow all errors)
    try:
        plotdesc.seglist = table.get_table(doc, lsctables.SearchSummaryTable.tableName).get_outlist().coalesce()
    except:
        plotdesc.seglist = segments.segmentlist()
    try:
        bursttable = table.get_table(doc, lsctables.SnglBurstTable.tableName)
    except:
        bursttable = lsctables.New(lsctables.SnglBurstTable)
    try:
        simtable = table.get_table(doc, lsctables.SimBurstTable.tableName)
    except:
        # NOTE(review): fallback builds a SnglBurstTable although this
        # variable holds the sim table — looks like a copy-paste slip;
        # confirm the intended type is SimBurstTable
        simtable = lsctables.New(lsctables.SnglBurstTable)
    # cluster
    if plotdesc.cluster:
        ligolw_bucluster.ClusterSnglBurstTable(bursttable, SnglBurstUtils.CompareSnglBurstByPeakTimeAndFreq, ligolw_bucluster.SnglBurstCluster, SnglBurstUtils.CompareSnglBurstByPeakTime)
    # remove triggers and injections that lie outside the required segment
    bursttable.filterRows(lambda row: row.get_peak() in plotdesc.trig_segment())
    simtable.filterRows(lambda row: row.get_time_geocent() in plotdesc.trig_segment())
    return bursttable, simtable
def table_from_file(f, tablename, columns=None, filt=None,
                    contenthandler=None, nproc=1, verbose=False):
    """Read a `~glue.ligolw.table.Table` from a LIGO_LW file.

    Parameters
    ----------
    f : `file`, `str`, `CacheEntry`, `list`, `Cache`
        object representing one or more files. One of

        - an open `file`
        - a `str` pointing to a file path on disk
        - a formatted `~lal.utils.CacheEntry` representing one file
        - a `list` of `str` file paths
        - a formatted `~glue.lal.Cache` representing many files

    tablename : `str`
        name of the table to read.
    columns : `list`, optional
        list of column name strings to read, default all.
    filt : `function`, optional
        function by which to `filter` events. The callable must accept as
        input a row of the table event and return `True`/`False`.
    contenthandler : `~glue.ligolw.ligolw.LIGOLWContentHandler`
        SAX content handler for parsing LIGO_LW documents.

    Returns
    -------
    table : `~glue.ligolw.table.Table`
        `Table` of data with given columns filled
    """
    from glue.ligolw.ligolw import Document
    from glue.ligolw import (table, lsctables)
    from glue.ligolw.utils.ligolw_add import ligolw_add
    # find table class
    tableclass = lsctables.TableByName[table.Table.TableName(tablename)]
    # get content handler
    if contenthandler is None:
        contenthandler = get_partial_contenthandler(tableclass)
    # allow cache multiprocessing — delegate entirely to the class reader
    if nproc != 1:
        return tableclass.read(f, columns=columns,
                               contenthandler=contenthandler, nproc=nproc,
                               format='cache')
    lsctables.use_in(contenthandler)
    # set columns to read — mutates class-level state, restored below, so
    # this function is not safe to call concurrently
    if columns is not None:
        _oldcols = tableclass.loadcolumns
        tableclass.loadcolumns = columns
    # generate Document and populate
    files = file_list(f)
    xmldoc = Document()
    ligolw_add(xmldoc, files, non_lsc_tables_ok=True,
               contenthandler=contenthandler, verbose=verbose)
    # extract table
    # NOTE(review): unlike the sibling variant of this function, a missing
    # table here raises rather than returning an empty table — confirm
    # that is intended
    out = tableclass.get_table(xmldoc)
    if verbose:
        gprint('%d rows found in %s table' % (len(out), out.tableName))
    # filter output
    if filt:
        if verbose:
            gprint('filtering rows ...', end=' ')
        # copy() when available, else rebuild from template
        try:
            out_ = out.copy()
        except AttributeError:
            out_ = table.new_from_template(out)
        out_.extend(filter(filt, out))
        out = out_
        if verbose:
            gprint('%d rows remaining\n' % len(out))
    # reset loadcolumns and return
    if columns is not None:
        tableclass.loadcolumns = _oldcols
    return out
def run_file_operation(outdoc, filenames, use_segment_table, operation,
                       preserve=True):
    """
    Performs an operation (intersect or union) across a set of files.
    That is, given a set of files each with segment definers DMT-FLAG1,
    DMT-FLAG2 etc the result is a file where

    DMT-FLAG1 = (file 1's DMT-FLAG1 operation file 2's DMT-FLAG1 operation ...)
    DMT-FLAG2 = (file 1's DMT-FLAG2 operation file 2's DMT-FLAG2 operation ...)

    etc

    Returns ``(outdoc, livetime of the last definer's result)``.
    """
    # process_id of the first process row in outdoc
    proc_id = table.get_table(outdoc,
                              lsctables.ProcessTable.tableName)[0].process_id

    # load up the files into individual documents
    xmldocs = [
        ligolw_add.ligolw_add(ligolw.Document(), [fname])
        for fname in filenames
    ]

    # Get the list of distinct segment_definers across all docs
    # (dict used as an ordered-insertion set of (ifos, name, version) keys)
    segment_definers = {}

    def register_definer(seg_def):
        key = (seg_def.ifos, seg_def.name, seg_def.version)
        segment_definers[key] = True
        return key

    for xmldoc in xmldocs:
        seg_def_table = table.get_table(xmldoc,
                                        lsctables.SegmentDefTable.tableName)
        # NOTE: relies on py2's eager map() for the registration side effect
        map(register_definer, seg_def_table)

    # For each unique segment definer, find the intersection
    # NOTE(review): if no definers were found, ``result`` is never bound and
    # the final ``abs(result)`` raises NameError — confirm inputs always
    # carry at least one definer
    for ifo, name, version in segment_definers:
        if operation == INTERSECT:
            # If I were feeling especially functional-ist I'd write this
            # with reduce()
            result = glue.segments.segmentlist([
                glue.segments.segment(-glue.segments.infinity(),
                                      glue.segments.infinity())
            ])

            for xmldoc in xmldocs:
                result &= find_segments(xmldoc, '%s:%s:%d' % (ifo, name, version),
                                        use_segment_table)
        elif operation == UNION:
            result = glue.segments.segmentlist([])

            for xmldoc in xmldocs:
                result |= find_segments(xmldoc, '%s:%s:%d' % (ifo, name, version),
                                        use_segment_table)
        elif operation == DIFF:
            # first file's segments minus every later file's segments
            result = find_segments(xmldocs[0], '%s:%s:%d' % (ifo, name, version),
                                   use_segment_table)

            for xmldoc in xmldocs[1:]:
                result -= find_segments(xmldoc, '%s:%s:%d' % (ifo, name, version),
                                        use_segment_table)
        else:
            raise NameError(
                "%s is not a known operation (intersect, union or diff)"
                % operation)

        # Add a segment definer for the result
        seg_def_id = add_to_segment_definer(outdoc, proc_id,
                                            ifo, name, version)

        # Add the segments
        if use_segment_table:
            add_to_segment(outdoc, proc_id, seg_def_id, result)
        else:
            add_to_segment_summary(outdoc, proc_id, seg_def_id, result)

    # If we're preserving, also load up everything into the output document.
    if preserve:
        # Add them to the output document (eager py2 map for side effect)
        map(lambda x: outdoc.appendChild(x.childNodes[0]), xmldocs)

        # Merge the ligolw elements and tables
        ligolw_add.merge_ligolws(outdoc)
        ligolw_add.merge_compatible_tables(outdoc)

    return outdoc, abs(result)
def read_flag_dict(f, flags=None, gpstype=LIGOTimeGPS, coalesce=False,
                   contenthandler=GWpyContentHandler, nproc=1):
    """Read segments for the given flag from the LIGO_LW XML file.

    Parameters
    ----------
    f : `str`
        path of XML file to read.
    flags : `list`, `None`, optional
        list of flags to read or `None` to read all into a single
        `DataQualityFlag`.

    Returns
    -------
    flagdict : :class:`~gwpy.segments.flag.DataQualityDict`
        a new `DataQualityDict` of `DataQualityFlag` entries with ``active``
        and ``known`` segments seeded from the XML tables in the given
        file ``f``.
    """
    # multiprocessing path: delegate to the class reader entirely
    if nproc != 1:
        return DataQualityDict.read(f, flags, coalesce=coalesce,
                                    gpstype=gpstype,
                                    contenthandler=contenthandler,
                                    format='cache', nproc=nproc)

    # generate Document and populate
    xmldoc = Document()
    # open file objects are replaced by their .name path (py2 ``file``)
    files = [fp.name if isinstance(fp, (file, GzipFile)) else fp
             for fp in file_list(f)]
    ligolw_add(xmldoc, files, non_lsc_tables_ok=True,
               contenthandler=contenthandler)

    # read segment definers and generate DataQualityFlag object
    seg_def_table = lsctables.SegmentDefTable.get_table(xmldoc)

    # find flags — a comma-separated string is split into a list (py2
    # ``unicode`` supported)
    if isinstance(flags, (unicode, str)):
        flags = flags.split(',')
    out = DataQualityDict()
    # id_ maps flag name -> list of segment_def_ids belonging to it; an
    # empty list means "match every row" (see the `not id_[flag]` tests)
    id_ = dict()
    if flags is not None and len(flags) == 1 and flags[0] is None:
        out[None] = DataQualityFlag()
        id_[None] = []
    for row in seg_def_table:
        ifos = row.get_ifos()
        name = row.name
        if ifos and name:
            # canonical name: IFOS:NAME[:VERSION]
            name = ':'.join([''.join(row.get_ifos()), row.name])
            if row.version is not None:
                name += ':%d' % row.version
        else:
            name = None
        if flags is None or name in flags:
            out[name] = DataQualityFlag(name)
            try:
                id_[name].append(row.segment_def_id)
            except (AttributeError, KeyError):
                id_[name] = [row.segment_def_id]
    # sanity-check that every requested flag was found
    if flags is None and not len(out.keys()):
        raise RuntimeError("No segment definitions found in file.")
    elif flags is not None and len(out.keys()) != len(flags):
        for flag in flags:
            if flag not in out:
                raise ValueError("No segment definition found for flag=%r "
                                 "in file." % flag)

    # read segment summary table as 'known'
    seg_sum_table = lsctables.SegmentSumTable.get_table(xmldoc)
    for row in seg_sum_table:
        for flag in out:
            if not id_[flag] or row.segment_def_id in id_[flag]:
                # newer rows expose .get(); older ones only the two columns
                try:
                    s = row.get()
                except AttributeError:
                    s = row.start_time, row.end_time
                out[flag].known.append(Segment(gpstype(s[0]), gpstype(s[1])))
    for dqf in out:
        if coalesce:
            out[dqf].coalesce()

    # read segment table as 'active'
    seg_table = lsctables.SegmentTable.get_table(xmldoc)
    for row in seg_table:
        for flag in out:
            if not id_[flag] or row.segment_def_id in id_[flag]:
                try:
                    s = row.get()
                except AttributeError:
                    s = row.start_time, row.end_time
                out[flag].active.append(Segment(gpstype(s[0]), gpstype(s[1])))
    for dqf in out:
        if coalesce:
            out[dqf].coalesce()
    return out
def run_file_operation(outdoc, filenames, use_segment_table, operation, preserve = True):
    """
    Performs an operation (intersect or union) across a set of files.
    That is, given a set of files each with segment definers DMT-FLAG1,
    DMT-FLAG2 etc the result is a file where

    DMT-FLAG1 = (file 1's DMT-FLAG1 operation file 2's DMT-FLAG1 operation ...)
    DMT-FLAG2 = (file 1's DMT-FLAG2 operation file 2's DMT-FLAG2 operation ...)

    etc

    Returns ``(outdoc, livetime of the last definer's result)``.
    """
    # process_id of the first process row in outdoc
    proc_id = table.get_table(outdoc, lsctables.ProcessTable.tableName)[0].process_id

    # load up the files into individual documents
    xmldocs = [ligolw_add.ligolw_add(ligolw.Document(), [fname]) for fname in filenames]

    # Get the list of distinct segment_definers across all docs
    # (dict used as a set of (ifos, name, version) keys)
    segment_definers = {}

    def register_definer(seg_def):
        key = (seg_def.ifos, seg_def.name, seg_def.version)
        segment_definers[key] = True
        return key

    for xmldoc in xmldocs:
        seg_def_table = table.get_table(xmldoc, lsctables.SegmentDefTable.tableName)
        # NOTE: relies on py2's eager map() for the registration side effect
        map (register_definer, seg_def_table)

    # For each unique segment definer, find the intersection
    # NOTE(review): if no definers exist, ``result`` is never bound and the
    # final ``abs(result)`` raises NameError
    for ifo, name, version in segment_definers:
        if operation == INTERSECT:
            # If I were feeling especially functional-ist I'd write this
            # with reduce()
            result = glue.segments.segmentlist([glue.segments.segment(-glue.segments.infinity(), glue.segments.infinity())])

            for xmldoc in xmldocs:
                result &= find_segments(xmldoc, '%s:%s:%d' % (ifo, name, version), use_segment_table)
        elif operation == UNION:
            result = glue.segments.segmentlist([])

            for xmldoc in xmldocs:
                result |= find_segments(xmldoc, '%s:%s:%d' % (ifo, name, version), use_segment_table)
        elif operation == DIFF:
            # first file's segments minus every later file's segments
            result = find_segments(xmldocs[0], '%s:%s:%d' % (ifo, name, version), use_segment_table)

            for xmldoc in xmldocs[1:]:
                result -= find_segments(xmldoc, '%s:%s:%d' % (ifo, name, version), use_segment_table)
        else:
            raise NameError ("%s is not a known operation (intersect, union or diff)" % operation)

        # Add a segment definer for the result
        seg_def_id = add_to_segment_definer(outdoc, proc_id, ifo, name, version)

        # Add the segments
        if use_segment_table:
            add_to_segment(outdoc, proc_id, seg_def_id, result)
        else:
            add_to_segment_summary(outdoc, proc_id, seg_def_id, result)

    # If we're preserving, also load up everything into the output document.
    if preserve:
        # Add them to the output document (eager py2 map for side effect)
        map(lambda x: outdoc.appendChild(x.childNodes[0]), xmldocs)

        # Merge the ligolw elements and tables
        ligolw_add.merge_ligolws(outdoc)
        ligolw_add.merge_compatible_tables(outdoc)

    return outdoc, abs(result)
def table_from_file(f, tablename, columns=None, filt=None,
                    contenthandler=None, nproc=1, verbose=False):
    """Read a `~glue.ligolw.table.Table` from a LIGO_LW file.

    Parameters
    ----------
    f : `file`, `str`, `CacheEntry`, `list`, `Cache`
        object representing one or more files. One of

        - an open `file`
        - a `str` pointing to a file path on disk
        - a formatted `~glue.lal.CacheEntry` representing one file
        - a `list` of `str` file paths
        - a formatted `~glue.lal.Cache` representing many files

    tablename : `str`
        name of the table to read.
    columns : `list`, optional
        list of column name strings to read, default all.
    filt : `function`, optional
        function by which to `filter` events. The callable must accept as
        input a row of the table event and return `True`/`False`.
    contenthandler : `~glue.ligolw.ligolw.LIGOLWContentHandler`
        SAX content handler for parsing LIGO_LW documents.

    Returns
    -------
    table : `~glue.ligolw.table.Table`
        `Table` of data with given columns filled
    """
    # find table class
    tableclass = lsctables.TableByName[table.StripTableName(tablename)]
    # get content handler
    if contenthandler is None:
        contenthandler = get_partial_contenthandler(tableclass)
    # allow cache multiprocessing — delegate entirely to the class reader
    if nproc != 1:
        return tableclass.read(f, columns=columns,
                               contenthandler=contenthandler, nproc=nproc,
                               format='cache')
    # set columns to read — mutates class-level state, restored below, so
    # this function is not safe to call concurrently
    if columns is not None:
        _oldcols = tableclass.loadcolumns
        tableclass.loadcolumns = columns
    # generate Document and populate; open file objects are replaced by
    # their .name path (py2 ``file``)
    files = [fp.name if isinstance(fp, (file, GzipFile)) else fp
             for fp in file_list(f)]
    xmldoc = Document()
    ligolw_add(xmldoc, files, non_lsc_tables_ok=True,
               contenthandler=contenthandler, verbose=verbose)
    # extract table, falling back to a new empty table when the document
    # contains none of the requested type
    try:
        out = tableclass.get_table(xmldoc)
    except ValueError:
        out = lsctables.New(tableclass, columns=columns)
    if verbose:
        gprint('%d rows found in %s table' % (len(out), out.tableName))
    if filt:
        if verbose:
            gprint('filtering rows ...', end=' ')
        # copy() when available, else rebuild from template
        try:
            out_ = out.copy()
        except AttributeError:
            out_ = table.new_from_template(out)
        out_.extend(filter(filt, out))
        out = out_
        if verbose:
            gprint('%d rows remaining\n' % len(out))
    # reset loadcolumns and return
    if columns is not None:
        tableclass.loadcolumns = _oldcols
    return out
def load_external_triggers(filename):
    """Parse *filename* and return its ExtTriggersTable."""
    xmldoc = ligolw_add.ligolw_add(ligolw.Document(), [filename])
    return lsctables.ExtTriggersTable.get_table(xmldoc)
# NOTE(review): this is the tail of the element_filter predicate — its
# ``def`` line lies before this chunk.  It accepts only sngl_burst and
# search_summary tables.
return name == ligolw.Table.tagName and table.Table.TableName(attrs["Name"]) in (lsctables.SnglBurstTable.tableName, lsctables.SearchSummaryTable.tableName)


@lsctables.use_in
class ContentHandler(ligolw.PartialLIGOLWContentHandler):
    # Partial content handler that parses only the elements accepted by
    # element_filter, keeping memory use down.
    def __init__(self, doc):
        ligolw.PartialLIGOLWContentHandler.__init__(self, doc, element_filter)


#
# use ligolw_add module to load documents, and extract search_summary
# table's "in" segment list.
#


seglist = lsctables.SearchSummaryTable.get_table(ligolw_add.ligolw_add(ligolw.Document(), filenames, verbose = options.verbose, contenthandler = ContentHandler)).get_inlist().coalesce()


#
# =============================================================================
#
#                         How to generate X axis labels
#
# =============================================================================
#


def make_xticks(segment):
    # generate tick locations and labels at every UTC midnight inside the
    # segment
    # NOTE(review): function body is truncated at the end of this chunk
    values = list(date.UTCMidnights(*(lal.LIGOTimeGPS(t) for t in segment)))
    labels = []
@lsctables.use_in
class ContentHandler(ligolw.PartialLIGOLWContentHandler):
    # Partial content handler that parses only the elements accepted by
    # element_filter, keeping memory use down.
    def __init__(self, doc):
        ligolw.PartialLIGOLWContentHandler.__init__(self, doc, element_filter)


#
# use ligolw_add module to load documents, and extract search_summary
# table's "in" segment list.
#


seglist = lsctables.SearchSummaryTable.get_table(
    ligolw_add.ligolw_add(
        ligolw.Document(), filenames, verbose=options.verbose,
        contenthandler=ContentHandler)).get_inlist().coalesce()


#
# =============================================================================
#
#                         How to generate X axis labels
#
# =============================================================================
#


def make_xticks(segment):
    # generate tick locations and labels at every UTC midnight inside the
    # segment
    # NOTE(review): function body is truncated at the end of this chunk
    values = list(date.UTCMidnights(*(lal.LIGOTimeGPS(t) for t in segment)))