def table_from_pycbc_live(source, ifo=None, columns=None, **kwargs):
    """Read a `GWRecArray` from one or more PyCBC live files
    """
    source = file_list(source)
    source = filter_empty_files(source, ifo=ifo)
    return vstack_tables([
        _table_from_file(x, ifo=ifo, columns=columns, **kwargs)
        for x in source
    ])
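# The snippets in this collection all funnel through ``vstack_tables``, the
# alias used here for `astropy.table.vstack` (see the explicit import further
# below).  A minimal, self-contained sketch of what that call does; the column
# names and values are invented:
from astropy.table import Table, vstack as vstack_tables

t1 = Table({'time': [1.0, 2.0], 'snr': [5.5, 6.0]})
t2 = Table({'time': [3.0], 'snr': [7.2]})
stacked = vstack_tables([t1, t2])  # single 3-row table with the same columns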
def stack(self, other):
    """Stack with another EventList in place.

    Calls `~astropy.table.vstack`.

    Parameters
    ----------
    other : `~gammapy.data.EventList`
        Event list to stack to self
    """
    self.table = vstack_tables([self.table, other.table])
def stack(cls, event_lists, **kwargs):
    """Stack (concatenate) list of event lists.

    Calls `~astropy.table.vstack`.

    Parameters
    ----------
    event_lists : list
        list of `~gammapy.data.EventList` to stack
    """
    tables = [_.table for _ in event_lists]
    stacked_table = vstack_tables(tables, **kwargs)
    return cls(stacked_table)
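# Rough usage sketch for the classmethod above, assuming it is exposed on
# `~gammapy.data.EventList` as in the snippet; the FITS file names are
# hypothetical, and the ``metadata_conflicts`` keyword is simply forwarded to
# `astropy.table.vstack`.
from gammapy.data import EventList

event_lists = [EventList.read("events-1.fits"), EventList.read("events-2.fits")]
stacked = EventList.stack(event_lists, metadata_conflicts="silent")
print(len(stacked.table))  # total number of stacked events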
def add_triggers(table, key, segments=None):
    """Add an `EventTable` to the global memory cache
    """
    if segments is not None:
        table.meta['segments'] = segments
    try:
        old = globalv.TRIGGERS[key]
    except KeyError:
        globalv.TRIGGERS[key] = table
    else:
        segs = old.meta['segments']
        globalv.TRIGGERS[key] = vstack_tables((old, table))
        globalv.TRIGGERS[key].meta['segments'].coalesce()
def table_from_pycbc_live(source, ifo=None, columns=None, nproc=1, **kwargs):
    """Read a `GWRecArray` from one or more PyCBC live files
    """
    source = file_list(source)
    if nproc > 1:
        from ...io.cache import read_cache
        return read_cache(source, EventTable, nproc, None,
                          ifo=ifo, columns=columns,
                          format=PYCBC_LIVE_FORMAT, **kwargs)
    source = filter_empty_files(source, ifo=ifo)
    return vstack_tables(
        [_table_from_file(x, ifo=ifo, columns=columns, **kwargs)
         for x in source])
def add_triggers(table, key, segments=None):
    """Add an `EventTable` to the global memory cache
    """
    if segments is not None:
        table.meta['segments'] = segments
    try:
        old = globalv.TRIGGERS[key]
    except KeyError:
        new = globalv.TRIGGERS[key] = table
        new.meta.setdefault('segments', SegmentList())
    else:
        new = globalv.TRIGGERS[key] = vstack_tables((old, table))
        new.meta = old.meta
        new.meta['segments'] |= table.meta.get('segments', SegmentList())
    new.meta['segments'].coalesce()
    return new
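# The segment bookkeeping above relies on the in-place union operator and
# `SegmentList.coalesce`.  A small illustration with gwpy's segment types
# (the GPS values are arbitrary):
from gwpy.segments import Segment, SegmentList

segs = SegmentList([Segment(0, 10)])
segs |= SegmentList([Segment(5, 15), Segment(20, 25)])
segs.coalesce()  # -> [Segment(0, 15), Segment(20, 25)]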
def find_max_significance(primary, auxiliary, channel, snrs, windows,
                          livetime):
    """Find the maximum Hveto significance for this primary-auxiliary pair

    Parameters
    ----------
    primary : `numpy.recarray`
        record array of data from the primary channel
    auxiliary : `dict` of `numpy.recarray`
        record arrays of events for each auxiliary channel, keyed by
        channel name
    channel : `str`
        name of the primary channel
    snrs : `list` of `float`
        the SNR thresholds to use
    windows : `list` of `float`
        the time windows to use
    livetime : `float`
        the livetime of the analysis

    Returns
    -------
    winner : `HvetoWinner`
        the parameters and segments generated by the (snr, dt) with the
        highest significance
    sigs : `dict`
        the maximum significance found for each auxiliary channel
    """
    rec = vstack_tables([primary] + list(auxiliary.values()))
    coincs = find_all_coincidences(rec, channel, snrs, windows)
    winner = HvetoWinner(name='unknown', significance=-1)
    sigs = dict((c, 0) for c in auxiliary)
    for p, cdict in coincs.items():
        dt, snr = p
        for chan in cdict:
            mu = (len(primary) * (auxiliary[chan]['snr'] >= snr).sum()
                  * dt / livetime)
            # NOTE: coincs[p][chan] counts the number of primary channel
            #       triggers coincident with a 'chan' trigger
            try:
                sig = significance(coincs[p][chan], mu)
            except KeyError:
                sig = 0
            if sig > sigs[chan]:
                sigs[chan] = sig
            if sig > winner.significance:
                winner.name = chan
                winner.snr = snr
                winner.window = dt
                winner.significance = sig
                winner.mu = mu
    return winner, sigs
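# ``significance(n, mu)`` above scores how unlikely it is to see ``n``
# coincidences by chance given a Poisson expectation ``mu``.  A plausible
# stand-in definition (hveto's actual implementation may differ in detail) is
# the negative natural log of the Poisson survival probability:
from numpy import log
from scipy.stats import poisson


def significance(n, mu):
    """Return -ln P(X >= n) for X ~ Poisson(mu) (illustrative sketch only)."""
    return -log(poisson.sf(n - 1, mu))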
def veto_all(auxiliary, segmentlist):
    """Remove events from all auxiliary channel tables based on a segmentlist

    Parameters
    ----------
    auxiliary : `dict` of `numpy.recarray`
        a `dict` of event arrays to veto
    segmentlist : `~ligo.segments.segmentlist`
        the list of veto segments to use

    Returns
    -------
    survivors : `dict` of `numpy.recarray`
        a dict of the reduced arrays of events for each input channel

    See Also
    --------
    core.veto
        for details on the veto algorithm itself
    """
    channels = auxiliary.keys()
    t = vstack_tables(list(auxiliary.values()))
    keep, _ = veto(t, segmentlist)
    return dict((c, keep[keep['channel'] == c]) for c in channels)
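# ``veto_all`` stacks every auxiliary table into one array so the veto is
# applied once, then splits the survivors back out by channel.  A tiny
# self-contained illustration of that final split (fields and values are
# invented):
import numpy

keep = numpy.array([(1.0, 'A'), (2.0, 'B'), (3.0, 'A')],
                   dtype=[('time', 'f8'), ('channel', 'U1')])
by_channel = {c: keep[keep['channel'] == c] for c in ('A', 'B')}
# by_channel['A'] has two rows, by_channel['B'] has one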
def vstack_from_files(cls, filenames, logger=None):
    """Stack event lists vertically (combine events and GTIs).

    This function stacks (a.k.a. concatenates) event lists.
    E.g. if you have one event list with 100 events (i.e. 100 rows)
    and another with 42 events, the output event list will have 142 events.

    It also stacks the GTIs so that exposure computations are still
    possible using the stacked event list.

    At the moment this can require a lot of memory.
    All event lists are loaded into memory at the same time.

    TODO: implement and benchmark a different, more efficient method:
    Get number of rows from headers, pre-allocate a large table,
    open files one by one and fill correct rows.

    TODO: handle header keywords "correctly".
    At the moment the output event list header keywords are copies
    of the values from the first observation, i.e. meaningless.
    Here's a (probably incomplete) list of values we should handle
    (usually by computing the min, max or mean or removing it):
    - OBS_ID
    - DATE_OBS, DATE_END
    - TIME_OBS, TIME_END
    - TSTART, TSTOP
    - LIVETIME, DEADC
    - RA_PNT, DEC_PNT
    - ALT_PNT, AZ_PNT

    Parameters
    ----------
    filenames : list of str
        List of event list filenames

    Returns
    -------
    event_list_dataset : `~gammapy.data.EventListDataset`
    """
    total_filesize = 0
    for filename in filenames:
        total_filesize += Path(filename).stat().st_size

    if logger:
        logger.info('Number of files to stack: {}'.format(len(filenames)))
        logger.info('Total filesize: {:.2f} MB'.format(total_filesize / 1024.**2))
        logger.info('Reading event list files ...')

    event_lists = []
    gtis = []
    for filename in ProgressBar(filenames):
        # logger.info('Reading {}'.format(filename))
        event_list = Table.read(filename, hdu='EVENTS')

        # TODO: Remove and modify header keywords for stacked event list
        meta_del = ['OBS_ID', 'OBJECT']
        meta_mod = ['DATE_OBS', 'DATE_END', 'TIME_OBS', 'TIME_END']

        gti = Table.read(filename, hdu='GTI')
        event_lists.append(event_list)
        gtis.append(gti)

    from astropy.table import vstack as vstack_tables
    total_event_list = vstack_tables(event_lists, metadata_conflicts='silent')
    total_gti = vstack_tables(gtis, metadata_conflicts='silent')

    total_event_list.meta['EVTSTACK'] = 'yes'
    total_gti.meta['EVTSTACK'] = 'yes'

    return cls(event_list=total_event_list, gti=total_gti)
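# The GTI tables stacked above follow the usual START/STOP column convention,
# so the combined livetime can be recovered directly from the stacked table.
# A short hedged sketch (column names may differ in a given dataset):
import numpy as np
from astropy.table import Table, vstack as vstack_tables

gti1 = Table({'START': [0.0], 'STOP': [100.0]})
gti2 = Table({'START': [150.0], 'STOP': [200.0]})
total_gti = vstack_tables([gti1, gti2], metadata_conflicts='silent')
livetime = np.sum(total_gti['STOP'] - total_gti['START'])  # 150.0 seconds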
def get_triggers(channel, etg, segments, cache=None, snr=None, frange=None,
                 raw=False, trigfind_kwargs={}, **read_kwargs):
    """Get triggers for the given channel
    """
    etg = _sanitize_name(etg)
    # format arguments
    try:
        readfmt = read_kwargs.pop("format", DEFAULT_FORMAT[etg])
    except KeyError:
        raise ValueError("unsupported ETG {!r}".format(etg))
    trigfind_kwargs, read_kwargs = _format_params(channel, etg, readfmt,
                                                  trigfind_kwargs, read_kwargs)

    # find triggers
    if cache is None:
        cache = find_trigger_files(channel, etg, segments, **trigfind_kwargs)

    # read files
    tables = []
    for segment in segments:
        segaslist = SegmentList([segment])
        segcache = io_cache.sieve(cache, segment=segment)
        # try and work out if cache overextends segment (so we need to crop)
        cachesegs = io_cache.cache_segments(segcache)
        outofbounds = abs(cachesegs - segaslist)
        if segcache:
            if len(segcache) == 1:  # just pass the single filename
                segcache = segcache[0]
            new = EventTable.read(segcache, **read_kwargs)
            new.meta = {k: new.meta[k] for k in TABLE_META if new.meta.get(k)}
            if outofbounds:
                new = new[in_segmentlist(new[new.dtype.names[0]], segaslist)]
            tables.append(new)
    if len(tables):
        table = vstack_tables(tables)
    else:
        table = EventTable(
            names=read_kwargs.get('columns', ['time', 'frequency', 'snr']))

    # parse time, frequency-like and snr-like column names
    columns = table.dtype.names
    tcolumn = columns[0]
    fcolumn = columns[1]
    scolumn = columns[2]

    # filter
    keep = numpy.ones(len(table), dtype=bool)
    if snr is not None:
        keep &= table[scolumn] >= snr
    if frange is not None:
        keep &= table[fcolumn] >= frange[0]
        keep &= table[fcolumn] < frange[1]
    table = table[keep]

    # return basic table if 'raw'
    if raw:
        return table

    # rename time column so that all tables match in at least that
    if tcolumn != "time":
        table.rename_column(tcolumn, 'time')

    # add channel column to identify all triggers
    table.add_column(table.Column(data=numpy.repeat(channel, len(table)),
                                  name='channel'))

    table.sort('time')
    return table
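# Hedged call sketch for ``get_triggers``; the channel name, ETG, and GPS span
# are placeholders, and the segment types come from gwpy.segments as in the
# snippet.
from gwpy.segments import Segment, SegmentList

segs = SegmentList([Segment(1187000000, 1187003600)])
trigs = get_triggers('L1:GDS-CALIB_STRAIN', 'omicron', segs,
                     snr=6.5, frange=(10, 2048))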
def read_cache(cache, target, nproc, post, *args, **kwargs):
    """Read arbitrary data from a cache file

    Parameters
    ----------
    cache : :class:`glue.lal.Cache`, `str`
        cache of files, or path to a LAL-format cache file on disk.
    target : `type`
        target class to read into.
    nproc : `int`
        number of individual processes to use.
    post : `function`
        function to post-process output object before returning.
        The output of this method will be returned, so in-place operations
        must return the object.
    *args
        other positional arguments to pass to the target.read() classmethod.
    **kwargs
        keyword arguments to pass to the target.read() classmethod.

    Returns
    -------
    data : target
        an instance of the target class, seeded with data read from
        the cache.

    Notes
    -----
    The returned object is constructed from the output of each sub-process
    via the '+=' in-place addition operator.

    If the input cache is indeed a :class:`~glue.lal.Cache` object, the
    sub-processes will be combined in time order, otherwise the ordering is
    given by the order of entries in the input cache (for example, if it is
    a simple `list` of files).

    .. warning::

       no protection is given against overloading the host, for example, no
       checks are done to ensure that ``nproc`` is less than the number of
       available cores.

       High values of ``nproc`` should be used at the user's discretion;
       the GWpy team accepts no liability for loss as a result of abuse of
       this feature.
    """
    # read the cache
    if isinstance(cache, (file, unicode, str)):
        cache = open_cache(cache)
    if isinstance(cache, Cache):
        cache.sort(key=lambda ce: ce.segment[0])

    # force one file per process minimum
    nproc = min(nproc, len(cache))
    if nproc > cpu_count():
        warnings.warn("Using %d processes on a %d-core machine is "
                      "unrecommended...but not forbidden."
                      % (nproc, cpu_count()))

    # work out underlying data type
    try:
        kwargs.setdefault(
            'format', _get_valid_format('read', target, None, None,
                                        (cache[0],), {}))
    # if empty, put anything, since it doesn't matter
    except IndexError:
        kwargs.setdefault('format', 'ascii')
    except Exception:
        if 'format' not in kwargs:
            raise

    if nproc <= 1:
        return target.read(cache, *args, **kwargs)

    # define how to read each sub-cache
    def _read(q, sc, i):
        try:
            q.put((i, target.read(sc, *args, **kwargs)))
        except Exception as e:
            q.put(e)

    # separate cache into parts
    fperproc = int(ceil(len(cache) / nproc))
    subcaches = [cache.__class__(cache[i:i+fperproc])
                 for i in range(0, len(cache), fperproc)]

    # start all processes
    queue = ProcessQueue(nproc)
    proclist = []
    for i, subcache in enumerate(subcaches):
        if len(subcache) == 0:
            continue
        process = Process(target=_read, args=(queue, subcache, i))
        process.daemon = True
        proclist.append(process)
        process.start()

    # get data and block
    pout = []
    for i in range(len(proclist)):
        result = queue.get()
        if isinstance(result, Exception):
            raise result
        pout.append(result)
    for process in proclist:
        process.join()

    # combine and return
    data = zip(*sorted(pout, key=lambda out: out[0]))[1]
    if issubclass(target, Table):  # astropy.table.Table
        out = vstack_tables(data, join_type='exact')
    elif issubclass(target, recarray):
        out = recfunctions.stack_arrays(data, asrecarray=True, usemask=False,
                                        autoconvert=True).view(target)
    else:
        try:
            if hasattr(target, 'tableName'):  # glue.ligolw.table.Table
                out = data[0]
            else:
                out = data[0].copy()
        except AttributeError:
            out = data[0]
        for datum in data[1:]:
            out += datum

    if post:
        return post(out)
    else:
        return out
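# Hedged usage sketch for ``read_cache``; the cache-file name is a placeholder
# and the read keywords mirror the HDF5 layout written by the command-line
# tool at the end of this collection (``path='triggers'``).
from gwpy.table import EventTable

table = read_cache('triggers.lcf', EventTable, 4, None,
                   format='hdf5', path='triggers')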
def get_triggers(channel, etg, segments, cache=None, snr=None, frange=None,
                 raw=False, trigfind_kwargs={}, **read_kwargs):
    """Get triggers for the given channel
    """
    etg = _sanitize_name(etg)
    # format arguments
    try:
        readfmt = read_kwargs.pop("format", DEFAULT_FORMAT[etg])
    except KeyError:
        raise ValueError("unsupported ETG {!r}".format(etg))
    trigfind_kwargs, read_kwargs = _format_params(
        channel, etg, readfmt, trigfind_kwargs, read_kwargs
    )

    # find triggers
    if cache is None:
        cache = find_trigger_files(channel, etg, segments, **trigfind_kwargs)

    # read files
    tables = []
    for segment in segments:
        segaslist = SegmentList([segment])
        segcache = io_cache.sieve(cache, segment=segment)
        # try and work out if cache overextends segment (so we need to crop)
        cachesegs = io_cache.cache_segments(segcache)
        outofbounds = abs(cachesegs - segaslist)
        if segcache:
            if len(segcache) == 1:  # just pass the single filename
                segcache = segcache[0]
            new = EventTable.read(segcache, **read_kwargs)
            new.meta = {k: new.meta[k] for k in TABLE_META if new.meta.get(k)}
            if outofbounds:
                new = new[new[new.dtype.names[0]].in_segmentlist(segaslist)]
            tables.append(new)
    if len(tables):
        table = vstack_tables(tables)
    else:
        table = EventTable(names=read_kwargs.get(
            'columns', ['time', 'frequency', 'snr']))

    # parse time, frequency-like and snr-like column names
    columns = table.dtype.names
    tcolumn = columns[0]
    fcolumn = columns[1]
    scolumn = columns[2]

    # filter
    keep = numpy.ones(len(table), dtype=bool)
    if snr is not None:
        keep &= table[scolumn] >= snr
    if frange is not None:
        keep &= table[fcolumn] >= frange[0]
        keep &= table[fcolumn] < frange[1]
    table = table[keep]

    # return basic table if 'raw'
    if raw:
        return table

    # rename time column so that all tables match in at least that
    if tcolumn != "time":
        table.rename_column(tcolumn, 'time')

    # add channel column to identify all triggers
    table.add_column(table.Column(data=numpy.repeat(channel, len(table)),
                                  name='channel'))

    table.sort('time')
    return table
def main(args=None):
    """Run the zero-crossing counter tool
    """
    parser = create_parser()
    args = parser.parse_args(args=args)
    span = Segment(args.gpsstart, args.gpsend)
    LOGGER.info('-- Processing channel %s over span %d - %d'
                % (args.channel, args.gpsstart, args.gpsend))

    if args.state_flag:
        state = DataQualityFlag.query(
            args.state_flag,
            int(args.gpsstart),
            int(args.gpsend),
            url=const.DEFAULT_SEGMENT_SERVER,
        )
        statea = state.active
    else:
        statea = SegmentList([span])

    duration = abs(span)

    # initialize output files for each threshold and store them in a dict
    outfiles = {}
    for thresh in args.threshold:
        outfiles[str(thresh)] = (os.path.join(
            args.output_path,
            '%s_%s_DAC-%d-%d.h5' % (
                args.channel.replace('-', '_').replace(':', '-'),
                str(int(thresh)).replace('-', 'n'),
                int(args.gpsstart),
                duration,
            ),
        ))

    # get frame cache
    cache = gwdatafind.find_urls(args.ifo[0], args.frametype,
                                 int(args.gpsstart), int(args.gpsend))

    cachesegs = statea & cache_segments(cache)

    if not os.path.exists(args.output_path):
        os.makedirs(args.output_path)

    # initialize a ligolw table for each threshold and store them in a dict
    names = ("time", "frequency", "snr")
    dtypes = ("f8", ) * len(names)
    tables = {}
    for thresh in args.threshold:
        tables[str(thresh)] = EventTable(
            names=names,
            dtype=dtypes,
            meta={"channel": args.channel},
        )

    # for each science segment, read in the data from frames, check for
    # threshold crossings, and if the rate of crossings is less than
    # rate_thresh, write to a sngl_burst table
    for seg in cachesegs:
        LOGGER.debug("Processing {}:".format(seg))
        c = sieve_cache(cache, segment=seg)
        if not c:
            LOGGER.warning("    No {} data files for this segment, "
                           "skipping".format(args.frametype))
            continue
        data = get_data(args.channel, seg[0], seg[1], nproc=args.nproc,
                        source=c, verbose="Reading data:".rjust(30))
        for thresh in args.threshold:
            times = find_crossings(data, thresh)
            rate = float(times.size) / abs(seg) if times.size else 0
            LOGGER.info("    Found {0} crossings of {1}, rate: {2} Hz".format(
                times.size,
                thresh,
                rate,
            ))
            if times.size and rate < args.rate_thresh:
                existing = tables[str(thresh)]
                tables[str(thresh)] = vstack_tables(
                    (
                        existing,
                        table_from_times(times, snr=10., frequency=100.,
                                         names=existing.colnames),
                    ),
                    join_type="exact",
                )

    n = max(map(len, tables.values()))
    for thresh, outfile in outfiles.items():
        tables[thresh].write(
            outfile,
            path="triggers",
            format="hdf5",
            overwrite=True,
        )
        LOGGER.info("{0} events written to {1}".format(
            str(len(tables[thresh])).rjust(len(str(n))),
            outfile,
        ))