def cache_overlaps(*caches):
    """Find segments of overlap in the given cache sets
    """
    cache = [e for c in caches for e in c]
    cache.sort(key=lambda e: file_segment(e)[0])
    overlap = SegmentList()
    segments = SegmentList()
    for e in cache:
        seg = file_segment(e)
        ol = SegmentList([seg]) & segments
        if abs(ol):
            overlap.extend(ol)
        segments.append(seg)
    return overlap
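# A minimal usage sketch for cache_overlaps. The file names below are
# hypothetical, but follow the LIGO-T050017 convention
# OBS-TAG-START-DURATION.ext assumed by file_segment, so 'X-TEST-0-10.gwf'
# spans GPS [0, 10):
cache_a = ['X-TEST-0-10.gwf']
cache_b = ['X-TEST-5-10.gwf']
overlaps = cache_overlaps(cache_a, cache_b)
# -> a SegmentList containing Segment(5, 10), i.e. the span covered by
#    more than one file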
def find_best_frames(ifo, frametype, start, end, **kwargs):
    """Find frames for the given type, replacing with a better type if needed
    """
    # find cache for this frametype
    cache = find_frames(ifo, frametype, start, end, **kwargs)

    # check for gaps in current cache
    span = SegmentList([Segment(start, end)])
    gaps = span - cache_segments(cache)

    # if gaps and using aggregated h(t), check short files
    if abs(gaps) and frametype in SHORT_HOFT_TYPES:
        f2 = SHORT_HOFT_TYPES[frametype]
        vprint(" Gaps discovered in aggregated h(t) type "
               "%s, checking %s\n" % (frametype, f2))
        kwargs['gaps'] = 'ignore'
        cache.extend(filter(lambda e: file_segment(e) in gaps,
                            find_frames(ifo, f2, start, end, **kwargs)))
        new = int(abs(gaps - cache_segments(cache)))
        if new:
            vprint(" %ss extra coverage with frametype %s\n" % (new, f2))
        else:
            vprint(" No extra coverage with frametype %s\n" % f2)

    return cache, frametype
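# A hedged usage sketch for find_best_frames; the frametype and GPS interval
# are placeholders, and a working datafind environment is assumed.
# SHORT_HOFT_TYPES is assumed to be defined elsewhere in the module as the
# mapping from an aggregated h(t) frametype to its low-latency counterpart:
cache, used_type = find_best_frames('L1', 'L1_HOFT_C00',
                                    1187008866, 1187008898)
print(len(cache), used_type)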
def test_file_segment(self):
    """Test :func:`gwpy.io.cache.file_segment`
    """
    # check basic
    fs = io_cache.file_segment('A-B-1-2.ext')
    assert isinstance(fs, Segment)
    assert fs == Segment(1, 3)
    # check multiple file extensions
    assert io_cache.file_segment('A-B-1-2.ext.gz') == (1, 3)
    # check floats (and multiple file extensions)
    assert io_cache.file_segment('A-B-1.23-4.ext.gz') == (1.23, 5.23)
    # test errors
    with pytest.raises(ValueError) as exc:
        io_cache.file_segment('blah')
    assert str(exc.value) == ('Failed to parse \'blah\' as '
                              'LIGO-T050017-compatible filename')
def find_omicron_files(channel, start, end, basepath, ext='xml.gz',
                       filetag=const.OMICRON_FILETAG.upper()):
    """Find Omicron files under a given starting directory
    """
    gps5 = int(str(start)[:5]) - 1
    cache = list()
    span = Segment(start, end)
    while gps5 <= int(str(end)[:5]):
        new = _iter_files_in_gps_directory(channel, basepath, gps5,
                                           ext, filetag=filetag)
        cache.extend(path for path in new
                     if file_segment(path).intersects(span))
        gps5 += 1
    return cache
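# A hedged usage sketch for find_omicron_files; the channel name and base
# path are placeholders. Note that the epoch counter above starts one below
# int(str(start)[:5]) so that files written under the previous 5-digit GPS
# directory, but overlapping the requested span, are still picked up:
files = find_omicron_files('L1:GDS-CALIB_STRAIN', 1187000000, 1187100000,
                           '/home/detchar/triggers')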
def read_and_cache_events(channel, etg, cache=None, trigfind_kw={},
                          **read_kw):
    """Read new events for the given channel and ETG, and cache them to disk
    """
    cfile = create_path(channel)
    # read existing cached triggers and work out new segments to query
    if args.append and cfile.is_file():
        previous = DataQualityFlag.read(
            str(cfile),
            path='segments',
            format='hdf5',
        ).coalesce()
        new = analysis - previous
    else:
        new = analysis.copy()
    # get cache of files
    if cache is None:
        cache = find_trigger_files(channel, etg, new.active, **trigfind_kw)
    else:
        cache = list(filter(
            lambda e: new.active.intersects_segment(file_segment(e)),
            cache,
        ))
    # restrict 'active' segments to when we have data
    try:
        new.active &= cache_segments(cache)
    except IndexError:
        new.active = type(new.active)()
    # find new triggers
    try:
        trigs = get_triggers(channel, etg, new.active, cache=cache,
                             raw=True, **read_kw)
    # catch error and continue
    except ValueError as e:
        warnings.warn('%s: %s' % (type(e).__name__, str(e)))
    else:
        path = write_events(channel, trigs, new)
        try:
            return path, len(trigs)
        except TypeError:  # None
            return
def find_archive_latency(channel, padding, frametype=None, state=None,
                         base=const.OMICRON_ARCHIVE):
    """Find the latency of Omicron file archival for the given channel

    Parameters
    ----------
    channel : `str`
        name of channel
    padding : `int`
        padding parameter for Omicron processing
    frametype : `str`, optional
        frame type ID for data frame files
    state : `str`, optional
        name of DQSegDB flag defining operational state for this channel
    base : `str`, optional
        base directory for Omicron archive

    Returns
    -------
    latency : `dict`
        a `dict` keyed by file extension (`'root'`, `'xml.gz'`) whose values
        are `(latency, file)` pairs giving the archival latency in seconds
        and the latest file found in the archive
    """
    ifo = channel[:2]
    obs = ifo[0]
    # find latest GPS time
    if state is None and frametype is None:
        raise ValueError("Please give one of `state` or `frametype`")
    if state is None:
        target = get_latest_data_gps(obs, frametype)
    else:
        target = get_latest_active_gps(state)
    target -= padding
    # find latest file
    latency = {}
    for ext in ['root', 'xml.gz']:
        f = find_latest_omicron_file(channel, base, ext=ext)
        end = file_segment(f)[1]
        latency[ext] = (int(target - end), f)
    return latency
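# A hedged usage sketch for find_archive_latency; the channel, padding, and
# frametype values are placeholders. Each value in the returned dict is a
# (latency, file) pair, as built in the loop above:
latency = find_archive_latency('L1:GDS-CALIB_STRAIN', 8,
                               frametype='L1_HOFT_C00')
for ext, (lag, path) in latency.items():
    print(ext, lag, path)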
def find_frames(ifo, frametype, gpsstart, gpsend,
                config=GWSummConfigParser(), urltype='file', gaps='warn',
                onerror='raise'):
    """Query the datafind server for GWF files for the given type

    Parameters
    ----------
    ifo : `str`
        prefix for the IFO of interest (either one or two characters)

    frametype : `str`
        name of the frametype to find

    gpsstart : `int`
        GPS start time of the query

    gpsend : `int`
        GPS end time of the query

    config : `~ConfigParser.ConfigParser`, optional
        configuration with `[datafind]` section containing `server`
        specification, otherwise taken from the environment

    urltype : `str`, optional
        what type of file paths to return, default: `file`

    gaps : `str`, optional
        what to do when gaps are detected, one of

        - `ignore` : do nothing
        - `warn` : display the existence of gaps but carry on
        - `raise` : raise an exception

    onerror : `str`, optional
        what to do when the `gwdatafind` query itself fails, same
        options as for ``gaps``

    Returns
    -------
    cache : `list` of `str`
        a list of file paths pointing at GWF files matching the request
    """
    vprint(' Finding %s-%s frames for [%d, %d)...'
           % (ifo[0], frametype, int(gpsstart), int(gpsend)))
    # find datafind host:port
    try:
        host = config.get('datafind', 'server')
    except (NoOptionError, NoSectionError):
        host = None
        port = None
    else:
        port = config.getint('datafind', 'port')
    # XXX HACK: LLO changed frame types on Dec 6 2013:
    LLOCHANGE = 1070291904
    if re.match(r'L1_[CRMT]', frametype) and gpsstart < LLOCHANGE:
        frametype = frametype[-1]
    # query frames
    ifo = ifo[0].upper()
    gpsstart = int(floor(gpsstart))
    gpsend = int(ceil(min(globalv.NOW, gpsend)))
    if gpsend <= gpsstart:
        return []
    # parse match
    try:
        frametype, match = frametype.split('|', 1)
    except ValueError:
        match = None

    def _query():
        return gwdatafind.find_urls(ifo[0].upper(), frametype, gpsstart,
                                    gpsend, urltype=urltype, on_gaps=gaps,
                                    match=match, host=host, port=port)

    try:
        cache = _query()
    except RuntimeError as e:
        sleep(1)
        try:
            cache = _query()
        except RuntimeError:
            if 'Invalid GPS times' in str(e):
                e.args = ('%s: %d ... %s' % (str(e), gpsstart, gpsend),)
            if onerror in ['ignore', None]:
                pass
            elif onerror in ['warn']:
                warnings.warn('Caught %s: %s' % (type(e).__name__, str(e)))
            else:
                raise
            cache = []

    # XXX: if querying for day of LLO frame type change, do both
    if (ifo[0].upper() == 'L' and frametype in ['C', 'R', 'M', 'T'] and
            gpsstart < LLOCHANGE < gpsend):
        start = len(cache) and file_segment(cache[-1])[1] or gpsstart
        if start < gpsend:
            cache.extend(gwdatafind.find_urls(
                ifo[0].upper(), 'L1_%s' % frametype, start, gpsend,
                urltype=urltype, on_gaps=gaps, host=host,
                port=port)[1:])

    # extend cache beyond datafind's knowledge to reduce latency
    try:
        latest = cache[-1]
        ngps = len(re_gwf_gps_epoch.search(
            os.path.dirname(latest)).groupdict()['gpsepoch'])
    except (IndexError, AttributeError):
        pass
    else:
        while True:
            s, e = file_segment(latest)
            if s >= gpsend:
                break
            # replace GPS time of file basename
            new = latest.replace('-%d-' % s, '-%d-' % e)
            # replace GPS epoch in dirname
            new = new.replace('%s/' % str(s)[:ngps], '%s/' % str(e)[:ngps])
            if os.path.isfile(new):
                cache.append(new)
                latest = new
            else:
                break

    # validate files existing and return
    cache = list(filter(os.path.exists, map(_urlpath, cache)))
    vprint(' %d found.\n' % len(cache))
    return cache
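# A hedged usage sketch for find_frames; the frametype and GPS times are
# placeholders, and a reachable datafind server (or a [datafind] section in
# the given config) is assumed. Appending '|<pattern>' to the frametype
# forwards everything after the first '|' to gwdatafind.find_urls as the
# ``match`` filter, per the parsing above:
cache = find_frames('L1', 'L1_R', 1187008866, 1187008898, gaps='ignore')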
def is_saturated(channel, cache, start=None, end=None, indicator='LIMEN',
                 nproc=DEFAULT_NPROC):
    """Check whether a channel has saturated its software limit

    Parameters
    ----------
    channel : `str`, or `list` of `str`
        either a single channel name, or a list of channel names

    cache : `list`
        a `list` of file paths, the cache must be contiguous

    start : `~gwpy.time.LIGOTimeGPS`, `int`
        the GPS start time of the check

    end : `~gwpy.time.LIGOTimeGPS`, `int`
        the GPS end time of the check

    indicator : `str`
        the suffix of the indicator channel, either `'LIMEN'` or `'SWSTAT'`

    nproc : `int`
        the number of parallel processes to use for frame reading

    Returns
    -------
    saturated : `bool`, `None`, or `DataQualityFlag`, or `list` of the same
        one of the following given the conditions

        - `None` : if the channel doesn't have a software limit
        - `False` : if the channel didn't saturate
        - `~gwpy.segments.DataQualityFlag` : otherwise

        OR, a `list` of the above if a `list` of channels was given in the
        first place
    """
    if isinstance(channel, (list, tuple)):
        channels = channel
    else:
        channels = [channel]
    # parse prefix
    for i, c in enumerate(channels):
        if c.endswith('_LIMIT'):
            channels[i] = c[:-6]
    # check limit if set
    indicators = ['{}_{}'.format(c, indicator) for c in channels]
    gps = file_segment(cache[0])[0]
    data = get_data(indicators, gps, gps + 1, source=cache, nproc=nproc)
    # check limits for returned channels
    if len(data) < len(channels):
        # exclude nonexistent channels
        channels = [
            c for c in channels if '{}_{}'.format(c, indicator) in data]
        indicators = ['{}_{}'.format(c, indicator) for c in channels]
    if indicator.upper() == 'LIMEN':
        active = dict((c, data[indicators[i]].value[0])
                      for i, c in enumerate(channels))
    elif indicator.upper() == 'SWSTAT':
        active = dict(
            (c, data[indicators[i]].astype('uint32').value[0] >> 13 & 1)
            for i, c in enumerate(channels))
    else:
        raise ValueError("Don't know how to determine if limit is set for "
                         "indicator %r" % indicator)
    # get output/limit data for all with active limits
    activechans = [c for c in channels if active[c]]
    datachans = ['%s_%s' % (c, s) for c in activechans
                 for s in ('LIMIT', 'OUTPUT')]
    data = get_data(datachans, start, end, source=cache, nproc=nproc)
    # find saturations of the limit for each channel
    dataiter = ((data['%s_OUTPUT' % c], data['%s_LIMIT' % c])
                for c in activechans)
    if nproc > 1:
        with Pool(processes=nproc) as pool:
            saturations = list(pool.map(_find_saturations, dataiter))
    else:
        saturations = list(map(_find_saturations, dataiter))

    # return many or one (based on input)
    if isinstance(channel, (list, tuple)):
        return saturations
    else:
        return saturations[0]
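# A hedged usage sketch for is_saturated; the channel name, frame cache, and
# GPS times are placeholders. Per the docstring, the result is None (no
# software limit), False (no saturation), or a DataQualityFlag whose active
# segments mark the saturations:
from gwpy.segments import DataQualityFlag

cache = ['L-L1_R-1187008866-32.gwf']  # placeholder frame cache
flag = is_saturated('L1:SUS-ETMY_L3_LOCK_L', cache,
                    start=1187008866, end=1187008898, indicator='SWSTAT')
if isinstance(flag, DataQualityFlag):
    print(flag.active)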