def find_urls(self, site, frametype, gpsstart, gpsend, match=None,
              on_gaps='warn'):
    """Return the paths of cached files overlapping ``[gpsstart, gpsend)``.

    Parameters
    ----------
    site : `str`
        observatory identifier; compared against each cache entry's
        ``observatory`` attribute
    frametype : `str`
        frametype name; compared against each cache entry's
        ``description`` attribute
    gpsstart, gpsend
        bounds of the GPS interval to search
    match : `str`, optional
        regular expression; when given, only paths for which
        ``re.search`` succeeds are returned
    on_gaps : `str`, optional
        what to do when part of the interval is not covered:
        ``'ignore'`` returns silently, ``'warn'`` emits a warning and
        returns, any other value raises `RuntimeError`

    Returns
    -------
    urls : `list`
        the ``path`` of every matching cache entry

    Raises
    ------
    RuntimeError
        if coverage gaps exist and ``on_gaps`` is neither ``'ignore'``
        nor ``'warn'``
    """
    interval = LigoSegment(gpsstart, gpsend)

    # keep only entries for this site/type that overlap the interval
    entries = []
    for entry in self._read_ffl_cache(site, frametype):
        if (
            entry.observatory == site
            and entry.description == frametype
            and entry.segment.intersects(interval)
        ):
            entries.append(entry)

    # coverage is evaluated on the full entry list, i.e. before the
    # optional regex filter is applied to the paths
    gaps = LigoSegmentList([interval]) - cache_segments(entries)

    paths = [entry.path for entry in entries]
    if match:
        pattern = re.compile(match)
        paths = [path for path in paths if pattern.search(path)]

    # nothing missing, or the caller does not care
    if on_gaps == 'ignore' or not gaps:
        return paths

    # report the missing data
    message = 'Missing segments: \n{0}'.format(
        '\n'.join(str(seg) for seg in gaps),
    )
    if on_gaps != 'warn':
        raise RuntimeError(message)
    warnings.warn(message)
    return paths
def find_frametype(channel, gpstime=None, frametype_match=None,
                   host=None, port=None, return_all=False, allow_tape=False,
                   connection=None, on_gaps='error'):
    """Find the frametype(s) that hold data for a given channel

    Parameters
    ----------
    channel : `str`, `~gwpy.detector.Channel`, or `list`/`tuple` of these
        the channel(s) to be found

    gpstime : `int`, or `list`/`tuple`, optional
        target GPS time at which to find correct type; a `list` or
        `tuple` is unpacked into a `LigoSegment`, its first element is
        used as the target time, and the whole segment is checked for
        gaps in datafind coverage

    frametype_match : `str`, optional
        regular expression to use for frametype `str` matching

    host : `str`, optional
        name of datafind host to use (not referenced in this function
        body)

    port : `int`, optional
        port on datafind host to use (not referenced in this function
        body)

    return_all : `bool`, optional, default: `False`
        return all found types, default is to return to 'best' match

    allow_tape : `bool`, optional, default: `False`
        do not test types whose frame files are stored on tape (not on
        spinning disk)

    connection : optional
        passed through to `find_types`, `find_latest` and `find_urls`

    on_gaps : `str`, optional
        action to take when gaps are discovered in datafind coverage,
        default: ``'error'``, i.e. don't match frametypes with gaps.
        Select ``'ignore'`` to ignore gaps, or ``'warn'`` to display
        warnings when gaps are found in a datafind `find_urls` query

    Returns
    -------
    If a single name is given, and `return_all=False` (default):

    frametype : `str`
        name of best match frame type

    If a single name is given, and `return_all=True`:

    types : `list` of `str`
        the list of all matching frame types

    If multiple names are given, the above return types are wrapped into a
    `dict` of `(channel, type_or_list)` pairs.

    Raises
    ------
    ValueError
        if the interferometer prefix cannot be parsed from a channel
        name, or if one or more channels are not found in any frametype

    Examples
    --------
    >>> from gwpy.io import datafind as io_datafind
    >>> io_datafind.find_frametype('H1:IMC-PWR_IN_OUTPUT', gpstime=1126259462)
    'H1_R'
    >>> io_datafind.find_frametype('H1:IMC-PWR_IN_OUTPUT', gpstime=1126259462,
    ...                            return_all=True)
    ['H1_R', 'H1_C']
    >>> io_datafind.find_frametype(
    ...     ('H1:IMC-PWR_IN_OUTPUT', 'H1:OMC-DCPD_SUM_OUTPUT',
    ...      'H1:GDS-CALIB_STRAIN'),
    ...     gpstime=1126259462, return_all=True)
    {'H1:GDS-CALIB_STRAIN': ['H1_HOFT_C00'],
     'H1:OMC-DCPD_SUM_OUTPUT': ['H1_R', 'H1_C'],
     'H1:IMC-PWR_IN_OUTPUT': ['H1_R', 'H1_C']}
    """
    # this function is now horrendously complicated to support a large
    # number of different use cases, hopefully the comments are sufficient

    from ..detector import Channel

    # format channel names as list
    if isinstance(channel, (list, tuple)):
        channels = channel
    else:
        channels = [channel]

    # create set() of GWF channel names, and dict map back to user names
    #    this allows users to use nds-style names in this query, e.g.
    #    'X1:TEST.mean,m-trend', and still get results
    chans = {Channel(c).name: c for c in channels}
    names = set(chans)

    # format GPS time(s)
    if isinstance(gpstime, (list, tuple)):
        gpssegment = LigoSegment(*gpstime)
        gpstime = gpssegment[0]
    else:
        gpssegment = None
    if gpstime is not None:
        gpstime = int(to_gps(gpstime))

    # for post-S5 GPS times with no user-given pattern, fall back to
    # GRB_TYPE (original note: forcibly skip _GRBYYMMDD frametypes at CIT)
    if frametype_match is None and gpstime is not None and gpstime > 875232014:
        frametype_match = GRB_TYPE

    # -- go

    match = {}
    ifos = set()  # record IFOs we have queried to prevent duplication

    # loop over set of names, which should get smaller as names are searched
    while names:
        # parse first channel name (to get IFO and channel type);
        # ``while names`` guarantees there is at least one element
        name = next(iter(names))
        chan = Channel(chans[name])

        # parse IFO ID
        try:
            ifo = chan.ifo[0]
        except TypeError:  # chan.ifo is None
            raise ValueError("Cannot parse interferometer prefix from channel "
                             "name %r, cannot proceed with find()" % str(chan))

        # if we've already gone through the types for this IFO, skip;
        # discard exactly the name just examined (set.pop() would remove
        # an arbitrary element, possibly one that was never searched)
        if ifo in ifos:
            names.discard(name)
            continue
        ifos.add(ifo)

        types = find_types(ifo, match=frametype_match, trend=chan.type,
                           connection=connection)

        # loop over types testing each in turn
        for ftype in types:
            # find instance of this frametype
            try:
                path = find_latest(ifo, ftype, gpstime=gpstime,
                                   allow_tape=allow_tape,
                                   connection=connection, on_missing='ignore')
            except (RuntimeError, IOError, IndexError):  # something went wrong
                continue

            # check for gaps in the record for this type
            if gpssegment is None:
                gaps = 0
            else:
                cache = find_urls(ifo, ftype, *gpssegment, on_gaps=on_gaps,
                                  connection=connection)
                csegs = cache_segments(cache)
                gaps = abs(gpssegment) - abs(csegs)

            # search the TOC for one frame file and match any channels
            found = 0
            nchan = len(names)
            for n in iter_channel_names(path):
                if n not in names:
                    continue
                found += 1
                c = chans[n]  # map back to user-given channel name
                # store the gap size as-is so that the ascending sort
                # below places types with the best coverage first
                match.setdefault(c, []).append((ftype, path, gaps))
                if not return_all:  # match once only
                    names.remove(n)
                # stop reading the TOC once every outstanding name has
                # been seen in this file
                if not names or found == nchan:
                    break

            if not names:  # break out of ftype loop if all names matched
                break

        # this name's IFO has now been searched; it may already have been
        # removed by a successful match above, hence discard() not remove()
        names.discard(name)

    # raise exception if nothing found
    missing = set(channels) - set(match.keys())
    if missing:
        msg = "Cannot locate the following channel(s) in any known frametype"
        if gpstime is not None:
            msg += " at GPS=%d" % gpstime
        msg += ":\n {}".format('\n '.join(map(str, missing)))
        if not allow_tape:
            msg += ("\n[files on tape have not been checked, use "
                    "allow_tape=True for a complete search]")
        raise ValueError(msg)

    # if matching all types, rank based on coverage, tape, and TOC size
    if return_all:
        paths = {p[1] for key in match for p in match[key]}
        rank = {path: (on_tape(path), num_channels(path)) for path in paths}
        # deprioritise types with gaps, types on tape, and those with
        # lots of channels (ascending sort: smallest key wins)
        for key in match:
            match[key].sort(key=lambda x: (x[2],) + rank[x[1]])
        # remove instance paths (just leave channel and list of frametypes)
        ftypes = {key: list(list(zip(*match[key]))[0]) for key in match}
    else:
        ftypes = {key: match[key][0][0] for key in match}

    # if given a list of channels, return a dict
    if isinstance(channel, (list, tuple)):
        return ftypes

    # otherwise just return the result for this channel
    return ftypes[str(channel)]
def read_timeseriesdict(source, channels, start=None, end=None,
                        dtype=None, resample=None, gap=None, pad=None,
                        nproc=1, series_class=TimeSeries, **kwargs):
    """Read the data for a list of channels from a GWF data source

    Parameters
    ----------
    source : `str`, `list`
        Source of data, any of the following:

        - `str` path of single data file,
        - `str` path of LAL-format cache file,
        - `list` of paths.

    channels : `list`
        list of channel names (or `Channel` objects) to read from frame.

    start : `~gwpy.time.LIGOTimeGPS`, `float`, `str`, optional
        GPS start time of required data, defaults to start of data found;
        any input parseable by `~gwpy.time.to_gps` is fine

    end : `~gwpy.time.LIGOTimeGPS`, `float`, `str`, optional
        GPS end time of required data, defaults to end of data found;
        any input parseable by `~gwpy.time.to_gps` is fine

    dtype : `dict` or single value, optional
        DEPRECATED; target data type, either one value for all channels
        or a `dict` keyed by channel, applied with ``astype()`` after
        reading

    resample : `dict` or single value, optional
        DEPRECATED; target sample rate, either one value for all channels
        or a `dict` keyed by channel, applied with ``resample()`` after
        reading

    gap : `str`, optional
        gap-handling mode passed to ``append``; defaults to ``'pad'`` if
        ``pad`` is given, otherwise ``'raise'``

    pad : `float`, optional
        value with which to fill gaps in the source data, if not
        given gaps will result in an exception being raised

    nproc : `int`, optional
        accepted in the signature but not referenced in this function
        body

    series_class : `type`, optional
        series type to read; its ``DictClass`` attribute is instantiated
        to collect the output

    **kwargs
        passed through to the library reader for each source

    Returns
    -------
    dict : :class:`~gwpy.timeseries.TimeSeriesDict`
        dict of (channel, `TimeSeries`) data pairs

    Notes
    -----
    ``library`` and ``libread_`` are not defined in this function; they
    are resolved from the surrounding scope.
    """
    # import the frame library here to have any ImportErrors occur early
    import_gwf_library(library)

    # -- from here read data

    # compare against None (not truthiness) so that a GPS time of 0 is
    # converted like any other value, consistent with the checks below
    if start is not None:
        start = to_gps(start)
    if end is not None:
        end = to_gps(end)

    # parse output format kwargs -- DEPRECATED
    if resample is not None:
        warnings.warn(
            'the resample keyword for is deprecated, instead '
            'you should manually resample after reading',
            DeprecationWarning)
    # always normalise to a per-channel dict (even for None) because the
    # post-processing loop at the end calls ``.get()`` on it
    if not isinstance(resample, dict):
        resample = {c: resample for c in channels}
    if dtype is not None:
        warnings.warn(
            'the dtype keyword for is deprecated, instead '
            'you should manually call astype() after reading',
            DeprecationWarning)
    if not isinstance(dtype, dict):
        dtype = {c: dtype for c in channels}

    # format gap handling
    if gap is None:
        gap = 'raise' if pad is None else 'pad'

    # read cache file up-front
    cache_suffixes = ('.lcf', '.cache')
    if (
        isinstance(source, str) and source.endswith(cache_suffixes)
    ) or (
        isinstance(source, io_cache.FILE_LIKE)
        and source.name.endswith(cache_suffixes)
    ):
        source = io_cache.read_cache(source)

    # separate cache into contiguous segments
    if io_cache.is_cache(source):
        if start is not None and end is not None:
            source = io_cache.sieve(
                source,
                segment=LigoSegment(start, end),
            )
        source = list(io_cache.find_contiguous(source))

    # convert everything else into a list if needed
    if not isinstance(source, (list, tuple)):
        source = [source]

    # now read the data
    out = series_class.DictClass()
    for i, src in enumerate(source):
        if i == 1:  # force data into fresh memory so that append works
            for name in out:
                out[name] = numpy.require(out[name], requirements=['O'])
        out.append(
            libread_(src, channels, start=start, end=end,
                     series_class=series_class, **kwargs),
            gap=gap, pad=pad, copy=False,
        )

    # apply resampling and dtype-casting -- DEPRECATED
    for name in out:
        rate = resample.get(name)
        if rate and rate != out[name].sample_rate.value:
            out[name] = out[name].resample(rate)
        target = dtype.get(name)
        if target is not None and numpy.dtype(target) != out[name].dtype:
            out[name] = out[name].astype(target)

    return out
def find_frametype(channel, gpstime=None, frametype_match=None,
                   host=None, port=None, return_all=False, allow_tape=False,
                   connection=None, on_gaps='error'):
    """Find the frametype(s) that hold data for a given channel

    Parameters
    ----------
    channel : `str`, `~gwpy.detector.Channel`
        the channel to be found; a `list` or `tuple` of channels is
        also accepted

    gpstime : `int`, optional
        target GPS time at which to find correct type; a `list` or
        `tuple` is unpacked into a `LigoSegment` whose first element
        becomes the target time

    frametype_match : `str`, optional
        regular expression to use for frametype `str` matching

    host : `str`, optional
        name of datafind host to use (not referenced in this function
        body)

    port : `int`, optional
        port on datafind host to use (not referenced in this function
        body)

    return_all : `bool`, optional, default: `False`
        return all found types, default is to return to 'best' match

    allow_tape : `bool`, optional, default: `False`
        do not test types whose frame files are stored on tape (not on
        spinning disk)

    connection : optional
        passed through to `find_types`, `find_latest` and `_find_gaps`

    on_gaps : `str`, optional
        action to take when gaps are discovered in datafind coverage,
        default: ``'error'``, i.e. don't match frametypes with gaps.
        Select ``'ignore'`` to ignore gaps, or ``'warn'`` to display
        warnings when gaps are found in a datafind `find_urls` query

    Returns
    -------
    If a single name is given, and `return_all=False` (default):

    frametype : `str`
        name of best match frame type

    If a single name is given, and `return_all=True`:

    types : `list` of `str`
        the list of all matching frame types

    If multiple names are given, the above return types are wrapped into a
    `dict` of `(channel, type_or_list)` pairs.

    Examples
    --------
    >>> from gwpy.io import datafind as io_datafind
    >>> io_datafind.find_frametype('H1:IMC-PWR_IN_OUTPUT', gpstime=1126259462)
    'H1_R'
    >>> io_datafind.find_frametype('H1:IMC-PWR_IN_OUTPUT', gpstime=1126259462,
    ...                            return_all=True)
    ['H1_R', 'H1_C']
    >>> io_datafind.find_frametype(
    ...     ('H1:IMC-PWR_IN_OUTPUT', 'H1:OMC-DCPD_SUM_OUTPUT',
    ...      'H1:GDS-CALIB_STRAIN'),
    ...     gpstime=1126259462, return_all=True)
    {'H1:GDS-CALIB_STRAIN': ['H1_HOFT_C00'],
     'H1:OMC-DCPD_SUM_OUTPUT': ['H1_R', 'H1_C'],
     'H1:IMC-PWR_IN_OUTPUT': ['H1_R', 'H1_C']}
    """
    # this function is now horrendously complicated to support a large
    # number of different use cases, hopefully the comments are sufficient

    from ..detector import Channel

    # treat a single channel as a list of one
    many = isinstance(channel, (list, tuple))
    requested = channel if many else [channel]

    # map user-given names to GWF names, and back again; this allows
    # users to use nds-style names in this query, e.g.
    # 'X1:TEST.mean,m-trend', and still get results
    gwf_names = {user: Channel(user).name for user in requested}
    pending = {gwf: user for user, gwf in gwf_names.items()}

    # format GPS time(s)
    gpssegment = None
    if isinstance(gpstime, (list, tuple)):
        gpssegment = LigoSegment(*gpstime)
        gpstime = gpssegment[0]
    if gpstime is not None:
        gpstime = int(to_gps(gpstime))

    # for post-S5 GPS times with no user-given pattern, use GRB_TYPE
    # (original note: forcibly skip _GRBYYMMDD frametypes at CIT)
    if (
        frametype_match is None
        and gpstime is not None
        and gpstime > 875232014
    ):
        frametype_match = GRB_TYPE

    # -- go

    matched = defaultdict(list)
    done = set()

    for ifo, trend in _parse_ifos_and_trends(gwf_names):
        # find all types (prioritising trends if we need to)
        candidates = find_types(
            ifo,
            match=frametype_match,
            trend=trend,
            connection=connection,
        )

        # test each type in turn
        for ftype in candidates:
            tag = (ifo, ftype)

            # never search the same type twice for one IFO
            if tag in done:
                continue

            # find an instance of this frametype
            try:
                path = find_latest(
                    ifo,
                    ftype,
                    gpstime=gpstime,
                    allow_tape=allow_tape,
                    connection=connection,
                    on_missing='ignore',
                )
            except (RuntimeError, IOError, IndexError):
                # something went wrong, move on to the next type
                continue

            # check for gaps in the record for this type
            gaps = _find_gaps(ifo, ftype, gpssegment, on_gaps, connection)

            # search the TOC of one frame file for outstanding channels
            wanted = len(pending)
            hits = 0
            try:
                for toc_name in iter_channel_names(path):
                    if toc_name not in pending:
                        continue

                    # frametype includes this channel; record the match
                    # under the user-given channel name
                    hits += 1
                    matched[pending[toc_name]].append((ftype, path, gaps))

                    # when matching once only, stop looking for this
                    # channel in other types
                    if not return_all:
                        del pending[toc_name]

                    # every outstanding channel was found in this file
                    if hits == wanted:
                        break
            except RuntimeError as exc:
                # failed to open file (probably); the type is not marked
                # as searched
                warnings.warn(
                    "failed to read channels for type {!r}: {}:".format(
                        ftype,
                        str(exc),
                    ),
                )
                continue

            # record this type as having been searched
            done.add(tag)

            # if all channels matched, stop testing types
            if not pending:
                break

    # raise exception if one or more channels were not found
    _error_missing_channels(gwf_names, matched.keys(), gpstime, allow_tape)

    # rank types (and pick best if required)
    _rank_types(matched)

    # strip the (path, gaps) bookkeeping, leaving frametype names only;
    # without return_all the list is reduced to its first element
    output = {}
    for user, entries in matched.items():
        ranked = list(list(zip(*entries))[0])
        output[user] = ranked if return_all else ranked[0]

    # a list of channels gets the full dict back
    if many:
        return output

    # otherwise just return the result for the given channel
    return output[str(channel)]