Example #1
0
    def find_urls(self, site, frametype, gpsstart, gpsend,
                  match=None, on_gaps='warn'):
        """Return the paths of cached files overlapping a GPS interval.

        Entries are taken from ``self._read_ffl_cache(site, frametype)`` and
        kept only if their ``observatory`` equals ``site``, their
        ``description`` equals ``frametype`` and their ``segment`` intersects
        the segment built from ``(gpsstart, gpsend)``.

        Parameters
        ----------
        site : `str`
            observatory identifier that each entry must match

        frametype : `str`
            description (frame type) that each entry must match

        gpsstart, gpsend
            bounds of the GPS interval to search

        match : `str`, optional
            regular expression; when given, only paths for which
            ``re.search`` succeeds are returned.  Gap detection is always
            performed on the unfiltered selection.

        on_gaps : `str`, optional
            what to do when part of the interval is not covered:
            ``'ignore'`` returns silently, ``'warn'`` (default) emits a
            warning and returns, any other value raises.

        Returns
        -------
        urls : `list`
            the ``path`` of each selected cache entry

        Raises
        ------
        RuntimeError
            if gaps are found and ``on_gaps`` is neither ``'ignore'``
            nor ``'warn'``
        """
        interval = LigoSegment(gpsstart, gpsend)

        # select the cache entries relevant to this query
        selected = []
        for entry in self._read_ffl_cache(site, frametype):
            if entry.observatory != site:
                continue
            if entry.description != frametype:
                continue
            if entry.segment.intersects(interval):
                selected.append(entry)

        paths = [entry.path for entry in selected]

        # the part of the requested interval that no selected entry covers
        uncovered = LigoSegmentList([interval]) - cache_segments(selected)

        # optionally restrict the returned paths with a regular expression
        if match:
            pattern = re.compile(match)
            paths = [path for path in paths if pattern.search(path)]

        # only report gaps when the caller cares and there are some
        if on_gaps != 'ignore' and uncovered:
            report = 'Missing segments: \n{0}'.format(
                '\n'.join(str(seg) for seg in uncovered))
            if on_gaps != 'warn':
                raise RuntimeError(report)
            warnings.warn(report)

        return paths
Example #2
0
def find_frametype(channel, gpstime=None, frametype_match=None,
                   host=None, port=None, return_all=False, allow_tape=False,
                   connection=None, on_gaps='error'):
    """Find the frametype(s) that hold data for a given channel

    Parameters
    ----------
    channel : `str`, `~gwpy.detector.Channel`, or `list`/`tuple` of these
        the channel(s) to be found

    gpstime : `int`, or `list`/`tuple`, optional
        target GPS time at which to find correct type; if a `list` or
        `tuple` is given it is unpacked into a segment, its first element
        is used as the target time, and the datafind coverage of each
        type over that segment is checked for gaps

    frametype_match : `str`, optional
        regular expression to use for frametype `str` matching

    host : `str`, optional
        name of datafind host to use
        (not referenced in this function body; presumably consumed by a
        wrapping decorator - TODO confirm)

    port : `int`, optional
        port on datafind host to use
        (not referenced in this function body; presumably consumed by a
        wrapping decorator - TODO confirm)

    return_all : `bool`, optional, default: `False`
        return all found types, default is to return the 'best' match

    allow_tape : `bool`, optional, default: `False`
        if `False`, files on tape are not checked; use `True` for a
        complete search

    connection : optional
        datafind connection object, passed through unchanged to
        `find_types`, `find_latest` and `find_urls`

    on_gaps : `str`, optional
        action to take when gaps are discovered in datafind coverage,
        default: ``'error'``, i.e. don't match frametypes with gaps.
        Select ``'ignore'`` to ignore gaps, or ``'warn'`` to display
        warnings when gaps are found in a datafind `find_urls` query

    Returns
    -------
    If a single name is given, and `return_all=False` (default):

    frametype : `str`
        name of best match frame type

    If a single name is given, and `return_all=True`:

    types : `list` of `str`
        the list of all matching frame types

    If multiple names are given, the above return types are wrapped into a
    `dict` of `(channel, type_or_list)` pairs.

    Raises
    ------
    ValueError
        if the interferometer prefix cannot be parsed from a channel name,
        or if one or more channels are not found in any tested frametype

    Examples
    --------
    >>> from gwpy.io import datafind as io_datafind
    >>> io_datafind.find_frametype('H1:IMC-PWR_IN_OUTPUT', gpstime=1126259462)
    'H1_R'
    >>> io_datafind.find_frametype('H1:IMC-PWR_IN_OUTPUT', gpstime=1126259462,
    ...                            return_all=True)
    ['H1_R', 'H1_C']
    >>> io_datafind.find_frametype(
    ...     ('H1:IMC-PWR_IN_OUTPUT', 'H1:OMC-DCPD_SUM_OUTPUT',
    ...      'H1:GDS-CALIB_STRAIN'),
    ...     gpstime=1126259462, return_all=True)
    {'H1:GDS-CALIB_STRAIN': ['H1_HOFT_C00'],
     'H1:OMC-DCPD_SUM_OUTPUT': ['H1_R', 'H1_C'],
     'H1:IMC-PWR_IN_OUTPUT': ['H1_R', 'H1_C']}
    """
    # this function is now horrendously complicated to support a large
    # number of different use cases, hopefully the comments are sufficient

    from ..detector import Channel

    # format channel names as list
    if isinstance(channel, (list, tuple)):
        channels = channel
    else:
        channels = [channel]

    # create set() of GWF channel names, and dict map back to user names
    #    this allows users to use nds-style names in this query, e.g.
    #    'X1:TEST.mean,m-trend', and still get results
    chans = {Channel(c).name: c for c in channels}
    names = set(chans)

    # format GPS time(s)
    if isinstance(gpstime, (list, tuple)):
        gpssegment = LigoSegment(*gpstime)
        gpstime = gpssegment[0]
    else:
        gpssegment = None
    if gpstime is not None:
        gpstime = int(to_gps(gpstime))

    # for post-S5 GPS times, forcibly skip _GRBYYMMDD frametypes at CIT
    # (only when the user has not supplied their own frametype_match)
    if frametype_match is None and gpstime is not None and gpstime > 875232014:
        frametype_match = GRB_TYPE

    # -- go

    match = {}
    ifos = set()  # record IFOs we have queried to prevent duplication

    # loop over set of names, which should get smaller as names are searched
    while names:
        # parse first channel name (to get IFO and channel type)
        name = next(iter(names))
        chan = Channel(chans[name])

        # parse IFO ID
        try:
            ifo = chan.ifo[0]
        except TypeError:  # chan.ifo is None
            raise ValueError("Cannot parse interferometer prefix from channel "
                             "name %r, cannot proceed with find()" % str(chan))

        # if we've already gone through the types for this IFO, skip
        if ifo in ifos:
            # drop exactly this name: set.pop() removes an arbitrary
            # element and could discard a channel not yet searched
            names.discard(name)
            continue
        ifos.add(ifo)

        types = find_types(ifo, match=frametype_match, trend=chan.type,
                           connection=connection)

        # loop over types testing each in turn
        for ftype in types:
            # find instance of this frametype
            try:
                path = find_latest(ifo, ftype, gpstime=gpstime,
                                   allow_tape=allow_tape,
                                   connection=connection, on_missing='ignore')
            except (RuntimeError, IOError, IndexError):  # something went wrong
                continue

            # check for gaps in the record for this type
            # NOTE(review): on_gaps is forwarded as-is and nothing raised by
            # find_urls is caught here - confirm that is the intended way to
            # handle on_gaps='error'
            if gpssegment is None:
                gaps = 0
            else:
                cache = find_urls(ifo, ftype, *gpssegment, on_gaps=on_gaps,
                                  connection=connection)
                csegs = cache_segments(cache)
                # files that extend beyond the requested segment must not
                # count as 'negative' gaps, so clamp at zero
                gaps = max(0, abs(gpssegment) - abs(csegs))

            # search the TOC for one frame file and match any channels
            found = 0
            nchan = len(names)
            for n in iter_channel_names(path):
                if n in names:
                    found += 1
                    # record the match under the user-given channel name;
                    # gaps is stored as a positive number so that the
                    # ascending sort below ranks the fewest gaps first
                    match.setdefault(chans[n], []).append(
                        (ftype, path, gaps))
                    if not return_all:  # match once only
                        names.remove(n)
                    # break out of TOC loop once every outstanding name
                    # has been seen in this file
                    if not names or found == nchan:
                        break

            if not names:  # break out of ftype loop if all names matched
                break

        # this name has now been searched (no-op if it was matched above)
        names.discard(name)

    # raise exception if nothing found
    missing = set(channels) - set(match)
    if missing:
        msg = "Cannot locate the following channel(s) in any known frametype"
        if gpstime is not None:
            msg += " at GPS=%d" % gpstime
        msg += ":\n    {}".format('\n    '.join(map(str, missing)))
        if not allow_tape:
            msg += ("\n[files on tape have not been checked, use "
                    "allow_tape=True for a complete search]")
        raise ValueError(msg)

    # if matching all types, rank based on coverage, tape, and TOC size
    if return_all:
        paths = set(p[1] for key in match for p in match[key])
        rank = {path: (on_tape(path), num_channels(path)) for path in paths}
        # deprioritise types with gaps, types on tape, and those with
        # lots of channels (smaller sort key == better match)
        for key in match:
            match[key].sort(key=lambda x: (x[2],) + rank[x[1]])
        # remove instance paths (just leave channel and list of frametypes)
        ftypes = {key: [m[0] for m in match[key]] for key in match}
    else:
        ftypes = {key: match[key][0][0] for key in match}

    # if given a list of channels, return a dict
    if isinstance(channel, (list, tuple)):
        return ftypes

    # otherwise just return the result for the single given channel
    return ftypes[str(channel)]
Example #3
0
    def read_timeseriesdict(source,
                            channels,
                            start=None,
                            end=None,
                            dtype=None,
                            resample=None,
                            gap=None,
                            pad=None,
                            nproc=1,
                            series_class=TimeSeries,
                            **kwargs):
        """Read the data for a list of channels from a GWF data source

        Parameters
        ----------
        source : `str`, `list`
            Source of data, any of the following:

            - `str` path of single data file,
            - `str` path of LAL-format cache file,
            - `list` of paths.

        channels : `list`
            list of channel names (or `Channel` objects) to read from frame.

        start : `~gwpy.time.LIGOTimeGPS`, `float`, `str`, optional
            GPS start time of required data, defaults to start of data found;
            any input parseable by `~gwpy.time.to_gps` is fine

        end : `~gwpy.time.LIGOTimeGPS`, `float`, `str`, optional
            GPS end time of required data, defaults to end of data found;
            any input parseable by `~gwpy.time.to_gps` is fine

        dtype : `dict`, or any `numpy.dtype` specifier, optional
            DEPRECATED: data type to cast each series to after reading,
            either one value for all channels or a `dict` keyed by channel

        resample : `dict`, or `float`, optional
            DEPRECATED: rate to resample each series to after reading,
            either one value for all channels or a `dict` keyed by channel

        gap : `str`, optional
            gap-handling mode passed to ``append`` when joining the data
            read from successive sources; if not given it is ``'pad'``
            when ``pad`` is given, otherwise ``'raise'``

        pad : `float`, optional
            value with which to fill gaps in the source data, if not
            given gaps will result in an exception being raised

        nproc : `int`, optional
            accepted for interface compatibility; not referenced in this
            function body

        series_class : `type`, optional
            series type to read into; its ``DictClass`` is used for the
            returned container

        **kwargs
            other keyword arguments are passed to the underlying
            library reader

        Returns
        -------
        dict : :class:`~gwpy.timeseries.TimeSeriesDict`
            dict of (channel, `TimeSeries`) data pairs
        """
        # import the frame library here to have any ImportErrors occur early
        # (``library`` comes from the enclosing scope)
        import_gwf_library(library)

        # -- from here read data

        # compare against None (not truthiness) so that a GPS time of 0 is
        # converted like any other, consistent with the checks further down
        if start is not None:
            start = to_gps(start)
        if end is not None:
            end = to_gps(end)

        # parse output format kwargs -- DEPRECATED
        if resample is not None:
            warnings.warn(
                'the resample keyword for is deprecated, instead '
                'you should manually resample after reading',
                DeprecationWarning)
        if not isinstance(resample, dict):
            # broadcast a single value (including None) to every channel
            resample = {c: resample for c in channels}
        if dtype is not None:
            warnings.warn(
                'the dtype keyword for is deprecated, instead '
                'you should manually call astype() after reading',
                DeprecationWarning)
        if not isinstance(dtype, dict):
            # broadcast a single value (including None) to every channel
            dtype = {c: dtype for c in channels}

        # format gap handling
        if gap is None:
            gap = 'raise' if pad is None else 'pad'

        # read cache file up-front (given either as a path or as an open
        # file whose name carries a cache-file extension)
        cache_suffixes = ('.lcf', '.cache')
        if isinstance(source, str):
            is_cache_file = source.endswith(cache_suffixes)
        elif isinstance(source, io_cache.FILE_LIKE):
            is_cache_file = source.name.endswith(cache_suffixes)
        else:
            is_cache_file = False
        if is_cache_file:
            source = io_cache.read_cache(source)

        # separate cache into contiguous segments
        if io_cache.is_cache(source):
            if start is not None and end is not None:
                source = io_cache.sieve(
                    source,
                    segment=LigoSegment(start, end),
                )
            source = list(io_cache.find_contiguous(source))

        # convert everything else into a list if needed
        if not isinstance(source, (list, tuple)):
            source = [source]

        # now read the data
        out = series_class.DictClass()
        for i, src in enumerate(source):
            if i == 1:  # force data into fresh memory so that append works
                for name in out:
                    out[name] = numpy.require(out[name], requirements=['O'])
            # ``libread_`` is the library-specific reader from the
            # enclosing scope
            new = libread_(src,
                           channels,
                           start=start,
                           end=end,
                           series_class=series_class,
                           **kwargs)
            out.append(new, gap=gap, pad=pad, copy=False)

        # apply resampling and dtype-casting -- DEPRECATED
        for name in out:
            rate = resample.get(name)
            if rate and rate != out[name].sample_rate.value:
                out[name] = out[name].resample(rate)
            target = dtype.get(name)
            if target is not None and numpy.dtype(target) != out[name].dtype:
                out[name] = out[name].astype(target)
        return out
Example #4
0
def find_frametype(channel,
                   gpstime=None,
                   frametype_match=None,
                   host=None,
                   port=None,
                   return_all=False,
                   allow_tape=False,
                   connection=None,
                   on_gaps='error'):
    """Find the frametype(s) that hold data for a given channel

    Parameters
    ----------
    channel : `str`, `~gwpy.detector.Channel`, or `list`/`tuple` of these
        the channel(s) to be found

    gpstime : `int`, or `list`/`tuple`, optional
        target GPS time at which to find correct type; if a `list` or
        `tuple` is given it is unpacked into a segment, its first element
        is used as the target time, and the segment is used when checking
        each type for gaps

    frametype_match : `str`, optional
        regular expression to use for frametype `str` matching

    host : `str`, optional
        name of datafind host to use
        (not referenced in this function body; presumably consumed by a
        wrapping decorator - TODO confirm)

    port : `int`, optional
        port on datafind host to use
        (not referenced in this function body; presumably consumed by a
        wrapping decorator - TODO confirm)

    return_all : `bool`, optional, default: `False`
        return all found types, default is to return the 'best' match

    allow_tape : `bool`, optional, default: `False`
        passed to `find_latest` when locating a file of each type
        (presumably `True` permits files held on tape - TODO confirm)

    connection : optional
        datafind connection object, passed through unchanged to
        `find_types`, `find_latest` and the gap check

    on_gaps : `str`, optional
        action to take when gaps are discovered in datafind coverage,
        default: ``'error'``, i.e. don't match frametypes with gaps.
        Select ``'ignore'`` to ignore gaps, or ``'warn'`` to display
        warnings when gaps are found in a datafind `find_urls` query

    Returns
    -------
    If a single name is given, and `return_all=False` (default):

    frametype : `str`
        name of best match frame type

    If a single name is given, and `return_all=True`:

    types : `list` of `str`
        the list of all matching frame types

    If multiple names are given, the above return types are wrapped into a
    `dict` of `(channel, type_or_list)` pairs.

    Examples
    --------
    >>> from gwpy.io import datafind as io_datafind
    >>> io_datafind.find_frametype('H1:IMC-PWR_IN_OUTPUT', gpstime=1126259462)
    'H1_R'
    >>> io_datafind.find_frametype('H1:IMC-PWR_IN_OUTPUT', gpstime=1126259462,
    ...                            return_all=True)
    ['H1_R', 'H1_C']
    >>> io_datafind.find_frametype(
    ...     ('H1:IMC-PWR_IN_OUTPUT', 'H1:OMC-DCPD_SUM_OUTPUT',
    ...      'H1:GDS-CALIB_STRAIN'),
    ...     gpstime=1126259462, return_all=True)
    {'H1:GDS-CALIB_STRAIN': ['H1_HOFT_C00'],
     'H1:OMC-DCPD_SUM_OUTPUT': ['H1_R', 'H1_C'],
     'H1:IMC-PWR_IN_OUTPUT': ['H1_R', 'H1_C']}
    """
    # this function is now horrendously complicated to support a large
    # number of different use cases, hopefully the comments are sufficient

    from ..detector import Channel

    # format channel names as list
    if isinstance(channel, (list, tuple)):
        channels = channel
    else:
        channels = [channel]

    # create two-way mapping between user-given names and GWF channel names
    #    this allows users to use nds-style names in this query, e.g.
    #    'X1:TEST.mean,m-trend', and still get results
    channels = {c: Channel(c).name for c in channels}  # user -> GWF name
    names = {val: key for key, val in channels.items()}  # GWF name -> user

    # format GPS time(s)
    if isinstance(gpstime, (list, tuple)):
        gpssegment = LigoSegment(*gpstime)
        gpstime = gpssegment[0]
    else:
        gpssegment = None
    if gpstime is not None:
        gpstime = int(to_gps(gpstime))

    # for post-S5 GPS times, forcibly skip _GRBYYMMDD frametypes at CIT
    # (only when the user has not supplied their own frametype_match)
    if frametype_match is None and gpstime is not None and gpstime > 875232014:
        frametype_match = GRB_TYPE

    # -- go

    match = defaultdict(list)
    searched = set()

    for ifo, trend in _parse_ifos_and_trends(channels):
        # once every channel has been matched (only possible when
        # return_all is False) further queries cannot add anything, so
        # stop before issuing them
        if not names:
            break

        # find all types (prioritising trends if we need to)
        types = find_types(
            ifo,
            match=frametype_match,
            trend=trend,
            connection=connection,
        )

        # loop over types testing each in turn
        for ftype in types:

            # if we've already searched this type for this IFO,
            # don't do it again
            if (ifo, ftype) in searched:
                continue

            # find instance of this frametype
            try:
                path = find_latest(
                    ifo,
                    ftype,
                    gpstime=gpstime,
                    allow_tape=allow_tape,
                    connection=connection,
                    on_missing='ignore',
                )
            except (RuntimeError, IOError, IndexError):  # something went wrong
                continue

            # check for gaps in the record for this type
            gaps = _find_gaps(ifo, ftype, gpssegment, on_gaps, connection)

            # search the TOC for one frame file and match any channels
            found = 0
            nchan = len(names)
            try:
                for n in iter_channel_names(path):
                    if n in names:  # frametype includes this channel!
                        # count how many channels we have found in this type
                        found += 1

                        # record the match using the user-given channel name
                        match[names[n]].append((ftype, path, gaps))

                        # if only matching once, don't search other types
                        # for this channel
                        if not return_all:
                            names.pop(n)

                        if found == nchan:  # all channels have been found
                            break
            except RuntimeError as exc:  # failed to open file (probably)
                warnings.warn(
                    "failed to read channels for type {!r}: {}:".format(
                        ftype,
                        str(exc),
                    ), )
                # not recorded as searched, so a later (ifo, trend) pass
                # may try this type again
                continue

            # record this type as having been searched
            searched.add((ifo, ftype))

            if not names:  # if all channels matched, stop
                break

    # raise exception if one or more channels were not found
    _error_missing_channels(channels, match.keys(), gpstime, allow_tape)

    # rank types (and pick best if required)
    _rank_types(match)

    # and format as a dict for each channel, keeping only the frametype
    # name (first item) of each recorded (ftype, path, gaps) tuple
    output = {key: [found_type[0] for found_type in found_types]
              for key, found_types in match.items()}
    if not return_all:  # reduce the list-of-one to a single element
        output = {key: val[0] for key, val in output.items()}

    # if given a list of channels, return the dict
    if isinstance(channel, (list, tuple)):
        return output

    # otherwise just return the result for the given channel
    return output[str(channel)]