Example 1
def cache_overlaps(*caches):
    """Find segments of overlap in the given cache sets
    """
    cache = [e for c in caches for e in c]
    cache.sort(key=lambda e: file_segment(e)[0])
    overlap = SegmentList()
    segments = SegmentList()
    for e in cache:
        seg = file_segment(e)
        ol = SegmentList([seg]) & segments
        if abs(ol):
            overlap.extend(ol)
        segments.append(seg)
    return overlap
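
A minimal usage sketch of cache_overlaps, assuming `file_segment` and `SegmentList` come from `gwpy.io.cache` and `gwpy.segments` as the function requires (file names hypothetical, LIGO-T050017 style):

from gwpy.io.cache import file_segment
from gwpy.segments import SegmentList

cache_a = ['X-TEST-0-10.gwf', 'X-TEST-10-10.gwf']  # together cover [0, 20)
cache_b = ['X-TEST-5-10.gwf']                      # covers [5, 15)
overlap = cache_overlaps(cache_a, cache_b)
print(abs(overlap))  # 10 -- the span [5, 15) is covered by more than one file
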
Example 2
def find_best_frames(ifo, frametype, start, end, **kwargs):
    """Find frames for the given type, replacing with a better type if needed
    """
    # find cache for this frametype
    cache = find_frames(ifo, frametype, start, end, **kwargs)

    # check for gaps in current cache
    span = SegmentList([Segment(start, end)])
    gaps = span - cache_segments(cache)

    # if gaps and using aggregated h(t), check short files
    if abs(gaps) and frametype in SHORT_HOFT_TYPES:
        f2 = SHORT_HOFT_TYPES[frametype]
        vprint("    Gaps discovered in aggregated h(t) type "
               "%s, checking %s\n" % (frametype, f2))
        kwargs['gaps'] = 'ignore'
        cache.extend(
            filter(lambda e: file_segment(e) in gaps,
                   find_frames(ifo, f2, start, end, **kwargs)))
        new = int(abs(gaps - cache_segments(cache)))
        if new:
            vprint("    %ss extra coverage with frametype %s\n" % (new, f2))
        else:
            vprint("    No extra coverage with frametype %s\n" % f2)

    return cache, frametype
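
The gap check above is ordinary segment arithmetic; a standalone sketch of that step, with hard-coded segments standing in for the output of `cache_segments(cache)`:

from gwpy.segments import Segment, SegmentList

span = SegmentList([Segment(0, 100)])
covered = SegmentList([Segment(0, 40), Segment(60, 100)])  # stand-in for cache_segments(cache)
gaps = span - covered
print(gaps)       # a single gap covering [40, 60)
print(abs(gaps))  # 20 -> non-zero, so the cache is missing data
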
Example 3
 def test_file_segment(self):
     """Test :func:`gwpy.io.cache.file_segment`
     """
     # check basic
     fs = io_cache.file_segment('A-B-1-2.ext')
     assert isinstance(fs, Segment)
     assert fs == Segment(1, 3)
     # check multiple file extensions
     assert io_cache.file_segment('A-B-1-2.ext.gz') == (1, 3)
     # check floats (and multiple file extensions)
     assert io_cache.file_segment('A-B-1.23-4.ext.gz') == (1.23, 5.23)
     # test errors
     with pytest.raises(ValueError) as exc:
         io_cache.file_segment('blah')
     assert str(exc.value) == ('Failed to parse \'blah\' as '
                               'LIGO-T050017-compatible filename')
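
Outside of pytest, the same parser can be exercised directly; a quick sketch along the same lines (file name hypothetical):

from gwpy.io.cache import file_segment
from gwpy.segments import Segment

seg = file_segment('H-H1_TEST-1000000000-4096.gwf')
assert seg == Segment(1000000000, 1000004096)
print(seg[0], abs(seg))  # GPS start time and duration of the file
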
Example 4
def find_omicron_files(channel, start, end, basepath, ext='xml.gz',
                       filetag=const.OMICRON_FILETAG.upper()):
    """Find Omicron files under a given starting directory
    """
    gps5 = int(str(start)[:5])-1
    cache = list()
    span = Segment(start, end)
    while gps5 <= int(str(end)[:5]):
        new = _iter_files_in_gps_directory(channel, basepath, gps5,
                                           ext, filetag=filetag)
        cache.extend(path for path in new if
                     file_segment(path).intersects(span))
        gps5 += 1
    return cache
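
The while loop above walks one 5-digit GPS directory prefix at a time, starting one directory early so a file from the previous directory that spills into the span is not missed; the bookkeeping on its own, with hypothetical times:

start, end = 1186700000, 1187100000
gps5 = int(str(start)[:5]) - 1    # 11866
while gps5 <= int(str(end)[:5]):  # ... through 11871
    print(gps5)                   # 11866, 11867, ..., 11871
    gps5 += 1
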
Example 5
 def read_and_cache_events(channel,
                           etg,
                           cache=None,
                           trigfind_kw={},
                           **read_kw):
     cfile = create_path(channel)
     # read existing cached triggers and work out new segments to query
     if args.append and cfile.is_file():
         previous = DataQualityFlag.read(
             str(cfile),
             path='segments',
             format='hdf5',
         ).coalesce()
         new = analysis - previous
     else:
         new = analysis.copy()
     # get cache of files
     if cache is None:
         cache = find_trigger_files(channel, etg, new.active, **trigfind_kw)
     else:
         cache = list(
             filter(
                 lambda e: new.active.intersects_segment(file_segment(e)),
                 cache,
             ))
     # restrict 'active' segments to when we have data
     try:
         new.active &= cache_segments(cache)
     except IndexError:
         new.active = type(new.active)()
     # find new triggers
     try:
         trigs = get_triggers(channel,
                              etg,
                              new.active,
                              cache=cache,
                              raw=True,
                              **read_kw)
     # catch error and continue
     except ValueError as e:
         warnings.warn('%s: %s' % (type(e).__name__, str(e)))
     else:
         path = write_events(channel, trigs, new)
         try:
             return path, len(trigs)
         except TypeError:  # None
             return
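
A standalone sketch of the cache-filtering step above, using a plain `SegmentList` in place of the `DataQualityFlag` active segments (file names hypothetical):

from gwpy.io.cache import file_segment
from gwpy.segments import Segment, SegmentList

active = SegmentList([Segment(0, 100)])
cache = ['X-TRIGGERS-50-50.xml.gz', 'X-TRIGGERS-200-50.xml.gz']
kept = [e for e in cache if active.intersects_segment(file_segment(e))]
print(kept)  # only the first file overlaps the active segments
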
Example 6
def find_archive_latency(channel,
                         padding,
                         frametype=None,
                         state=None,
                         base=const.OMICRON_ARCHIVE):
    """Find the latency of Omicron file archival for the given channel

    Parameters
    ----------
    channel : `str`
        name of channel
    padding : `int`
        padding parameter for Omicron processing
    frametype : `str`, optional
        frame type ID for data frame files
    state : `str`, optional
        name of DQSegDB flag defining operational state for this channel
    base : `str`, optional
        base directory for Omicron archive

    Returns
    -------
    latency : `dict`
        a `dict`, keyed on file extension ('root', 'xml.gz'), of
        `(latency, file)` tuples giving the latency in seconds and the
        latest archived file for that extension
    """
    ifo = channel[:2]
    obs = ifo[0]
    # find latest GPS time
    if state is None and frametype is None:
        raise ValueError("Please give one of `state` or `frametype`")
    if state is None:
        target = get_latest_data_gps(obs, frametype)
    else:
        target = get_latest_active_gps(state)
    target -= padding
    # find latest file
    latency = {}
    for ext in ['root', 'xml.gz']:
        f = find_latest_omicron_file(channel, base, ext=ext)
        end = file_segment(f)[1]
        latency[ext] = (int(target - end), f)
    return latency
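
The latency figure is just the gap between the reference GPS time and the end of the newest archived file, as parsed from its name; a minimal sketch with hypothetical values:

from gwpy.io.cache import file_segment

latest = 'L-L1_OMICRON-1186741800-60.xml.gz'  # hypothetical newest archive file
end = file_segment(latest)[1]                 # 1186741860
target = 1186742000                           # e.g. GPS of the latest available data
print(int(target - end))                      # 140 seconds behind
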
Example 7
def find_frames(ifo,
                frametype,
                gpsstart,
                gpsend,
                config=GWSummConfigParser(),
                urltype='file',
                gaps='warn',
                onerror='raise'):
    """Query the datafind server for GWF files for the given type

    Parameters
    ----------
    ifo : `str`
        prefix for the IFO of interest (either one or two characters)

    frametype : `str`
        name of the frametype to find

    gpsstart : `int`
        GPS start time of the query

    gpsend : `int`
        GPS end time of the query

    config : `~ConfigParser.ConfigParser`, optional
        configuration with `[datafind]` section containing `server`
        specification, otherwise taken from the environment

    urltype : `str`, optional
        what type of file paths to return, default: `file`

    gaps : `str`, optional
        what to do when gaps are detected, one of

        - `ignore` : do nothing
        - `warn` : display the existence of gaps but carry on
        - `raise` : raise an exception

    onerror : `str`, optional
        what to do when the `gwdatafind` query itself fails, same
        options as for ``gaps``

    Returns
    -------
    cache : `list` of `str`
        a list of file paths pointing at GWF files matching the request
    """
    vprint('    Finding %s-%s frames for [%d, %d)...' %
           (ifo[0], frametype, int(gpsstart), int(gpsend)))
    # find datafind host:port
    try:
        host = config.get('datafind', 'server')
    except (NoOptionError, NoSectionError):
        host = None
        port = None
    else:
        port = config.getint('datafind', 'port')

    # XXX HACK: LLO changed frame types on Dec 6 2013:
    LLOCHANGE = 1070291904
    if re.match(r'L1_[CRMT]', frametype) and gpsstart < LLOCHANGE:
        frametype = frametype[-1]

    # query frames
    ifo = ifo[0].upper()
    gpsstart = int(floor(gpsstart))
    gpsend = int(ceil(min(globalv.NOW, gpsend)))
    if gpsend <= gpsstart:
        return []

    # parse match
    try:
        frametype, match = frametype.split('|', 1)
    except ValueError:
        match = None

    def _query():
        return gwdatafind.find_urls(ifo[0].upper(),
                                    frametype,
                                    gpsstart,
                                    gpsend,
                                    urltype=urltype,
                                    on_gaps=gaps,
                                    match=match,
                                    host=host,
                                    port=port)

    try:
        cache = _query()
    except RuntimeError as e:
        sleep(1)
        try:
            cache = _query()
        except RuntimeError:
            if 'Invalid GPS times' in str(e):
                e.args = ('%s: %d ... %s' % (str(e), gpsstart, gpsend), )
            if onerror in ['ignore', None]:
                pass
            elif onerror in ['warn']:
                warnings.warn('Caught %s: %s' % (type(e).__name__, str(e)))
            else:
                raise
            cache = []

    # XXX: if querying for day of LLO frame type change, do both
    if (ifo[0].upper() == 'L' and frametype in ['C', 'R', 'M', 'T']
            and gpsstart < LLOCHANGE < gpsend):
        start = file_segment(cache[-1])[1] if cache else gpsstart
        if start < gpsend:
            cache.extend(
                gwdatafind.find_urls(ifo[0].upper(),
                                     'L1_%s' % frametype,
                                     start,
                                     gpsend,
                                     urltype=urltype,
                                     on_gaps=gaps,
                                     host=host,
                                     port=port)[1:])

    # extend cache beyond datafind's knowledge to reduce latency
    try:
        latest = cache[-1]
        ngps = len(
            re_gwf_gps_epoch.search(
                os.path.dirname(latest)).groupdict()['gpsepoch'])
    except (IndexError, AttributeError):
        pass
    else:
        while True:
            s, e = file_segment(latest)
            if s >= gpsend:
                break
            # replace GPS time of file basename
            new = latest.replace('-%d-' % s, '-%d-' % e)
            # replace GPS epoch in dirname
            new = new.replace('%s/' % str(s)[:ngps], '%s/' % str(e)[:ngps])
            if os.path.isfile(new):
                cache.append(new)
            else:
                break

    # validate files existing and return
    cache = list(filter(os.path.exists, map(_urlpath, cache)))
    vprint(' %d found.\n' % len(cache))
    return cache
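
The final "extend the cache beyond datafind's knowledge" step predicts the next file name by shifting the GPS start in both the basename and the epoch directory; a sketch of one iteration with a hypothetical path (assuming a 5-digit GPS epoch here, which the real code reads via `re_gwf_gps_epoch`):

from gwpy.io.cache import file_segment

latest = '/data/11867/L-L1_HOFT-1186700000-64.gwf'  # hypothetical
s, e = file_segment(latest)                         # 1186700000, 1186700064
new = latest.replace('-%d-' % s, '-%d-' % e)
new = new.replace('%s/' % str(s)[:5], '%s/' % str(e)[:5])
print(new)  # /data/11867/L-L1_HOFT-1186700064-64.gwf
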
Example 8
def is_saturated(channel,
                 cache,
                 start=None,
                 end=None,
                 indicator='LIMEN',
                 nproc=DEFAULT_NPROC):
    """Check whether a channel has saturated its software limit

    Parameters
    ----------
    channel : `str`, or `list` of `str`
        either a single channel name, or a list of channel names

    cache : `list`
        a `list` of file paths, the cache must be contiguous

    start : `~gwpy.time.LIGOTimeGPS`, `int`
        the GPS start time of the check

    end : `~gwpy.time.LIGOTimeGPS`, `int`
        the GPS end time of the check

    indicator : `str`
        the suffix of the indicator channel, either `'LIMEN'` or `'SWSTAT'`

    nproc : `int`
        the number of parallel processes to use for frame reading

    Returns
    -------
    saturated : `bool`, `None`, or `DataQualityFlag`, or `list` of the same
        one of the following given the conditions
        - `None` : if the channel doesn't have a software limit
        - `False` : if the channel didn't saturate
        - `~gwpy.segments.DataQualityFlag` : otherwise
        OR, a `list` of the above if a `list` of channels was given in the
        first place
    """
    if isinstance(channel, (list, tuple)):
        channels = channel
    else:
        channels = [channel]
    # parse prefix
    for i, c in enumerate(channels):
        if c.endswith('_LIMIT'):
            channels[i] = c[:-6]
    # check limit if set
    indicators = ['{}_{}'.format(c, indicator) for c in channels]
    gps = file_segment(cache[0])[0]
    data = get_data(indicators, gps, gps + 1, source=cache, nproc=nproc)

    # check limits for returned channels
    if len(data) < len(channels):  # exclude nonexistent channels
        channels = [
            c for c in channels if '{}_{}'.format(c, indicator) in data
        ]
        indicators = ['{}_{}'.format(c, indicator) for c in channels]
    if indicator.upper() == 'LIMEN':
        active = dict(
            (c, data[indicators[i]].value[0]) for i, c in enumerate(channels))
    elif indicator.upper() == 'SWSTAT':
        active = dict(
            (c, data[indicators[i]].astype('uint32').value[0] >> 13 & 1)
            for i, c in enumerate(channels))
    else:
        raise ValueError("Don't know how to determine if limit is set for "
                         "indicator %r" % indicator)
    # get output/limit data for all with active limits
    activechans = [c for c in channels if active[c]]
    datachans = [
        '%s_%s' % (c, s) for c in activechans for s in ('LIMIT', 'OUTPUT')
    ]
    data = get_data(datachans, start, end, source=cache, nproc=nproc)

    # find saturations of the limit for each channel
    dataiter = ((data['%s_OUTPUT' % c], data['%s_LIMIT' % c])
                for c in activechans)
    if nproc > 1:
        with Pool(processes=nproc) as pool:
            saturations = list(pool.map(_find_saturations, dataiter))
    else:
        saturations = list(map(_find_saturations, dataiter))

    # return many or one (based on input)
    if isinstance(channel, (list, tuple)):
        return saturations
    else:
        return saturations[0]
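
The SWSTAT branch above decides whether a limit is engaged by testing bit 13 of the filter-module switch-status word; the bit arithmetic in isolation (values hypothetical):

limit_on = (1 << 13) | 0b101   # bit 13 set, plus some unrelated low bits
limit_off = 0b101              # bit 13 clear
print(limit_on >> 13 & 1)      # 1 -> limit engaged
print(limit_off >> 13 & 1)     # 0 -> no limit set
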