Example #1
def test_read_ligolw(self):
    # read all flags from the LIGO_LW XML file
    flags = DataQualityDict.read(SEGXML)
    self.assertEqual(len(flags.keys()), 2)
    self.assertIn(FLAG1, flags)
    self.assertIn(FLAG2, flags)
    # read back only the named flag
    flags = DataQualityDict.read(SEGXML, [FLAG2])
    self.assertEqual(len(flags.keys()), 1)
    self.assertEqual(flags[FLAG2].known, KNOWN2)
    self.assertEqual(flags[FLAG2].active, ACTIVE2)
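Outside of a test case, the same read pattern looks like the sketch below; the filename and flag name are placeholders:

from gwpy.segments import DataQualityDict

# read every flag from a LIGO_LW XML file (hypothetical filename)
flags = DataQualityDict.read('segments.xml.gz')

# or restrict the read to a single named flag (hypothetical flag name)
flags = DataQualityDict.read('segments.xml.gz', ['X1:TEST-FLAG:1'])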
Example #2
def test_write_ligolw(self):
    tmpfile = self.tmpfile % 'xml.gz'
    try:
        flags = DataQualityDict.read(SEGXML)
    except Exception as e:
        self.skipTest(str(e))
    try:
        flags.write(tmpfile)
    finally:
        # guard against write() failing before the file was created
        if os.path.isfile(tmpfile):
            os.remove(tmpfile)
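A hedged round-trip sketch of the same write cycle that does not depend on the test fixture; the input filename is a placeholder:

import os
import tempfile

from gwpy.segments import DataQualityDict

# read flags from a LIGO_LW XML file (hypothetical filename), then
# write them back out into a throwaway directory
flags = DataQualityDict.read('segments.xml.gz')
with tempfile.TemporaryDirectory() as tmpdir:
    flags.write(os.path.join(tmpdir, 'segments.xml.gz'))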
Example #3
def get_segments(flags,
                 segments,
                 cache=None,
                 url='https://segdb-er.ligo.caltech.edu',
                 **kwargs):
    """Fetch some segments from the segment database

    Parameters
    ----------
    flags : `str`, `list`
        one or more flags for which to query
    segments : `~gwpy.segments.DataQualityFlag`, `~gwpy.segments.SegmentList`
        span over which to query for flag segments
    cache : `~glue.lal.Cache`, optional
        cache of files to use as data source
    url : `str`
        URL of segment database, if ``cache`` is not given
    **kwargs
        other keyword arguments to pass to either
        `~gwpy.segments.DataQualityFlag.read` (if ``cache`` is given) or
        `~gwpy.segments.DataQualityFlag.query` (otherwise)

    Returns
    -------
    segments : `~gwpy.segments.DataQualityFlag`,
               `~gwpy.segments.DataQualityDict`
        a single `~gwpy.segments.DataQualityFlag` (if ``flags`` is given
        as a `str`), or a `~gwpy.segments.DataQualityDict` (if ``flags``
        is given as a `list`)
    """
    # format segments
    if isinstance(segments, DataQualityFlag):
        segments = segments.active
    elif isinstance(segments, tuple):
        segments = [Segment(to_gps(segments[0]), to_gps(segments[1]))]
    segments = SegmentList(segments)

    # get format for files
    if cache is not None and not isinstance(cache, Cache):
        kwargs.setdefault(
            'format',
            _get_valid_format('read', DataQualityFlag, None, None,
                              (cache[0], ), {}))

    # populate an existing set of flags
    if isinstance(flags, (DataQualityFlag, DataQualityDict)):
        return flags.populate(source=cache or url, segments=segments, **kwargs)
    # query one flag
    elif cache is None and isinstance(flags, str):
        return DataQualityFlag.query(flags, segments, url=url, **kwargs)
    # query lots of flags
    elif cache is None:
        return DataQualityDict.query(flags, segments, url=url, **kwargs)
    # read one flag
    elif flags is None or isinstance(flags, str):
        segs = DataQualityFlag.read(cache, flags, coalesce=False, **kwargs)
        if segs.known:
            segs.known &= segments
        else:
            segs.known = segments
        segs.active &= segments
        return segs
    # read lots of flags
    else:
        segs = DataQualityDict.read(cache, flags, coalesce=True, **kwargs)
        for name, flag in segs.items():
            flag.known &= segments
            flag.active &= segments
        return segs
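A usage sketch for this function; the flag names and GPS times are placeholders:

# query the database for one flag over a (start, end) GPS tuple
flag = get_segments('X1:DMT-ANALYSIS_READY:1', (1126051217, 1126137617))

# or query several flags at once, returning a DataQualityDict
flags = get_segments(['X1:FLAG-ONE:1', 'X1:FLAG-TWO:1'],
                     (1126051217, 1126137617))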
Example #4
def main(args=None):
    """Run the cache_events tool
    """
    parser = create_parser()
    args = parser.parse_args(args=args)

    ifo = args.ifo
    start = int(args.gpsstart)
    end = int(args.gpsend)
    duration = end - start

    LOGGER.info("-- Welcome to Hveto --")
    LOGGER.info("GPS start time: %d" % start)
    LOGGER.info("GPS end time: %d" % end)
    LOGGER.info("Interferometer: %s" % ifo)

    # -- initialisation -------------------------------

    # read configuration
    cp = config.HvetoConfigParser(ifo=args.ifo)
    cp.read(map(str, args.config_file))
    LOGGER.info("Parsed configuration file(s)")

    # format output directory
    outdir = args.output_directory
    outdir.mkdir(parents=True, exist_ok=True)
    LOGGER.info("Working directory: {}".format(outdir))
    trigdir = outdir / 'triggers'
    trigdir.mkdir(parents=True, exist_ok=True)

    # get segments
    aflag = cp.get('segments', 'analysis-flag')
    url = cp.get('segments', 'url')
    padding = cp.getfloats('segments', 'padding')
    if args.analysis_segments:
        segs_ = DataQualityDict.read(args.analysis_segments, gpstype=float)
        analysis = segs_[aflag]
        span = SegmentList([Segment(start, end)])
        analysis.active &= span
        analysis.known &= span
        analysis.coalesce()
        LOGGER.debug("Segments read from disk")
    else:
        analysis = DataQualityFlag.query(aflag, start, end, url=url)
        LOGGER.debug("Segments recovered from %s" % url)
    analysis.pad(*padding, inplace=True)
    livetime = int(abs(analysis.active))
    livetimepc = livetime / duration * 100.
    LOGGER.info("Retrieved %d segments for %s with %ss (%.2f%%) livetime" %
                (len(analysis.active), aflag, livetime, livetimepc))

    snrs = cp.getfloats('hveto', 'snr-thresholds')
    minsnr = min(snrs)

    # -- utility methods ------------------------------

    def create_path(channel):
        ifo, name = channel.split(':', 1)
        name = name.replace('-', '_')
        return trigdir / "{}-{}-{}-{}.h5".format(ifo, name, start, duration)

    def read_and_cache_events(channel,
                              etg,
                              cache=None,
                              trigfind_kw={},
                              **read_kw):
        cfile = create_path(channel)
        # read existing cached triggers and work out new segments to query
        if args.append and cfile.is_file():
            previous = DataQualityFlag.read(
                str(cfile),
                path='segments',
                format='hdf5',
            ).coalesce()
            new = analysis - previous
        else:
            new = analysis.copy()
        # get cache of files
        if cache is None:
            cache = find_trigger_files(channel, etg, new.active, **trigfind_kw)
        else:
            cache = list(
                filter(
                    lambda e: new.active.intersects_segment(file_segment(e)),
                    cache,
                ))
        # restrict 'active' segments to when we have data
        try:
            new.active &= cache_segments(cache)
        except IndexError:
            new.active = type(new.active)()
        # find new triggers
        try:
            trigs = get_triggers(channel,
                                 etg,
                                 new.active,
                                 cache=cache,
                                 raw=True,
                                 **read_kw)
        # catch error and continue
        except ValueError as e:
            warnings.warn('%s: %s' % (type(e).__name__, str(e)))
        else:
            path = write_events(channel, trigs, new)
            try:
                return path, len(trigs)
            except TypeError:  # None
                return

    def write_events(channel, tab, segments):
        """Write events to file with a given filename
        """
        # get filename
        path = create_path(channel)
        h5f = h5py.File(str(path), 'a')

        # read existing table from file
        try:
            old = tab.read(h5f["triggers"], format="hdf5")
        except KeyError:
            pass
        else:
            tab = vstack([old, tab])  # astropy vstack takes a list of tables

        # append event table
        tab.write(h5f, path="triggers", append=True, overwrite=True)

        # write segments
        try:
            oldsegs = DataQualityFlag.read(h5f, path="segments", format="hdf5")
        except KeyError:
            pass
        else:
            segments = oldsegs + segments
        segments.write(h5f, path="segments", append=True, overwrite=True)

        # write file to disk
        h5f.close()
        return path

    # -- load channels --------------------------------

    # get primary channel name
    pchannel = cp.get('primary', 'channel')

    # read auxiliary cache
    if args.auxiliary_cache is not None:
        acache = [e for c in args.auxiliary_cache for e in read_cache(str(c))]
    else:
        acache = None

    # load auxiliary channels
    auxetg = cp.get('auxiliary', 'trigger-generator')
    auxfreq = cp.getfloats('auxiliary', 'frequency-range')
    try:
        auxchannels = cp.get('auxiliary', 'channels').strip('\n').split('\n')
    except config.configparser.NoOptionError:
        auxchannels = find_auxiliary_channels(auxetg,
                                              start,
                                              ifo=args.ifo,
                                              cache=acache)

    # load unsafe channels list
    _unsafe = cp.get('safety', 'unsafe-channels')
    if os.path.isfile(_unsafe):  # from file
        unsafe = set()
        with open(_unsafe, 'r') as f:
            for c in f.read().rstrip('\n').split('\n'):
                if c.startswith('%(IFO)s'):
                    unsafe.add(c.replace('%(IFO)s', ifo))
                elif not c.startswith('%s:' % ifo):
                    unsafe.add('%s:%s' % (ifo, c))
                else:
                    unsafe.add(c)
    else:  # or from line-separated list
        unsafe = set(_unsafe.strip('\n').split('\n'))
    unsafe.add(pchannel)
    cp.set('safety', 'unsafe-channels', '\n'.join(sorted(unsafe)))
    LOGGER.debug("Read list of %d unsafe channels" % len(unsafe))

    # remove duplicates
    auxchannels = sorted(set(auxchannels))
    LOGGER.debug("Read list of %d auxiliary channels" % len(auxchannels))

    # remove unsafe channels
    nunsafe = 0
    for i in range(len(auxchannels) - 1, -1, -1):
        if auxchannels[i] in unsafe:
            LOGGER.warning("Auxiliary channel %r identified as unsafe and has "
                           "been removed" % auxchannels[i])
            auxchannels.pop(i)
            nunsafe += 1
    LOGGER.debug("%d auxiliary channels identified as unsafe" % nunsafe)
    naux = len(auxchannels)
    LOGGER.info("Identified %d auxiliary channels to process" % naux)

    # -- load primary triggers -------------------------

    LOGGER.info("Reading events for primary channel...")

    # read primary cache
    if args.primary_cache is not None:
        pcache = [e for c in args.primary_cache for e in read_cache(str(c))]
    else:
        pcache = None

    # get primary params
    petg = cp.get('primary', 'trigger-generator')
    psnr = cp.getfloat('primary', 'snr-threshold')
    pfreq = cp.getfloats('primary', 'frequency-range')
    preadkw = cp.getparams('primary', 'read-')
    ptrigfindkw = cp.getparams('primary', 'trigfind-')

    # load primary triggers
    out = read_and_cache_events(pchannel,
                                petg,
                                snr=psnr,
                                frange=pfreq,
                                cache=pcache,
                                trigfind_kw=ptrigfindkw,
                                **preadkw)
    try:
        e, n = out
    except TypeError:
        e = None
        n = 0
    if n:
        LOGGER.info("Cached %d new events for %s" % (n, pchannel))
    elif args.append and e is not None and e.is_file():
        LOGGER.info("Cached 0 new events for %s" % pchannel)
    else:
        message = "No events found for %r in %d seconds of livetime" % (
            pchannel, livetime)
        LOGGER.critical(message)

    # write primary to local cache
    pname = trigdir / '{}-HVETO_PRIMARY_CACHE-{}-{}.lcf'.format(
        ifo,
        start,
        duration,
    )
    write_lal_cache(str(pname), [e])
    LOGGER.info('Primary cache written to {}'.format(pname))

    # -- load auxiliary triggers -----------------------

    LOGGER.info("Reading triggers for aux channels...")
    counter = multiprocessing.Value('i', 0)

    areadkw = cp.getparams('auxiliary', 'read-')
    atrigfindkw = cp.getparams('auxiliary', 'trigfind-')

    def read_and_write_aux_triggers(channel):
        if acache is None:
            auxcache = None
        else:
            ifo, name = channel.split(':')
            match = "{}-{}".format(ifo, name.replace('-', '_'))
            auxcache = [e for e in acache if Path(e).name.startswith(match)]

        out = read_and_cache_events(channel,
                                    auxetg,
                                    cache=auxcache,
                                    snr=minsnr,
                                    frange=auxfreq,
                                    trigfind_kw=atrigfindkw,
                                    **areadkw)
        try:
            e, n = out
        except TypeError:
            e = None
            n = 0
        # log result of load
        with counter.get_lock():
            counter.value += 1
            tag = '[%d/%d]' % (counter.value, naux)
            if e is None:  # something went wrong
                LOGGER.critical("    %s Failed to read events for %s" %
                                (tag, channel))
            else:  # either read events or nothing new
                LOGGER.debug("    %s Cached %d new events for %s" %
                             (tag, n, channel))
        return e

    # map with multiprocessing
    if args.nproc > 1:
        pool = multiprocessing.Pool(processes=args.nproc)
        results = pool.map(read_and_write_aux_triggers, auxchannels)
        pool.close()
    # map without multiprocessing
    else:
        results = map(read_and_write_aux_triggers, auxchannels)

    acache = [x for x in results if x is not None]
    aname = trigdir / '{}-HVETO_AUXILIARY_CACHE-{}-{}.lcf'.format(
        ifo,
        start,
        duration,
    )
    write_lal_cache(str(aname), acache)
    LOGGER.info('Auxiliary cache written to {}'.format(aname))

    # -- finish ----------------------------------------

    LOGGER.info('Done, you can use these cache files in an hveto analysis by '
                'passing the following arguments:\n\n--primary-cache {} '
                '--auxiliary-cache {}\n'.format(pname, aname))
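The incremental caching in read_and_cache_events rests on simple segment arithmetic: subtracting the previously analysed segments from the current analysis segments leaves only the spans that still need to be queried. A minimal sketch of that operation:

from gwpy.segments import Segment, SegmentList

analysis = SegmentList([Segment(0, 100)])
previous = SegmentList([Segment(0, 60)])

# only the unanalysed span, (60, 100), remains
new = analysis - previous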
Example #5
def get_segments(flag, validity=None, config=ConfigParser(), cache=None,
                 query=True, return_=True, coalesce=True, padding=None,
                 segdb_error='raise', url=None, **read_kw):
    """Retrieve the segments for a given flag

    Segments will be loaded from global memory if already defined,
    otherwise they will be loaded from the given
    :class:`~glue.lal.Cache`, or finally from the segment database

    Parameters
    ----------
    flag : `str`, `list`
        either the name of one flag, or a list of names

    validity : `~gwpy.segments.SegmentList`
        the segments over which to search for other segments

    query : `bool`, optional, default: `True`
        actually execute a read/query operation (if needed), otherwise
        just retrieve segments that have already been cached

    config : `~configparser.ConfigParser`, optional
        the configuration for your analysis, if you have one. If
        present the ``[segment-database]`` section will be queried
        for the following options

        - ``gps-start-time``, and ``gps-end-time``, if ``validity`` is
          not given
        - ``url`` (the remote hostname for the segment database) if
          the ``url`` keyword is not given

    cache : :class:`glue.lal.Cache`, optional
        a cache of files from which to read segments, otherwise segments
        will be downloaded from the segment database

    coalesce : `bool`, optional, default: `True`
        coalesce all segmentlists before returning, otherwise just return
        segments as they were downloaded/read

    padding : `tuple`, or `dict` of `tuples`, optional
        `(start, end)` padding with which to pad segments that are
        downloaded/read

    segdb_error : `str`, optional, default: ``'raise'``
        how to handle errors returned from the segment database, one of

        - ``'raise'`` (default) : raise the exception as normal
        - ``'warn'`` : print the exception as a warning, but return no
          segments
        - ``'ignore'`` : silently ignore the error and return no segments

    url : `str`, optional
        the remote hostname for the target segment database

    return_ : `bool`, optional, default: `True`
        internal flag to enable (True) or disable (False) actually returning
        anything. This is useful if you want to download/read segments now
        but not use them until later (e.g. plotting)

    **read_kw : `dict`, optional
        additional keyword arguments to `~gwpy.segments.DataQualityDict.read`
        or `~gwpy.segments.DataQualityFlag.read`

    Returns
    -------
    flag : `~gwpy.segments.DataQualityFlag`
        the flag object representing segments for the given single flag, OR

    flagdict : `~gwpy.segments.DataQualityDict`
        the dict of `~gwpy.segments.DataQualityFlag` objects for multiple
        flags, if ``flag`` is given as a `list`, OR

    None
        if ``return_=False``
    """
    if isinstance(flag, str):
        flags = flag.split(',')
    else:
        flags = flag
    allflags = set([f for cf in flags for f in
                    re_flagdiv.split(str(cf))[::2] if f])

    if padding is None and isinstance(flag, DataQualityFlag):
        padding = {flag: flag.padding}
    elif padding is None:
        padding = dict((flag,
                        isinstance(flag, DataQualityFlag) and
                        flag.padding or None) for flag in flags)

    # check validity
    if validity is None:
        start = config.get(DEFAULTSECT, 'gps-start-time')
        end = config.get(DEFAULTSECT, 'gps-end-time')
        span = SegmentList([Segment(start, end)])
    elif isinstance(validity, DataQualityFlag):
        validity = validity.active
        try:
            span = SegmentList([validity.extent()])
        except ValueError:
            span = SegmentList()
    else:
        try:
            span = SegmentList([SegmentList(validity).extent()])
        except ValueError:
            span = SegmentList()
    validity = SegmentList(validity)

    # generate output object
    out = DataQualityDict()
    for f in flags:
        out[f] = DataQualityFlag(f, known=validity, active=validity)
    for f in allflags:
        globalv.SEGMENTS.setdefault(f, DataQualityFlag(f))

    # read segments from global memory and get the union of needed times
    try:
        old = reduce(
            operator.and_,
            (globalv.SEGMENTS.get(f, DataQualityFlag(f)).known for f in flags))
    except TypeError:
        old = SegmentList()
    newsegs = validity - old
    # load new segments
    query &= abs(newsegs) != 0
    query &= len(allflags) > 0
    if cache is not None:
        query &= len(cache) != 0
    if query:
        if cache is not None:
            if isinstance(cache, str) and cache.endswith(
                (".h5", ".hdf", ".hdf5")) and (
                    'path' not in read_kw):
                read_kw['path'] = config.get(
                    'DEFAULT', 'segments-hdf5-path', fallback='segments')
            try:
                new = DataQualityDict.read(cache, list(allflags), **read_kw)
            except IORegistryError as e:
                # can remove when astropy >= 1.2 is required
                if type(e) is not IORegistryError:
                    raise
                if len(allflags) == 1:
                    f = list(allflags)[0]
                    new = DataQualityDict()
                    new[f] = DataQualityFlag.read(
                        cache, f, coalesce=False, **read_kw)
            for f in new:
                new[f].known &= newsegs
                new[f].active &= newsegs
                if coalesce:
                    new[f].coalesce()
                vprint("    Read %d segments for %s (%.2f%% coverage).\n"
                       % (len(new[f].active), f,
                          float(abs(new[f].known))/float(abs(newsegs))*100))
        else:
            if len(newsegs) >= 10:
                qsegs = span
            else:
                qsegs = newsegs
            # parse configuration for query
            kwargs = {}
            if url is not None:
                kwargs['url'] = url
            else:
                try:
                    kwargs['url'] = config.get('segment-database', 'url')
                except (NoSectionError, NoOptionError):
                    pass
            if kwargs.get('url', None) in SEGDB_URLS:
                query_func = DataQualityDict.query_segdb
            else:
                query_func = DataQualityDict.query_dqsegdb
            try:
                new = query_func(allflags, qsegs, on_error=segdb_error,
                                 **kwargs)
            except Exception as e:
                # ignore error from SegDB
                if segdb_error in ['ignore', None]:
                    pass
                # convert to warning
                elif segdb_error in ['warn']:
                    print('%sWARNING: %sCaught %s: %s [gwsumm.segments]'
                          % (WARNC, ENDC, type(e).__name__, str(e)),
                          file=sys.stderr)
                    warnings.warn('%s: %s' % (type(e).__name__, str(e)))
                # otherwise raise as normal
                else:
                    raise
                new = DataQualityDict()
            for f in new:
                new[f].known &= newsegs
                new[f].active &= newsegs
                if coalesce:
                    new[f].coalesce()
                vprint("    Downloaded %d segments for %s (%.2f%% coverage).\n"
                       % (len(new[f].active), f,
                          float(abs(new[f].known))/float(abs(newsegs))*100))
        # record new segments
        globalv.SEGMENTS += new
        for f in new:
            globalv.SEGMENTS[f].description = str(new[f].description)

    # return what was asked for
    if return_:
        for compound in flags:
            union, intersection, exclude, notequal = split_compound_flag(
                compound)
            if len(union + intersection) == 1:
                f = (union + intersection)[0]
                out[compound].description = globalv.SEGMENTS[f].description
                out[compound].padding = padding.get(f, (0, 0))
            for flist, op in zip([exclude, intersection, union, notequal],
                                 [operator.sub, operator.and_, operator.or_,
                                  not_equal]):
                for f in flist:
                    pad = padding.get(f, (0, 0))
                    segs = globalv.SEGMENTS[f].copy()
                    if isinstance(pad, (float, int)):
                        segs = segs.pad(pad, pad)
                    elif pad is not None:
                        segs = segs.pad(*pad)
                    if coalesce:
                        segs = segs.coalesce()
                    out[compound] = op(out[compound], segs)
            out[compound].known &= validity
            out[compound].active &= validity
            if coalesce:
                out[compound].coalesce()
        if isinstance(flag, str):
            return out[flag]
        else:
            return out
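A hedged usage sketch; the flag names and GPS span are placeholders:

from gwpy.segments import Segment, SegmentList

validity = SegmentList([Segment(1126051217, 1126137617)])

# fetch one flag (returns a DataQualityFlag) ...
flag = get_segments('X1:DMT-ANALYSIS_READY:1', validity=validity)

# ... or several at once (returns a DataQualityDict)
flags = get_segments(['X1:FLAG-ONE:1', 'X1:FLAG-TWO:1'], validity=validity)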
Example #6
def get_segments(flag, validity=None, config=ConfigParser(), cache=None,
                 query=True, return_=True, coalesce=True, padding=None,
                 segdb_error='raise', url=None):
    """Retrieve the segments for a given flag

    Segments will be loaded from global memory if already defined,
    otherwise they will be loaded from the given
    :class:`~glue.lal.Cache`, or finally from the segment database

    Parameters
    ----------
    flag : `str`, `list`
        either the name of one flag, or a list of names

    validity : `~gwpy.segments.SegmentList`
        the segments over which to search for other segments

    query : `bool`, optional, default: `True`
        actually execute a read/query operation (if needed), otherwise
        just retrieve segments that have already been cached

    config : `~configparser.ConfigParser`, optional
        the configuration for your analysis, if you have one. If
        present the ``[segment-database]`` section will be queried
        for the following options

        - ``gps-start-time``, and ``gps-end-time``, if ``validity`` is
          not given
        - ``url`` (the remote hostname for the segment database) if
          the ``url`` keyword is not given

    cache : :class:`glue.lal.Cache`, optional
        a cache of files from which to read segments, otherwise segments
        will be downloaded from the segment database

    coalesce : `bool`, optional, default: `True`
        coalesce all segmentlists before returning, otherwise just return
        segments as they were downloaded/read

    padding : `tuple`, or `dict` of `tuples`, optional
        `(start, end)` padding with which to pad segments that are
        downloaded/read

    segdb_error : `str`, optional, default: ``'raise'``
        how to handle errors returned from the segment database, one of

        - ``'raise'`` (default) : raise the exception as normal
        - ``'warn'`` : print the exception as a warning, but return no
          segments
        - ``'ignore'`` : silently ignore the error and return no segments

    url : `str`, optional
        the remote hostname for the target segment database

    return_ : `bool`, optional, default: `True`
        internal flag to enable (True) or disable (False) actually returning
        anything. This is useful if you want to download/read segments now
        but not use them until later (e.g. plotting)

    Returns
    -------
    flag : `~gwpy.segments.DataQualityFlag`
        the flag object representing segments for the given single flag, OR

    flagdict : `~gwpy.segments.DataQualityDict`
        the dict of `~gwpy.segments.DataQualityFlag` objects for multiple
        flags, if ``flag`` is given as a `list`, OR

    None
        if ``return_=False``
    """
    if isinstance(flag, str):
        flags = flag.split(',')
    else:
        flags = flag
    allflags = set([f for cf in flags for f in
                    re_flagdiv.split(str(cf))[::2] if f])

    if padding is None and isinstance(flag, DataQualityFlag):
        padding = {flag: flag.padding}
    elif padding is None:
        padding = dict((flag,
                        isinstance(flag, DataQualityFlag) and
                        flag.padding or None) for flag in flags)

    # check validity
    if validity is None:
        start = config.get(DEFAULTSECT, 'gps-start-time')
        end = config.get(DEFAULTSECT, 'gps-end-time')
        span = SegmentList([Segment(start, end)])
    elif isinstance(validity, DataQualityFlag):
        validity = validity.active
        try:
            span = SegmentList([validity.extent()])
        except ValueError:
            span = SegmentList()
    else:
        try:
            span = SegmentList([SegmentList(validity).extent()])
        except ValueError:
            span = SegmentList()
    validity = SegmentList(validity)

    # generate output object
    out = DataQualityDict()
    for f in flags:
        out[f] = DataQualityFlag(f, known=validity, active=validity)
    for f in allflags:
        globalv.SEGMENTS.setdefault(f, DataQualityFlag(f))

    # read segments from global memory and get the union of needed times
    try:
        old = reduce(
            operator.and_,
            (globalv.SEGMENTS.get(f, DataQualityFlag(f)).known for f in flags))
    except TypeError:
        old = SegmentList()
    newsegs = validity - old
    # load new segments
    query &= abs(newsegs) != 0
    query &= len(allflags) > 0
    if cache is not None:
        query &= len(cache) != 0
    if query:
        if cache is not None:
            try:
                new = DataQualityDict.read(cache, list(allflags))
            except IORegistryError as e:
                # can remove when astropy >= 1.2 is required
                if type(e) is not IORegistryError:
                    raise
                if len(allflags) == 1:
                    f = list(allflags)[0]
                    new = DataQualityDict()
                    new[f] = DataQualityFlag.read(cache, f, coalesce=False)
            for f in new:
                new[f].known &= newsegs
                new[f].active &= newsegs
                if coalesce:
                    new[f].coalesce()
                vprint("    Read %d segments for %s (%.2f%% coverage).\n"
                       % (len(new[f].active), f,
                          float(abs(new[f].known))/float(abs(newsegs))*100))
        else:
            if len(newsegs) >= 10:
                qsegs = span
            else:
                qsegs = newsegs
            # parse configuration for query
            kwargs = {}
            if url is not None:
                kwargs['url'] = url
            else:
                try:
                    kwargs['url'] = config.get('segment-database', 'url')
                except (NoSectionError, NoOptionError):
                    pass
            if kwargs.get('url', None) in SEGDB_URLS:
                query_func = DataQualityDict.query_segdb
            else:
                query_func = DataQualityDict.query_dqsegdb
            try:
                new = query_func(allflags, qsegs, on_error=segdb_error,
                                 **kwargs)
            except Exception as e:
                # ignore error from SegDB
                if segdb_error in ['ignore', None]:
                    pass
                # convert to warning
                elif segdb_error in ['warn']:
                    print('%sWARNING: %sCaught %s: %s [gwsumm.segments]'
                          % (WARNC, ENDC, type(e).__name__, str(e)),
                          file=sys.stderr)
                    warnings.warn('%s: %s' % (type(e).__name__, str(e)))
                # otherwise raise as normal
                else:
                    raise
                new = DataQualityDict()
            for f in new:
                new[f].known &= newsegs
                new[f].active &= newsegs
                if coalesce:
                    new[f].coalesce()
                vprint("    Downloaded %d segments for %s (%.2f%% coverage).\n"
                       % (len(new[f].active), f,
                          float(abs(new[f].known))/float(abs(newsegs))*100))
        # record new segments
        globalv.SEGMENTS += new
        for f in new:
            globalv.SEGMENTS[f].description = str(new[f].description)

    # return what was asked for
    if return_:
        for compound in flags:
            union, intersection, exclude, notequal = split_compound_flag(
                compound)
            if len(union + intersection) == 1:
                f = (union + intersection)[0]
                out[compound].description = globalv.SEGMENTS[f].description
                out[compound].padding = padding.get(f, (0, 0))
            for flist, op in zip([exclude, intersection, union, notequal],
                                 [operator.sub, operator.and_, operator.or_,
                                  not_equal]):
                for f in flist:
                    pad = padding.get(f, (0, 0))
                    segs = globalv.SEGMENTS[f].copy()
                    if isinstance(pad, (float, int)):
                        segs = segs.pad(pad, pad)
                    elif pad is not None:
                        segs = segs.pad(*pad)
                    if coalesce:
                        segs = segs.coalesce()
                    out[compound] = op(out[compound], segs)
            out[compound].known &= validity
            out[compound].active &= validity
            if coalesce:
                out[compound].coalesce()
        if isinstance(flag, str):
            return out[flag]
        else:
            return out
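The padding handling above ultimately calls segs.pad; a minimal sketch of what a (start, end) padding tuple does to a flag, using a hypothetical flag name:

from gwpy.segments import DataQualityFlag

flag = DataQualityFlag('X1:TEST-FLAG:1',
                       active=[(10, 20)], known=[(0, 100)])

# pad(-1, 1) widens each segment by one second on either side,
# so the active segment becomes (9, 21)
padded = flag.pad(-1, 1)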
Example #7
def main(args=None):
    """Run the hveto command-line interface
    """
    # declare global variables
    # this is needed for multiprocessing utilities
    global acache, analysis, areadkw, atrigfindkw, auxiliary, auxetg
    global auxfreq, counter, livetime, minsnr, naux, pchannel, primary
    global rnd, snrs, windows

    # parse command-line
    parser = create_parser()
    args = parser.parse_args(args=args)
    ifo = args.ifo
    start = int(args.gpsstart)
    end = int(args.gpsend)
    duration = end - start

    # log startup
    LOGGER.info("-- Welcome to Hveto --")
    LOGGER.info("GPS start time: %d" % start)
    LOGGER.info("GPS end time: %d" % end)
    LOGGER.info("Interferometer: %s" % ifo)

    # -- initialisation -------------------------

    # read configuration
    cp = config.HvetoConfigParser(ifo=ifo)
    cp.read(args.config_file)
    LOGGER.info("Parsed configuration file(s)")

    # format output directory
    outdir = _abs_path(args.output_directory)
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    os.chdir(outdir)
    LOGGER.info("Working directory: %s" % outdir)
    segdir = 'segments'
    plotdir = 'plots'
    trigdir = 'triggers'
    omegadir = 'scans'
    for d in [segdir, plotdir, trigdir, omegadir]:
        if not os.path.isdir(d):
            os.makedirs(d)

    # prepare html variables
    htmlv = {
        'title': '%s Hveto | %d-%d' % (ifo, start, end),
        'config': None,
        'prog': PROG,
        'context': ifo.lower(),
    }

    # get segments
    aflag = cp.get('segments', 'analysis-flag')
    url = cp.get('segments', 'url')
    padding = tuple(cp.getfloats('segments', 'padding'))
    if args.analysis_segments:
        segs_ = DataQualityDict.read(args.analysis_segments, gpstype=float)
        analysis = segs_[aflag]
        span = SegmentList([Segment(start, end)])
        analysis.active &= span
        analysis.known &= span
        analysis.coalesce()
        LOGGER.debug("Segments read from disk")
    else:
        analysis = DataQualityFlag.query(aflag, start, end, url=url)
        LOGGER.debug("Segments recovered from %s" % url)
    if padding != (0, 0):
        mindur = padding[0] - padding[1]
        analysis.active = type(analysis.active)([s for s in analysis.active if
                                                 abs(s) >= mindur])
        analysis.pad(*padding, inplace=True)
        LOGGER.debug("Padding %s applied" % str(padding))
    livetime = int(abs(analysis.active))
    livetimepc = livetime / duration * 100.
    LOGGER.info("Retrieved %d segments for %s with %ss (%.2f%%) livetime"
                % (len(analysis.active), aflag, livetime, livetimepc))

    # apply vetoes from veto-definer file
    try:
        vetofile = cp.get('segments', 'veto-definer-file')
    except configparser.NoOptionError:
        vetofile = None
    else:
        try:
            categories = cp.getfloats('segments', 'veto-definer-categories')
        except configparser.NoOptionError:
            categories = None
        # read file
        vdf = read_veto_definer_file(vetofile, start=start, end=end, ifo=ifo)
        LOGGER.debug("Read veto-definer file from %s" % vetofile)
        # get vetoes from segdb
        vdf.populate(source=url, segments=analysis.active, on_error='warn')
        # coalesce flags from chosen categories
        vetoes = DataQualityFlag('%s:VDF-VETOES:1' % ifo)
        nflags = 0
        for flag in vdf:
            if not categories or vdf[flag].category in categories:
                vetoes += vdf[flag]
                nflags += 1
        try:
            deadtime = int(abs(vetoes.active)) / int(abs(vetoes.known)) * 100
        except ZeroDivisionError:
            deadtime = 0
        LOGGER.debug("Coalesced %ss (%.2f%%) of deadtime from %d veto flags"
                     % (abs(vetoes.active), deadtime, nflags))
        # apply to analysis segments
        analysis -= vetoes
        LOGGER.debug("Applied vetoes from veto-definer file")
        livetime = int(abs(analysis.active))
        livetimepc = livetime / duration * 100.
        LOGGER.info("%ss (%.2f%%) livetime remaining after vetoes"
                    % (livetime, livetimepc))

    snrs = cp.getfloats('hveto', 'snr-thresholds')
    minsnr = min(snrs)
    windows = cp.getfloats('hveto', 'time-windows')

    # record all segments
    segments = DataQualityDict()
    segments[analysis.name] = analysis

    # -- load channels --------------------------

    # get primary channel name
    pchannel = cp.get('primary', 'channel')

    # read auxiliary cache
    if args.auxiliary_cache is not None:
        acache = read_cache(args.auxiliary_cache)
    else:
        acache = None

    # load auxiliary channels
    auxetg = cp.get('auxiliary', 'trigger-generator')
    auxfreq = cp.getfloats('auxiliary', 'frequency-range')
    try:
        auxchannels = cp.get('auxiliary', 'channels').strip('\n').split('\n')
    except config.configparser.NoOptionError:
        auxchannels = find_auxiliary_channels(auxetg, (start, end), ifo=ifo,
                                              cache=acache)
        cp.set('auxiliary', 'channels', '\n'.join(auxchannels))
        LOGGER.debug("Auto-discovered %d "
                     "auxiliary channels" % len(auxchannels))
    else:
        auxchannels = sorted(set(auxchannels))
        LOGGER.debug("Read list of %d auxiliary channels" % len(auxchannels))

    # load unsafe channels list
    _unsafe = cp.get('safety', 'unsafe-channels')
    if os.path.isfile(_unsafe):  # from file
        unsafe = set()
        with open(_unsafe, 'r') as f:
            for c in f.read().rstrip('\n').split('\n'):
                if c.startswith('%(IFO)s'):
                    unsafe.add(c.replace('%(IFO)s', ifo))
                elif not c.startswith('%s:' % ifo):
                    unsafe.add('%s:%s' % (ifo, c))
                else:
                    unsafe.add(c)
    else:  # or from line-separated list
        unsafe = set(_unsafe.strip('\n').split('\n'))
    unsafe.add(pchannel)
    cp.set('safety', 'unsafe-channels', '\n'.join(sorted(unsafe)))
    LOGGER.debug("Read list of %d unsafe channels" % len(unsafe))

    # remove unsafe channels
    nunsafe = 0
    for i in range(len(auxchannels) - 1, -1, -1):
        if auxchannels[i] in unsafe:
            LOGGER.warning("Auxiliary channel %r identified as unsafe and has "
                           "been removed" % auxchannels[i])
            auxchannels.pop(i)
            nunsafe += 1
    LOGGER.debug("%d auxiliary channels identified as unsafe" % nunsafe)
    naux = len(auxchannels)
    LOGGER.info("Identified %d auxiliary channels to process" % naux)

    # record INI file in output HTML directory
    inifile = '%s-HVETO_CONFIGURATION-%d-%d.ini' % (ifo, start, duration)
    if os.path.isfile(inifile) and any(
            os.path.samefile(inifile, x) for x in args.config_file):
        LOGGER.debug("Cannot write INI file to %s, file was given as input")
    else:
        with open(inifile, 'w') as f:
            cp.write(f)
        LOGGER.info("Configuration recorded as %s" % inifile)
    htmlv['config'] = inifile

    # -- load primary triggers ------------------

    # read primary cache
    if args.primary_cache is not None:
        pcache = read_cache(args.primary_cache)
    else:
        pcache = None

    # load primary triggers
    petg = cp.get('primary', 'trigger-generator')
    psnr = cp.getfloat('primary', 'snr-threshold')
    pfreq = cp.getfloats('primary', 'frequency-range')
    preadkw = cp.getparams('primary', 'read-')
    if pcache is not None:  # auto-detect the file format
        LOGGER.debug('Unsetting the primary trigger file format')
        preadkw['format'] = None
        preadkw['path'] = 'triggers'
    ptrigfindkw = cp.getparams('primary', 'trigfind-')
    primary = get_triggers(pchannel, petg, analysis.active, snr=psnr,
                           frange=pfreq, cache=pcache, nproc=args.nproc,
                           trigfind_kwargs=ptrigfindkw, **preadkw)
    fcol, scol = primary.dtype.names[1:3]

    if len(primary):
        LOGGER.info("Read %d events for %s" % (len(primary), pchannel))
    else:
        message = "No events found for %r in %d seconds of livetime" % (
           pchannel, livetime)
        LOGGER.critical(message)

    # cluster primary triggers
    clusterkwargs = cp.getparams('primary', 'cluster-')
    if clusterkwargs:
        primary = primary.cluster(**clusterkwargs)
        LOGGER.info("%d primary events remain after clustering over %s" %
                    (len(primary), clusterkwargs['rank']))

    # -- bail out early -------------------------
    # the bail out is done here so that we can at least generate the eventual
    # configuration file, mainly for HTML purposes

    # no segments
    if livetime == 0:
        message = ("No active segments found for analysis flag %r in interval "
                   "[%d, %d)" % (aflag, start, end))
        LOGGER.critical(message)
        htmlv['context'] = 'info'
        index = html.write_null_page(ifo, start, end, message, **htmlv)
        LOGGER.info("HTML report written to %s" % index)
        sys.exit(0)

    # no primary triggers
    if len(primary) == 0:
        htmlv['context'] = 'danger'
        index = html.write_null_page(ifo, start, end, message, **htmlv)
        LOGGER.info("HTML report written to %s" % index)
        sys.exit(0)

    # otherwise write all primary triggers to ASCII
    trigfile = os.path.join(
        trigdir,
        '%s-HVETO_RAW_TRIGS_ROUND_0-%d-%d.txt' % (ifo, start, duration),
    )
    primary.write(trigfile, format='ascii', overwrite=True)

    # -- load auxiliary triggers ----------------

    LOGGER.info("Reading triggers for aux channels...")
    counter = multiprocessing.Value('i', 0)

    areadkw = cp.getparams('auxiliary', 'read-')
    if acache is not None:  # auto-detect the file format
        LOGGER.debug('Unsetting the auxiliary trigger file format')
        areadkw['format'] = None
        areadkw['path'] = 'triggers'
    atrigfindkw = cp.getparams('auxiliary', 'trigfind-')

    # map with multiprocessing
    if args.nproc > 1:
        pool = multiprocessing.Pool(processes=args.nproc)
        results = pool.map(_get_aux_triggers, auxchannels)
        pool.close()
    # map without multiprocessing
    else:
        results = map(_get_aux_triggers, auxchannels)

    LOGGER.info("All aux events loaded")

    auxiliary = dict(x for x in results if x is not None)
    auxchannels = sorted(auxiliary.keys())
    chanfile = '%s-HVETO_CHANNEL_LIST-%d-%d.txt' % (ifo, start, duration)
    with open(chanfile, 'w') as f:
        for chan in auxchannels:
            print(chan, file=f)
    LOGGER.info("Recorded list of valid auxiliary channels in %s" % chanfile)

    # -- execute hveto analysis -----------------

    minsig = cp.getfloat('hveto', 'minimum-significance')

    pevents = [primary]
    pvetoed = []

    auxfcol, auxscol = auxiliary[auxchannels[0]].dtype.names[1:3]
    slabel = plot.get_column_label(scol)
    flabel = plot.get_column_label(fcol)
    auxslabel = plot.get_column_label(auxscol)
    auxflabel = plot.get_column_label(auxfcol)

    rounds = []
    rnd = core.HvetoRound(1, pchannel, rank=scol)
    rnd.segments = analysis.active

    while True:
        LOGGER.info("-- Processing round %d --" % rnd.n)

        # write segments for this round
        segfile = os.path.join(
            segdir, '%s-HVETO_ANALYSIS_SEGS_ROUND_%d-%d-%d.txt'
                    % (ifo, rnd.n, start, duration))
        write_ascii_segments(segfile, rnd.segments)

        # calculate significances for this round
        if args.nproc > 1:  # multiprocessing
            # separate channel list into chunks and process each chunk
            pool = multiprocessing.Pool(
                processes=min(args.nproc, len(auxiliary.keys())))
            chunks = utils.channel_groups(list(auxiliary.keys()), args.nproc)
            results = pool.map(_find_max_significance, chunks)
            pool.close()
            winners, sigsets = zip(*results)
            # find winner of chunk winners
            winner = sorted(winners, key=lambda w: w.significance)[-1]
            # flatten sets of significances into one list
            newsignificances = sigsets[0]
            for subdict in sigsets[1:]:
                newsignificances.update(subdict)
        else:  # single process
            winner, newsignificances = core.find_max_significance(
                primary, auxiliary, pchannel, snrs, windows, rnd.livetime)

        LOGGER.info("Round %d winner: %s" % (rnd.n, winner.name))

        # plot significance drop here for the last round
        #   only now do we actually have the new data to
        #   calculate significance drop
        if rnd.n > 1:
            svg = (pngname % 'SIG_DROP').replace('.png', '.svg')  # noqa: F821
            plot.significance_drop(
                svg, oldsignificances, newsignificances,  # noqa: F821
                title=' | '.join([title, subtitle]),  # noqa: F821
                bbox_inches='tight')
            LOGGER.debug("Figure written to %s" % svg)
            svg = FancyPlot(svg, caption=plot.ROUND_CAPTION['SIG_DROP'])
            rounds[-1].plots.append(svg)
        oldsignificances = newsignificances  # noqa: F841

        # break out of the loop if the significance is below stopping point
        if winner.significance < minsig:
            LOGGER.info("Maximum signifiance below stopping point")
            LOGGER.debug("    (%.2f < %.2f)" % (winner.significance, minsig))
            LOGGER.info("-- Rounds complete! --")
            break

        # work out the vetoes for this round
        allaux = auxiliary[winner.name][
            auxiliary[winner.name][auxscol] >= winner.snr]
        winner.events = allaux
        coincs = allaux[core.find_coincidences(allaux['time'], primary['time'],
                                               dt=winner.window)]
        rnd.vetoes = winner.get_segments(allaux['time'])
        flag = DataQualityFlag(
            '%s:HVT-ROUND_%d:1' % (ifo, rnd.n), active=rnd.vetoes,
            known=rnd.segments,
            description="winner=%s, window=%s, snr=%s" % (
                winner.name, winner.window, winner.snr))
        segments[flag.name] = flag
        LOGGER.debug("Generated veto segments for round %d" % rnd.n)

        # link events before veto for plotting
        before = primary
        beforeaux = auxiliary[winner.name]

        # apply vetoes to primary
        primary, vetoed = core.veto(primary, rnd.vetoes)
        pevents.append(primary)
        pvetoed.append(vetoed)
        LOGGER.debug("Applied vetoes to primary")

        # record results
        rnd.winner = winner
        rnd.efficiency = (len(vetoed), len(primary) + len(vetoed))
        rnd.use_percentage = (len(coincs), len(winner.events))
        if rnd.n > 1:
            rnd.cum_efficiency = (
                len(vetoed) + rounds[-1].cum_efficiency[0],
                rounds[0].efficiency[1])
            rnd.cum_deadtime = (
                rnd.deadtime[0] + rounds[-1].cum_deadtime[0],
                livetime)
        else:
            rnd.cum_efficiency = rnd.efficiency
            rnd.cum_deadtime = rnd.deadtime

        # apply vetoes to auxiliary
        if args.nproc > 1:  # multiprocess
            # separate channel list into chunks and process each chunk
            pool = multiprocessing.Pool(
                processes=min(args.nproc, len(auxiliary.keys())))
            chunks = utils.channel_groups(list(auxiliary.keys()), args.nproc)
            results = pool.map(_veto, chunks)
            pool.close()
            auxiliary = results[0]
            for subdict in results[1:]:
                auxiliary.update(subdict)
        else:  # single process
            auxiliary = core.veto_all(auxiliary, rnd.vetoes)
        LOGGER.debug("Applied vetoes to auxiliary channels")

        # log results
        LOGGER.info("""Results for round %d:\n\n
    winner :          %s
    significance :    %s
    mu :              %s
    snr :             %s
    dt :              %s
    use_percentage :  %s
    efficiency :      %s
    deadtime :        %s
    cum. efficiency : %s
    cum. deadtime :   %s\n\n""" % (
            rnd.n, rnd.winner.name, rnd.winner.significance,
            rnd.winner.mu, rnd.winner.snr, rnd.winner.window,
            rnd.use_percentage, rnd.efficiency, rnd.deadtime,
            rnd.cum_efficiency, rnd.cum_deadtime))

        # write segments
        segfile = os.path.join(
            segdir,
            '%s-HVETO_VETO_SEGS_ROUND_%d-%d-%d.txt' % (
                ifo, rnd.n, start, duration))
        write_ascii_segments(segfile, rnd.vetoes)
        LOGGER.debug("Round %d vetoes written to %s" % (rnd.n, segfile))
        rnd.files['VETO_SEGS'] = (segfile,)
        # write triggers
        trigfile = os.path.join(
            trigdir,
            '%s-HVETO_%%s_TRIGS_ROUND_%d-%d-%d.txt' % (
                ifo, rnd.n, start, duration))
        for tag, arr in zip(
                ['WINNER', 'VETOED', 'RAW'],
                [winner.events, vetoed, primary]):
            f = trigfile % tag
            arr.write(f, format='ascii', overwrite=True)
            LOGGER.debug("Round %d %s events written to %s"
                         % (rnd.n, tag.lower(), f))
            rnd.files[tag] = f

        # record times to omega scan
        if args.omega_scans:
            N = len(vetoed)
            ind = random.sample(range(0, N), min(args.omega_scans, N))
            rnd.scans = vetoed[ind]
            LOGGER.debug("Collected %d events to omega scan:\n\n%s\n\n"
                         % (len(rnd.scans), rnd.scans))

        # -- make some plots --

        pngname = os.path.join(plotdir, '%s-HVETO_%%s_ROUND_%d-%d-%d.png' % (
            ifo, rnd.n, start, duration))
        wname = texify(rnd.winner.name)
        beforel = 'Before\n[%d]' % len(before)
        afterl = 'After\n[%d]' % len(primary)
        vetoedl = 'Vetoed\n(primary)\n[%d]' % len(vetoed)
        beforeauxl = 'All\n[%d]' % len(beforeaux)
        usedl = 'Used\n(aux)\n[%d]' % len(winner.events)
        coincl = 'Coinc.\n[%d]' % len(coincs)
        title = '%s Hveto round %d' % (ifo, rnd.n)
        ptitle = '%s: primary impact' % title
        atitle = '%s: auxiliary use' % title
        subtitle = 'winner: %s [%d-%d]' % (wname, start, end)

        # before/after histogram
        png = pngname % 'HISTOGRAM'
        plot.before_after_histogram(
            png, before[scol], primary[scol],
            label1=beforel, label2=afterl, xlabel=slabel,
            title=ptitle, subtitle=subtitle)
        LOGGER.debug("Figure written to %s" % png)
        png = FancyPlot(png, caption=plot.ROUND_CAPTION['HISTOGRAM'])
        rnd.plots.append(png)

        # snr versus time
        png = pngname % 'SNR_TIME'
        plot.veto_scatter(
            png, before, vetoed, x='time', y=scol, label1=beforel,
            label2=vetoedl, epoch=start, xlim=[start, end], ylabel=slabel,
            title=ptitle, subtitle=subtitle, legend_title="Primary:")
        LOGGER.debug("Figure written to %s" % png)
        png = FancyPlot(png, caption=plot.ROUND_CAPTION['SNR_TIME'])
        rnd.plots.append(png)

        # snr versus frequency
        png = pngname % 'SNR_%s' % fcol.upper()
        plot.veto_scatter(
            png, before, vetoed, x=fcol, y=scol, label1=beforel,
            label2=vetoedl, xlabel=flabel, ylabel=slabel, xlim=pfreq,
            title=ptitle, subtitle=subtitle, legend_title="Primary:")
        LOGGER.debug("Figure written to %s" % png)
        png = FancyPlot(png, caption=plot.ROUND_CAPTION['SNR'])
        rnd.plots.append(png)

        # frequency versus time coloured by SNR
        png = pngname % '%s_TIME' % fcol.upper()
        plot.veto_scatter(
            png, before, vetoed, x='time', y=fcol, color=scol,
            label1=None, label2=None, ylabel=flabel,
            clabel=slabel, clim=[3, 100], cmap='YlGnBu',
            epoch=start, xlim=[start, end], ylim=pfreq,
            title=ptitle, subtitle=subtitle)
        LOGGER.debug("Figure written to %s" % png)
        png = FancyPlot(png, caption=plot.ROUND_CAPTION['TIME'])
        rnd.plots.append(png)

        # aux used versus frequency
        png = pngname % 'USED_SNR_TIME'
        plot.veto_scatter(
            png, winner.events, vetoed, x='time', y=[auxscol, scol],
            label1=usedl, label2=vetoedl, ylabel=slabel, epoch=start,
            xlim=[start, end], title=atitle, subtitle=subtitle)
        LOGGER.debug("Figure written to %s" % png)
        png = FancyPlot(png, caption=plot.ROUND_CAPTION['USED_SNR_TIME'])
        rnd.plots.append(png)

        # snr versus time
        png = pngname % 'AUX_SNR_TIME'
        plot.veto_scatter(
            png, beforeaux, (winner.events, coincs), x='time', y=auxscol,
            label1=beforeauxl, label2=(usedl, coincl), epoch=start,
            xlim=[start, end], ylabel=auxslabel, title=atitle,
            subtitle=subtitle)
        LOGGER.debug("Figure written to %s" % png)
        png = FancyPlot(png, caption=plot.ROUND_CAPTION['AUX_SNR_TIME'])
        rnd.plots.append(png)

        # snr versus frequency
        png = pngname % 'AUX_SNR_FREQUENCY'
        plot.veto_scatter(
            png, beforeaux, (winner.events, coincs), x=auxfcol, y=auxscol,
            label1=beforeauxl, label2=(usedl, coincl), xlabel=auxflabel,
            ylabel=auxslabel, title=atitle, subtitle=subtitle,
            legend_title="Aux:")
        LOGGER.debug("Figure written to %s" % png)
        png = FancyPlot(png, caption=plot.ROUND_CAPTION['AUX_SNR_FREQUENCY'])
        rnd.plots.append(png)

        # frequency versus time coloured by SNR
        png = pngname % 'AUX_FREQUENCY_TIME'
        plot.veto_scatter(
            png, beforeaux, (winner.events, coincs), x='time', y=auxfcol,
            color=auxscol, label1=None, label2=[None, None], ylabel=auxflabel,
            clabel=auxslabel, clim=[3, 100], cmap='YlGnBu', epoch=start,
            xlim=[start, end], title=atitle, subtitle=subtitle)
        LOGGER.debug("Figure written to %s" % png)
        png = FancyPlot(png, caption=plot.ROUND_CAPTION['AUX_FREQUENCY_TIME'])
        rnd.plots.append(png)

        # move to the next round
        rounds.append(rnd)
        rnd = core.HvetoRound(rnd.n + 1, pchannel, rank=scol,
                              segments=rnd.segments-rnd.vetoes)

    # write file with all segments
    segfile = os.path.join(
        segdir, '%s-HVETO_SEGMENTS-%d-%d.h5' % (ifo, start, duration))
    segments.write(segfile, overwrite=True)
    LOGGER.debug("Segment summary written to %s" % segfile)

    LOGGER.debug("Making summary figures...")

    # -- exit early if no rounds above threshold

    if not rounds:
        message = ("No rounds completed above threshold. Analysis stopped "
                   "with %s achieving significance of %.2f"
                   % (winner.name, winner.significance))
        LOGGER.critical(message)
        message = message.replace(
            winner.name, cis_link(winner.name, class_='alert-link'))
        message += '<br>[T<sub>win</sub>: %ss, SNR: %s]' % (
            winner.window, winner.snr)
        htmlv['context'] = 'warning'
        index = html.write_null_page(ifo, start, end, message, **htmlv)
        LOGGER.info("HTML report written to %s" % index)
        sys.exit(0)

    # -- plot all rounds impact
    pngname = os.path.join(plotdir, '%s-HVETO_%%s_ALL_ROUNDS-%d-%d.png' % (
        ifo, start, duration))
    plots = []
    title = '%s Hveto all rounds' % args.ifo
    subtitle = '%d rounds | %d-%d' % (len(rounds), start, end)

    # before/after histogram
    png = pngname % 'HISTOGRAM'
    beforel = 'Before analysis [%d events]' % len(pevents[0])
    afterl = 'After %d rounds [%d]' % (len(pevents) - 1, len(pevents[-1]))
    plot.before_after_histogram(
        png, pevents[0][scol], pevents[-1][scol],
        label1=beforel, label2=afterl, xlabel=slabel,
        title=title, subtitle=subtitle)
    png = FancyPlot(png, caption=plot.HEADER_CAPTION['HISTOGRAM'])
    plots.append(png)
    LOGGER.debug("Figure written to %s" % png)

    # efficiency/deadtime curve
    png = pngname % 'ROC'
    plot.hveto_roc(png, rounds, title=title, subtitle=subtitle)
    png = FancyPlot(png, caption=plot.HEADER_CAPTION['ROC'])
    plots.append(png)
    LOGGER.debug("Figure written to %s" % png)

    # frequency versus time
    png = pngname % '%s_TIME' % fcol.upper()
    labels = [str(r.n) for r in rounds]
    legtitle = 'Vetoed at\nround'
    plot.veto_scatter(
        png, pevents[0], pvetoed,
        label1='', label2=labels, title=title,
        subtitle=subtitle, ylabel=flabel, x='time', y=fcol,
        epoch=start, xlim=[start, end], legend_title=legtitle)
    png = FancyPlot(png, caption=plot.HEADER_CAPTION['TIME'])
    plots.append(png)
    LOGGER.debug("Figure written to %s" % png)

    # snr versus time
    png = pngname % 'SNR_TIME'
    plot.veto_scatter(
        png, pevents[0], pvetoed, label1='', label2=labels, title=title,
        subtitle=subtitle, ylabel=slabel, x='time', y=scol,
        epoch=start, xlim=[start, end], legend_title=legtitle)
    png = FancyPlot(png, caption=plot.HEADER_CAPTION['SNR_TIME'])
    plots.append(png)
    LOGGER.debug("Figure written to %s" % png)

    # -- write summary states to ASCII table and JSON
    json_ = {
        'user': getuser(),
        'host': getfqdn(),
        'date': str(datetime.datetime.now()),
        'configuration': inifile,
        'ifo': ifo,
        'gpsstart': start,
        'gpsend': end,
        'call': ' '.join(sys.argv),
        'rounds': [],
    }
    with open('summary-stats.txt', 'w') as f:
        # print header
        print('#N winner window SNR significance nveto use-percentage '
              'efficiency deadtime cumulative-efficiency cumulative-deadtime',
              file=f)
        for r in rounds:
            # extract relevant statistics; efficiency, deadtime, and
            # use-percentage are stored as (numerator, denominator)
            # pairs, so convert each to a percentage
            results = [
                ('round', r.n),
                ('name', r.winner.name),
                ('window', r.winner.window),
                ('snr', r.winner.snr),
                ('significance', r.winner.significance),
                ('nveto', r.efficiency[0]),
                ('use-percentage',
                    r.use_percentage[0] / r.use_percentage[1] * 100.),
                ('efficiency', r.efficiency[0] / r.efficiency[1] * 100.),
                ('deadtime', r.deadtime[0] / r.deadtime[1] * 100.),
                ('cumulative-efficiency',
                    r.cum_efficiency[0] / r.cum_efficiency[1] * 100.),
                ('cumulative-deadtime',
                    r.cum_deadtime[0] / r.cum_deadtime[1] * 100.),
            ]
            # write to ASCII; zip(*results) transposes the (key, value)
            # pairs, so [1] picks out the values in column order
            print(' '.join(map(str, list(zip(*results))[1])), file=f)
            # write to JSON
            results.append(('files', r.files))
            json_['rounds'].append(dict(results))
    LOGGER.debug("Summary table written to %s" % f.name)

    with open('summary-stats.json', 'w') as f:
        json.dump(json_, f, sort_keys=True)
    LOGGER.debug("Summary JSON written to %s" % f.name)

    # -- generate workflow for omega scans

    if args.omega_scans:
        omegatimes = list(map(str, sorted(numpy.unique(
            [t['time'] for r in rounds for t in r.scans]))))
        LOGGER.debug("Collected %d times to omega scan" % len(omegatimes))
        newtimes = [t for t in omegatimes if not
                    os.path.exists(os.path.join(omegadir, str(t)))]
        LOGGER.debug("%d scans already complete or in progress, %d remaining"
                     % (len(omegatimes) - len(newtimes), len(newtimes)))
        if len(newtimes) > 0:
            LOGGER.info('Creating workflow for omega scans')
            flags = batch.get_command_line_flags(
                ifo=ifo,
                ignore_state_flags=True)
            condorcmds = batch.get_condor_arguments(
                timeout=4,
                extra_commands=["request_disk='1G'"],
                gps=start)
            batch.generate_dag(
                newtimes,
                flags=flags,
                submit=True,
                outdir=omegadir,
                condor_commands=condorcmds)
            LOGGER.info('Launched {} omega scans to condor'.format(
                len(newtimes)))
        else:
            LOGGER.debug('Skipping omega scans')

    # -- write HTML and finish

    index = html.write_hveto_page(
        ifo, start, end, rounds, plots,
        winners=[r.winner.name for r in rounds], **htmlv)
    LOGGER.debug("HTML written to %s" % index)
    LOGGER.debug("Analysis completed in %d seconds" % (time.time() - JOBSTART))
    LOGGER.info("-- Hveto complete --")
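
The script records the same per-round statistics twice: as
whitespace-delimited columns in summary-stats.txt, and in
summary-stats.json along with provenance (user, host, command line).
A minimal sketch of reading the JSON back for inspection (the file
name and field names are taken from the script above):

import json

with open('summary-stats.json') as f:
    stats = json.load(f)

print('%d rounds for %s over [%s, %s)' % (
    len(stats['rounds']), stats['ifo'],
    stats['gpsstart'], stats['gpsend']))
for rnd in stats['rounds']:
    print('round %d vetoed with %s (efficiency %.2f%%, deadtime %.2f%%)'
          % (rnd['round'], rnd['name'],
             rnd['efficiency'], rnd['deadtime']))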
Example #10
def get_segments(flag, validity=None, config=ConfigParser(), cache=None,
                 query=True, return_=True, coalesce=True, padding=None,
                 segdb_error='raise', url=None):
    """Retrieve the segments for a given flag

    Segments will be loaded from global memory if already defined,
    otherwise they will be loaded from the given
    :class:`~glue.lal.Cache`, or finally from the segment database

    Parameters
    ----------
    flag : `str`, `list`
        name of the flag to retrieve, a comma-separated `str` of
        names, or a `list` of names (including compound expressions)
    validity : `~gwpy.segments.DataQualityFlag`, `~gwpy.segments.SegmentList`, optional
        span over which segments are required, defaults to the
        ``gps-start-time``/``gps-end-time`` interval from ``config``
    config : `~configparser.ConfigParser`, optional
        configuration from which to read default GPS times and the
        segment-database URL
    cache : `~glue.lal.Cache`, optional
        cache of files from which to read segments, rather than
        querying the database
    query : `bool`, optional
        query the database for any times not already in memory,
        default: `True`
    return_ : `bool`, optional
        return the retrieved segments, default: `True`
    coalesce : `bool`, optional
        coalesce segments before returning, default: `True`
    padding : `tuple`, `dict`, optional
        ``(start, end)`` padding to apply to the segments of each flag
    segdb_error : `str`, optional
        how to handle errors from the segment database, one of
        ``'raise'``, ``'warn'``, or ``'ignore'``, default: ``'raise'``
    url : `str`, optional
        URL of the segment database, overriding any value in ``config``

    Returns
    -------
    segments : `~gwpy.segments.DataQualityFlag`,
               `~gwpy.segments.DataQualityDict`
        a single flag (if ``flag`` was given as a `str`), a dict of
        flags (otherwise), or `None` if ``return_`` is `False`
    """
    if isinstance(flag, (unicode, str)):
        flags = flag.split(',')
    else:
        flags = flag
    allflags = set([f for cf in flags for f in
                    re_flagdiv.split(str(cf))[::2] if f])

    if padding is None and isinstance(flag, DataQualityFlag):
        padding = {flag: flag.padding}
    elif padding is None:
        padding = dict((flag, isinstance(flag, DataQualityFlag) and
                              flag.padding or None) for flag in flags)

    # check validity
    if validity is None:
        # GPS times are stored as strings in the configuration
        start = float(config.get(DEFAULTSECT, 'gps-start-time'))
        end = float(config.get(DEFAULTSECT, 'gps-end-time'))
        span = SegmentList([Segment(start, end)])
        validity = span
    elif isinstance(validity, DataQualityFlag):
        validity = validity.active
        try:
            span = SegmentList([validity.extent()])
        except ValueError:
            span = SegmentList()
    else:
        try:
            span = SegmentList([SegmentList(validity).extent()])
        except ValueError:
            span = SegmentList()
    validity = SegmentList(validity)

    # generate output object
    out = DataQualityDict()
    for f in flags:
        out[f] = DataQualityFlag(f, known=validity, active=validity)
    for f in allflags:
        globalv.SEGMENTS.setdefault(f, DataQualityFlag(f))

    # read segments from global memory and get the union of needed times
    try:
        old = reduce(operator.and_, (globalv.SEGMENTS.get(
                                        f, DataQualityFlag(f)).known
                                    for f in flags))
    except TypeError:
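        # reduce() raises TypeError when ``flags`` is empty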
        old = SegmentList()
    newsegs = validity - old
    # load new segments
    query &= abs(newsegs) != 0
    query &= len(allflags) > 0
    if cache is not None:
        query &= len(cache) != 0
    if query:
        if cache is not None:
            try:
                new = DataQualityDict.read(cache, list(allflags))
            except IORegistryError as e:
                # can remove when astropy >= 1.2 is required
                if type(e) is not IORegistryError:
                    raise
                if len(allflags) == 1:
                    f = list(allflags)[0]
                    new = DataQualityDict()
                    new[f] = DataQualityFlag.read(cache, f, coalesce=False)
            for f in new:
                new[f].known &= newsegs
                new[f].active &= newsegs
                if coalesce:
                    new[f].coalesce()
                vprint("    Read %d segments for %s (%.2f%% coverage).\n"
                       % (len(new[f].active), f,
                          float(abs(new[f].known))/float(abs(newsegs))*100))
        else:
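            # with many separate gaps to fill, query the full span in
            # one go; otherwise query just the missing times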
            if len(newsegs) >= 10:
                qsegs = span
            else:
                qsegs = newsegs
            # parse configuration for query
            kwargs = {}
            if url is not None:
                kwargs['url'] = url
            else:
                try:
                    kwargs['url'] = config.get('segment-database', 'url')
                except (NoSectionError, NoOptionError):
                    pass
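            # URLs in SEGDB_URLS speak the legacy segdb protocol;
            # anything else is assumed to speak DQSEGDB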
            if kwargs.get('url', None) in SEGDB_URLS:
                query_func = DataQualityDict.query_segdb
            else:
                query_func = DataQualityDict.query_dqsegdb
            try:
                new = query_func(allflags, qsegs, on_error=segdb_error,
                                 **kwargs)
            except Exception as e:
                # ignore error from SegDB
                if segdb_error in ['ignore', None]:
                    pass
                # convert to warning
                elif segdb_error in ['warn']:
                    print('%sWARNING: %sCaught %s: %s [gwsumm.segments]'
                          % (WARNC, ENDC, type(e).__name__, str(e)),
                          file=sys.stderr)
                    warnings.warn('%s: %s' % (type(e).__name__, str(e)))
                # otherwise raise as normal
                else:
                    raise
                new = DataQualityDict()
            for f in new:
                new[f].known &= newsegs
                new[f].active &= newsegs
                if coalesce:
                    new[f].coalesce()
                vprint("    Downloaded %d segments for %s (%.2f%% coverage).\n"
                       % (len(new[f].active), f,
                          float(abs(new[f].known))/float(abs(newsegs))*100))
        # record new segments
        globalv.SEGMENTS += new
        for f in new:
            globalv.SEGMENTS[f].description = str(new[f].description)

    # return what was asked for, evaluating compound flags with the
    # corresponding set operations
    if return_:
        for compound in flags:
            union, intersection, exclude, notequal = split_compound_flag(
                compound)
            if len(union + intersection) == 1:
                # a single simple flag: copy its metadata directly
                f = (union + intersection)[0]
                out[compound].description = globalv.SEGMENTS[f].description
                out[compound].padding = padding.get(f, (0, 0))
            for flist, op in zip([exclude, intersection, union, notequal],
                                 [operator.sub, operator.and_, operator.or_,
                                  not_equal]):
                for f in flist:
                    pad = padding.get(f, (0, 0))
                    segs = globalv.SEGMENTS[f].copy()
                    if isinstance(pad, (float, int)):
                        segs = segs.pad(pad, pad)
                    elif pad is not None:
                        segs = segs.pad(*pad)
                    if coalesce:
                        segs = segs.coalesce()
                    out[compound] = op(out[compound], segs)
                    out[compound].known &= segs.known
                    out[compound].active &= segs.known
            out[compound].known &= validity
            out[compound].active &= validity
            if coalesce:
                out[compound].coalesce()
        if isinstance(flag, basestring):
            return out[flag]
        else:
            return out
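
A minimal usage sketch for the function above (the flag names and GPS
times are hypothetical; a segment-database URL is assumed to be
available via ``url`` or the ``[segment-database]`` section of
``config``):

from gwpy.segments import Segment, SegmentList

span = SegmentList([Segment(1126051217, 1126137617)])

# a single name returns one DataQualityFlag...
ready = get_segments('H1:DMT-ANALYSIS_READY:1', validity=span)

# ...while a list of names returns a DataQualityDict keyed by flag
flags = get_segments(['H1:DMT-ANALYSIS_READY:1', 'H1:ODC-MASTER_OK:1'],
                     validity=span)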
Example #11
def get_segments(flag, validity=None, config=ConfigParser(), cache=None,
                 query=True, return_=True, coalesce=True, padding=None,
                 segdb_error='raise', url=None):
    """Retrieve the segments for a given flag

    Segments will be loaded from global memory if already defined,
    otherwise they will be loaded from the given
    :class:`~glue.lal.Cache`, or finally from the segment database

    Parameters
    ----------
    flag : `str`, `list`
        name of the flag to retrieve, a comma-separated `str` of
        names, or a `list` of names (including compound expressions)
    validity : `~gwpy.segments.DataQualityFlag`, `~gwpy.segments.SegmentList`, optional
        span over which segments are required, defaults to the
        ``gps-start-time``/``gps-end-time`` interval from ``config``
    config : `~configparser.ConfigParser`, optional
        configuration from which to read default GPS times and the
        segment-database URL
    cache : `~glue.lal.Cache`, optional
        cache of files from which to read segments, rather than
        querying the database
    query : `bool`, optional
        query the database for any times not already in memory,
        default: `True`
    return_ : `bool`, optional
        return the retrieved segments, default: `True`
    coalesce : `bool`, optional
        coalesce segments before returning, default: `True`
    padding : `tuple`, `dict`, optional
        ``(start, end)`` padding to apply to the segments of each flag
    segdb_error : `str`, optional
        how to handle errors from the segment database, one of
        ``'raise'``, ``'warn'``, or ``'ignore'``, default: ``'raise'``
    url : `str`, optional
        URL of the segment database, overriding any value in ``config``

    Returns
    -------
    segments : `~gwpy.segments.DataQualityFlag`,
               `~gwpy.segments.DataQualityDict`
        a single flag (if ``flag`` was given as a `str`), a dict of
        flags (otherwise), or `None` if ``return_`` is `False`
    """
    if isinstance(flag, (unicode, str)):
        flags = flag.split(',')
    else:
        flags = flag
    allflags = set([f for cf in flags for f in
                    re_flagdiv.split(str(cf))[::2] if f])

    if padding is None and isinstance(flag, DataQualityFlag):
        padding = {flag: flag.padding}
    elif padding is None:
        padding = dict((flag, isinstance(flag, DataQualityFlag) and
                              flag.padding or None) for flag in flags)

    # check validity
    if validity is None:
        # GPS times are stored as strings in the configuration
        start = float(config.get(DEFAULTSECT, 'gps-start-time'))
        end = float(config.get(DEFAULTSECT, 'gps-end-time'))
        span = SegmentList([Segment(start, end)])
        validity = span
    elif isinstance(validity, DataQualityFlag):
        validity = validity.active
        try:
            span = SegmentList([validity.extent()])
        except ValueError:
            span = SegmentList()
    else:
        try:
            span = SegmentList([SegmentList(validity).extent()])
        except ValueError:
            span = SegmentList()
    validity = SegmentList(validity)

    # generate output object
    out = DataQualityDict()
    for f in flags:
        out[f] = DataQualityFlag(f, known=validity, active=validity)
    for f in allflags:
        globalv.SEGMENTS.setdefault(f, DataQualityFlag(f))

    # read segments from global memory and get the union of needed times
    try:
        old = reduce(operator.and_, (globalv.SEGMENTS.get(
                                        f, DataQualityFlag(f)).known
                                    for f in flags))
    except TypeError:
        old = SegmentList()
    newsegs = validity - old
    # load new segments
    query &= abs(newsegs) != 0
    query &= len(allflags) > 0
    if cache is not None:
        query &= len(cache) != 0
    if query:
        if cache is not None:
            try:
                new = DataQualityDict.read(cache, list(allflags))
            except Exception as e:
                # older versions of astropy raised a bare Exception
                # from the I/O registry; re-raise anything more
                # specific
                if type(e) is not Exception:
                    raise
                if len(allflags) == 1:
                    f = list(allflags)[0]
                    new = DataQualityDict()
                    new[f] = DataQualityFlag.read(cache, f, coalesce=False)
            for f in new:
                new[f].known &= newsegs
                new[f].active &= newsegs
                if coalesce:
                    new[f].coalesce()
                vprint("    Read %d segments for %s (%.2f%% coverage).\n"
                       % (len(new[f].active), f,
                          float(abs(new[f].known))/float(abs(newsegs))*100))
        else:
            if len(newsegs) >= 10:
                qsegs = span
            else:
                qsegs = newsegs
            # parse configuration for query
            kwargs = {}
            if url is not None:
                kwargs['url'] = url
            else:
                try:
                    kwargs['url'] = config.get('segment-database', 'url')
                except (NoSectionError, NoOptionError):
                    pass
            if kwargs.get('url', None) in SEGDB_URLS:
                query_func = DataQualityDict.query_segdb
            else:
                query_func = DataQualityDict.query_dqsegdb
            try:
                new = query_func(allflags, qsegs, on_error=segdb_error,
                                 **kwargs)
            except Exception as e:
                # ignore error from SegDB
                if segdb_error in ['ignore', None]:
                    pass
                # convert to warning
                elif segdb_error in ['warn']:
                    print('%sWARNING: %sCaught %s: %s [gwsumm.segments]'
                          % (WARNC, ENDC, type(e).__name__, str(e)),
                          file=sys.stderr)
                    warnings.warn('%s: %s' % (type(e).__name__, str(e)))
                # otherwise raise as normal
                else:
                    raise
                new = DataQualityDict()
            for f in new:
                new[f].known &= newsegs
                new[f].active &= newsegs
                if coalesce:
                    new[f].coalesce()
                vprint("    Downloaded %d segments for %s (%.2f%% coverage).\n"
                       % (len(new[f].active), f,
                          float(abs(new[f].known))/float(abs(newsegs))*100))
        # record new segments
        globalv.SEGMENTS += new
        for f in new:
            globalv.SEGMENTS[f].description = str(new[f].description)

    # return what was asked for
    if return_:
        for compound in flags:
            union, intersection, exclude, notequal = split_compound_flag(
                compound)
            if len(union + intersection) == 1:
                # a single simple flag: copy its metadata directly
                f = (union + intersection)[0]
                out[compound].description = globalv.SEGMENTS[f].description
                out[compound].padding = padding.get(f, (0, 0))
            for flist, op in zip([exclude, intersection, union, notequal],
                                 [operator.sub, operator.and_, operator.or_,
                                  not_equal]):
                for f in flist:
                    pad = padding.get(f, (0, 0))
                    segs = globalv.SEGMENTS[f].copy()
                    if isinstance(pad, (float, int)):
                        segs = segs.pad(pad, pad)
                    elif pad is not None:
                        segs = segs.pad(*pad)
                    if coalesce:
                        segs = segs.coalesce()
                    out[compound] = op(out[compound], segs)
                    out[compound].known &= segs.known
                    out[compound].active &= segs.known
            out[compound].known &= validity
            out[compound].active &= validity
            if coalesce:
                out[compound].coalesce()
        if isinstance(flag, basestring):
            return out[flag]
        else:
            return out
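
The same lookup cascade applies here: global memory first, then the
given cache, then the database. A sketch of the cache-driven path,
which bypasses the database entirely (the paths are hypothetical, and
``Cache.from_urls`` is assumed to be available from glue.lal):

from glue.lal import Cache
from gwpy.segments import Segment, SegmentList

span = SegmentList([Segment(1126051217, 1126137617)])

# build a cache of local segment files and read from those instead
# of querying the database
cache = Cache.from_urls([
    'file:///data/segments/H1-SEGMENTS-1126051217-86400.xml.gz'])
flags = get_segments(['H1:DMT-ANALYSIS_READY:1'], validity=span,
                     cache=cache)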