def __init__(self, detection_threshold, cal_uncertainty, filter_width, open_box):
    self.detection_threshold = detection_threshold
    self.cal_uncertainty = cal_uncertainty
    self.filter_width = filter_width
    self.seglists = segments.segmentlistdict()
    self.vetoseglists = segments.segmentlistdict()
    self.found = []
    self.n_diagnostics = 100  # keep 100 loudest missed and quietest found injections
    self.loudest_missed = []
    self.quietest_found = []
    self.all = []
    self.open_box = open_box
def segmentlistdict(self):
    """
    A segmentlistdict object describing the instruments and time
    spanned by this CacheEntry.  A new object is constructed each time
    this attribute is accessed (segments are immutable so there is no
    reason to try to share a reference to the CacheEntry's internal
    segment; modifications of one would not be reflected in the other
    anyway).

    Example:

    >>> c = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
    >>> c.segmentlistdict['H1']
    [segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]

    The "observatory" column of the cache entry, which is frequently
    used to store instrument names, is parsed into instrument names for
    the dictionary keys using the same rules as
    glue.ligolw.lsctables.instrumentsproperty.get().

    Example:

    >>> c = CacheEntry("H1H2, S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1H2-815901601-576.xml")
    >>> c.segmentlistdict['H1H2']
    [segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]
    """
    # the import has to be done here to break the cyclic dependency
    from glue.ligolw.lsctables import instrumentsproperty
    instruments = instrumentsproperty.get(self.observatory) or (None,)
    return segments.segmentlistdict((instrument, segments.segmentlist(self.segment is not None and [self.segment] or [])) for instrument in instruments)
def parse_config_file(options):
    if options.verbose:
        print("reading %s ..." % options.config_file, file=sys.stderr)
    config = ConfigParser.SafeConfigParser()
    config.read(options.config_file)

    options.tag = config.get("pipeline", "user_tag")
    options.enable_clustering = config.getboolean("pipeline", "enable_clustering")

    seglistdict = segments.segmentlistdict()
    tiling_phase = {}
    for ifo in config.get("pipeline", "ifos").split():
        seglistdict[ifo] = segmentsUtils.fromsegwizard(open(config.get("pipeline", "seglist_%s" % ifo)), coltype = LIGOTimeGPS).coalesce()
        try:
            offset = config.getfloat("pipeline", "tiling_phase_%s" % ifo)
        except ConfigParser.NoOptionError:
            offset = 0.0
        if offset:
            tiling_phase[ifo] = offset

    options.psds_per_power = config.getint("pipeline", "psds_per_power")
    options.psds_per_injection = config.getint("pipeline", "psds_per_injection")
    options.timing_params = power.TimingParameters(config)

    return seglistdict, tiling_phase, config
def make_cache_entry(input_cache, description, path):
    # summarize segment information
    seglists = segments.segmentlistdict()
    for c in input_cache:
        seglists |= c.segmentlistdict

    # obtain instrument list
    instruments = seglists.keys()
    if None in instruments:
        instruments.remove(None)
    instruments.sort()

    # remove empty segment lists to allow extent_all() to work
    for instrument in seglists.keys():
        if not seglists[instrument]:
            del seglists[instrument]

    # make the URL
    if path:
        url = "file://localhost%s" % os.path.abspath(path)
    else:
        # FIXME: old version of CacheEntry allowed None for URL,
        # new version doesn't.  correct fix is to modify calling
        # code to not try to initialize the output cache until
        # after the input is known, but for now we'll just do this
        # stupid hack.
        url = "file://localhost/dev/null"

    # construct a cache entry from the instruments and segments that remain
    return CacheEntry("+".join(instruments) or None, description, seglists.extent_all(), url)
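# A hedged sanity-check sketch of the instrument/segment bookkeeping performed
# above, assuming only ligo.segments is available; the GPS values are toy
# numbers and CacheEntry itself is not needed for the illustration.
from ligo import segments

seglists = segments.segmentlistdict(
    H1=segments.segmentlist([segments.segment(100, 200)]),
    L1=segments.segmentlist([segments.segment(150, 300)]))
print("+".join(sorted(seglists.keys())))  # "H1+L1", what becomes the cache "observatory" field
print(seglists.extent_all())              # segment(100, 300), the overall span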
def ligolw_bucut(xmldoc, options, burst_test_func, veto_segments=segments.segmentlistdict(), del_non_coincs=False, del_skipped_injections=False, program=None, verbose=False):
    contents = DocContents(xmldoc, program)

    process = append_process(xmldoc, options)

    apply_filters(contents, burst_test_func, veto_segments, del_non_coincs=del_non_coincs, del_skipped_injections=del_skipped_injections, verbose=verbose)

    ligolw_process.set_process_end_time(process)

    seg = contents.outsegs.extent_all()
    ligolw_search_summary.append_search_summary(xmldoc, process, inseg=seg, outseg=seg, nevents=len(contents.snglbursttable))

    return xmldoc
def __init__(self, x_instrument, y_instrument):
    self.fig, self.axes = SnglBurstUtils.make_burst_plot("%s Offset (s)" % x_instrument, "%s Offset (s)" % y_instrument)
    self.fig.set_size_inches(6, 6)
    self.x_instrument = x_instrument
    self.y_instrument = y_instrument
    self.tisi_rows = None
    self.seglists = segments.segmentlistdict()
    self.counts = None
def get_veto_segments(self, connection):
    if self.coinc_inspiral_table:
        if self.opts.veto_segments_name is not None:
            return db_thinca_rings.get_veto_segments(connection, self.opts.veto_segments_name)
    # FIXME BURST CASE VETOS NOT HANDLED
    else:
        return segments.segmentlistdict()
def cache_to_seglistdict(cache):
    """
    Construct a coalesced segmentlistdict object from a list of
    lal.utils.CacheEntry objects.
    """
    s = segments.segmentlistdict()
    for c in cache:
        s |= c.segmentlistdict
    return s
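# A quick hedged usage sketch for cache_to_seglistdict(), assuming
# lal.utils.CacheEntry and ligo.segments are importable; the cache lines
# below are illustrative, not from a real run.
from ligo import segments
from lal.utils import CacheEntry

cache = [CacheEntry("H1 S5 815901601 576.5 file://localhost/tmp/H1-815901601-576.xml"),
         CacheEntry("L1 S5 815901601 576.5 file://localhost/tmp/L1-815901601-576.xml")]
seglists = cache_to_seglistdict(cache)
print(sorted(seglists.keys()))     # ['H1', 'L1']
print(abs(seglists.extent_all()))  # total span (576.5 s) covered by the two entries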
def duration_from_cache(cachef):
    """ Determine the spanned duration of a cachefile """
    segs = segments.segmentlistdict()
    for entry in map(CacheEntry, open(cachef)):
        segs |= entry.segmentlistdict
    # collapse the per-instrument lists into a single segmentlist
    segs = segs.union(segs.keys())
    return segs[0], abs(segs)
def __init__(self, x_instrument, y_instrument):
    self.fig, self.axes = SnglBurstUtils.make_burst_plot("%s Offset (s)" % x_instrument, "%s Offset (s)" % y_instrument)
    self.fig.set_size_inches(6, 6)
    self.x_instrument = x_instrument
    self.y_instrument = y_instrument
    self.tisi_rows = None
    self.seglists = segments.segmentlistdict()
    self.counts = None
def get_moon_segments(config_struct, segmentlist, observer, fxdbdy, radec):
    if "moon_constraint" in config_struct:
        moon_constraint = float(config_struct["moon_constraint"])
    else:
        moon_constraint = 20.0

    moonsegmentlist = segments.segmentlist()
    dt = 1.0 / 24.0
    tt = np.arange(segmentlist[0][0], segmentlist[-1][1] + dt, dt)

    ra2 = radec.ra.radian
    d2 = radec.dec.radian

    # Where is the moon?
    moon = ephem.Moon()
    for ii in range(len(tt) - 1):
        observer.date = ephem.Date(Time(tt[ii], format='mjd', scale='utc').iso)
        moon.compute(observer)
        fxdbdy.compute(observer)

        alt_target = float(repr(fxdbdy.alt)) * (360 / (2 * np.pi))
        az_target = float(repr(fxdbdy.az)) * (360 / (2 * np.pi))
        #print("Altitude / Azimuth of target: %.5f / %.5f"%(alt_target,az_target))

        alt_moon = float(repr(moon.alt)) * (360 / (2 * np.pi))
        az_moon = float(repr(moon.az)) * (360 / (2 * np.pi))
        #print("Altitude / Azimuth of moon: %.5f / %.5f"%(alt_moon,az_moon))

        ra_moon = (180 / np.pi) * float(repr(moon.ra))
        dec_moon = (180 / np.pi) * float(repr(moon.dec))

        # Converting both target and moon ra and dec to radians
        ra1 = float(repr(moon.ra))
        d1 = float(repr(moon.dec))

        # Calculate angle between target and moon
        cosA = np.sin(d1) * np.sin(d2) + np.cos(d1) * np.cos(d2) * np.cos(ra1 - ra2)
        angle = np.arccos(cosA) * (360 / (2 * np.pi))
        #print("Angle between moon and target: %.5f"%(angle))

        #if angle >= 50.0*moon.moon_phase**2:
        if angle >= moon_constraint:
            segment = segments.segment(tt[ii], tt[ii + 1])
            moonsegmentlist = moonsegmentlist + segments.segmentlist([segment])
            moonsegmentlist.coalesce()

    moonsegmentlistdic = segments.segmentlistdict()
    moonsegmentlistdic["observations"] = segmentlist
    moonsegmentlistdic["moon"] = moonsegmentlist
    moonsegmentlist = moonsegmentlistdic.intersection(["observations", "moon"])
    moonsegmentlist.coalesce()

    return moonsegmentlist
def load_segments_from_xml(xml_doc, return_dict=False, select_id=None):
    """Read a ligo.segments.segmentlist from the file object file containing an
    xml segment table.

    Parameters
    ----------
    xml_doc: name of segment xml file

    Keyword Arguments:
        return_dict : [ True | False ]
            return a ligo.segments.segmentlistdict containing coalesced
            ligo.segments.segmentlists keyed by seg_def.name for each entry
            in the contained segment_def_table. Default False
        select_id : int
            return a ligo.segments.segmentlist object containing only
            those segments matching the given segment_def_id integer
    """
    # Load SegmentDefTable and SegmentTable
    seg_def_table = load_xml_table(xml_doc, glsctables.SegmentDefTable.tableName)
    seg_table = load_xml_table(xml_doc, glsctables.SegmentTable.tableName)

    if return_dict:
        segs = segments.segmentlistdict()
    else:
        segs = segments.segmentlist()

    seg_id = {}
    for seg_def in seg_def_table:
        seg_id[int(seg_def.segment_def_id)] = str(seg_def.name)
        if return_dict:
            segs[str(seg_def.name)] = segments.segmentlist()

    for seg in seg_table:
        if return_dict:
            segs[seg_id[int(seg.segment_def_id)]]\
                .append(segments.segment(seg.start_time, seg.end_time))
            continue
        if select_id and int(seg.segment_def_id) == select_id:
            segs.append(segments.segment(seg.start_time, seg.end_time))
            continue
        segs.append(segments.segment(seg.start_time, seg.end_time))

    if return_dict:
        for seg_name in seg_id.values():
            segs[seg_name] = segs[seg_name].coalesce()
    else:
        segs = segs.coalesce()

    return segs
def ligolw_bucut(xmldoc, options, burst_test_func, veto_segments = segments.segmentlistdict(), del_non_coincs = False, del_skipped_injections = False, program = None, verbose = False):
    contents = DocContents(xmldoc, program)

    process = append_process(xmldoc, options)

    apply_filters(contents, burst_test_func, veto_segments, del_non_coincs = del_non_coincs, del_skipped_injections = del_skipped_injections, verbose = verbose)

    ligolw_process.set_process_end_time(process)

    seg = contents.outsegs.extent_all()
    ligolw_search_summary.append_search_summary(xmldoc, process, inseg = seg, outseg = seg, nevents = len(contents.snglbursttable))

    return xmldoc
def get_segments(params, config_struct):
    gpstime = params["gpstime"]
    event_mjd = Time(gpstime, format='gps', scale='utc').mjd

    segmentlist = segments.segmentlist()
    n_windows = len(params["Tobs"]) // 2
    start_segments = event_mjd + params["Tobs"][::2]
    end_segments = event_mjd + params["Tobs"][1::2]
    for start_segment, end_segment in zip(start_segments, end_segments):
        segmentlist.append(segments.segment(start_segment, end_segment))

    observer = ephem.Observer()
    observer.lat = str(config_struct["latitude"])
    observer.lon = str(config_struct["longitude"])
    observer.horizon = str(-12.0)
    observer.elevation = config_struct["elevation"]

    date_start = ephem.Date(Time(segmentlist[0][0], format='mjd', scale='utc').iso)
    date_end = ephem.Date(Time(segmentlist[-1][1], format='mjd', scale='utc').iso)
    observer.date = ephem.Date(Time(segmentlist[0][0], format='mjd', scale='utc').iso)

    sun = ephem.Sun()
    nightsegmentlist = segments.segmentlist()
    while date_start < date_end:
        date_rise = observer.next_rising(sun, start=date_start)
        date_set = observer.next_setting(sun, start=date_start)
        if date_set > date_rise:
            date_set = observer.previous_setting(sun, start=date_start)

        astropy_rise = Time(date_rise.datetime(), scale='utc').mjd
        astropy_set = Time(date_set.datetime(), scale='utc').mjd

        segment = segments.segment(astropy_set, astropy_rise)
        nightsegmentlist = nightsegmentlist + segments.segmentlist([segment])
        nightsegmentlist.coalesce()

        date_start = date_rise
        observer.date = date_rise

    segmentlistdic = segments.segmentlistdict()
    segmentlistdic["observations"] = segmentlist
    segmentlistdic["night"] = nightsegmentlist
    segmentlist = segmentlistdic.intersection(["observations", "night"])
    segmentlist.coalesce()

    return segmentlist
def get_segments_tile(config_struct, observatory, radec, segmentlist): observer = astroplan.Observer(location=observatory) fxdbdy = astroplan.FixedTarget(coord=radec) date_start = Time(segmentlist[0][0], format='mjd', scale='utc') date_end = Time(segmentlist[-1][1], format='mjd', scale='utc') tilesegmentlist = segments.segmentlist() while date_start.mjd < date_end.mjd: date_rise = observer.target_rise_time(date_start, fxdbdy) date_set = observer.target_set_time(date_start, fxdbdy) print(date_rise.mjd, date_set.mjd) if (date_rise.mjd < 0) and (date_set.mjd < 0): break print(date_rise.mjd, date_set.mjd) if date_rise > date_set: date_rise = observer.target_rise_time( date_start - TimeDelta(24 * u.hour), fxdbdy) print(date_rise.mjd, date_set.mjd) segment = segments.segment(date_rise.mjd, date_set.mjd) tilesegmentlist = tilesegmentlist + segments.segmentlist([segment]) tilesegmentlist.coalesce() date_start = date_set + TimeDelta(24 * u.hour) #moonsegmentlist = get_skybrightness(\ # config_struct,segmentlist,observer,fxdbdy,radec) moonsegmentlist = get_moon_segments(\ config_struct,segmentlist,observer,fxdbdy,radec) tilesegmentlistdic = segments.segmentlistdict() tilesegmentlistdic["observations"] = segmentlist tilesegmentlistdic["tile"] = tilesegmentlist tilesegmentlistdic["moon"] = moonsegmentlist tilesegmentlist = tilesegmentlistdic.intersection( ["observations", "tile", "moon"]) tilesegmentlist.coalesce() return tilesegmentlist
def get_segments(params, config_struct):
    gpstime = params["gpstime"]
    event_mjd = Time(gpstime, format='gps', scale='utc').mjd

    segmentlist = segments.segmentlist()
    n_windows = len(params["Tobs"]) // 2
    start_segments = event_mjd + params["Tobs"][::2]
    end_segments = event_mjd + params["Tobs"][1::2]
    for start_segment, end_segment in zip(start_segments, end_segments):
        segmentlist.append(segments.segment(start_segment, end_segment))

    location = astropy.coordinates.EarthLocation(config_struct["longitude"],
                                                 config_struct["latitude"],
                                                 config_struct["elevation"])
    observer = astroplan.Observer(location=location)

    date_start = Time(segmentlist[0][0], format='mjd', scale='utc')
    date_end = Time(segmentlist[-1][1], format='mjd', scale='utc')

    nightsegmentlist = segments.segmentlist()
    while date_start < date_end:
        date_rise = observer.twilight_morning_astronomical(date_start)
        date_set = observer.twilight_evening_astronomical(date_start)
        if date_set.mjd > date_rise.mjd:
            date_set = observer.twilight_evening_astronomical(date_start - TimeDelta(24 * u.hour))

        segment = segments.segment(date_set.mjd, date_rise.mjd)
        nightsegmentlist = nightsegmentlist + segments.segmentlist([segment])
        nightsegmentlist.coalesce()

        date_start = date_rise + TimeDelta(24 * u.hour)

    segmentlistdic = segments.segmentlistdict()
    segmentlistdic["observations"] = segmentlist
    segmentlistdic["night"] = nightsegmentlist
    segmentlist = segmentlistdic.intersection(["observations", "night"])
    segmentlist.coalesce()

    return segmentlist
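# Toy illustration (made-up MJD values, ligo.segments only) of the
# segmentlistdict intersection used above to keep only the parts of the
# observing windows that fall at night.
from ligo import segments

windows = segments.segmentlist([segments.segment(59000.0, 59002.0)])
night = segments.segmentlist([segments.segment(59000.1, 59000.4),
                              segments.segment(59001.1, 59001.4)])
d = segments.segmentlistdict()
d["observations"] = windows
d["night"] = night
print(d.intersection(["observations", "night"]))
# [segment(59000.1, 59000.4), segment(59001.1, 59001.4)]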
def extract_vetoes(veto_files, ifos):
    """Extracts vetoes from veto filelist"""

    # Initialize veto containers
    vetoes = segments.segmentlistdict()
    for ifo in ifos:
        vetoes[ifo] = segments.segmentlist()

    # Construct veto list from veto filelist
    if veto_files:
        for file in veto_files:
            ifo = os.path.basename(file)[:2]
            if ifo in ifos:
                # This returns a coalesced list of the vetoes
                tmp_veto_segs = dqSegmentUtils.fromsegmentxml(open(file, 'r'))
                for entry in tmp_veto_segs:
                    vetoes[ifo].append(entry)
    for ifo in ifos:
        vetoes[ifo].coalesce()

    return vetoes
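# Hedged sketch of how a veto dictionary like the one returned above is
# typically applied, assuming only ligo.segments; the science segments here
# are toy GPS values, not real data.
from ligo import segments

science = segments.segmentlistdict(H1=segments.segmentlist([segments.segment(1000, 2000)]))
vetoes = segments.segmentlistdict(H1=segments.segmentlist([segments.segment(1200, 1300)]))
analysable = science - vetoes
print(analysable["H1"])  # [segment(1000, 1200), segment(1300, 2000)]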
def segmentlistdict(self):
    """
    A segmentlistdict object describing the instruments and time
    spanned by this CacheEntry.  A new object is constructed each time
    this attribute is accessed (segments are immutable so there is no
    reason to try to share a reference to the CacheEntry's internal
    segment; modifications of one would not be reflected in the other
    anyway).

    Example:

    >>> c = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
    >>> c.segmentlistdict['H1']
    [segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]

    The "observatory" column of the cache entry, which is frequently
    used to store instrument names, is parsed into instrument names for
    the dictionary keys using the same rules as
    ligo.lw.lsctables.instrumentsproperty.get().

    Example:

    >>> c = CacheEntry("H1H2, S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1H2-815901601-576.xml")
    >>> c.segmentlistdict['H1H2']
    [segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]
    """
    # the import has to be done here to break the cyclic dependency
    try:
        from ligo.lw.lsctables import instrumentsproperty
    except ImportError:
        # FIXME: remove when we can rely on ligo.lw being installed
        # (why isn't it!?)
        from glue.ligolw.lsctables import instrumentsproperty
    instruments = instrumentsproperty.get(self.observatory) or (None,)
    return segments.segmentlistdict((instrument, segments.segmentlist(self.segment is not None and [self.segment] or [])) for instrument in instruments)
def ligolw_bucut(xmldoc, burst_test_func, veto_segments=segments.segmentlistdict(), del_non_coincs=False, del_skipped_injections=False, program=None, comment=None, verbose=False):
    process = ligolw_process.register_to_xmldoc(xmldoc, process_program_name, paramdict, version=__version__, cvs_repository=u"lscsoft", cvs_entry_time=__date__, comment=comment)

    contents = DocContents(xmldoc, program)

    apply_filters(contents, burst_test_func, veto_segments, del_non_coincs=del_non_coincs, del_skipped_injections=del_skipped_injections, verbose=verbose)

    seg = contents.outsegs.extent_all()
    ligolw_search_summary.append_search_summary(xmldoc, process, inseg=seg, outseg=seg, nevents=len(contents.snglbursttable))

    ligolw_process.set_process_end_time(process)

    return xmldoc
def extract_vetoes(all_veto_files, ifos, veto_cat):
    """Extracts vetoes from veto filelist"""

    if all_veto_files and (veto_cat is None):
        err_msg = "Must supply veto category to apply vetoes."
        raise RuntimeError(err_msg)

    # Initialize veto containers
    vetoes = segments.segmentlistdict()
    for ifo in ifos:
        vetoes[ifo] = segments.segmentlist()

    veto_files = []
    veto_cats = range(2, veto_cat + 1)
    for cat in veto_cats:
        veto_files += [vf for vf in all_veto_files if "CAT" + str(cat) in vf]
    n_found = len(veto_files)
    n_expected = len(ifos) * len(veto_cats)
    if n_found != n_expected:
        err_msg = f"Found {n_found} veto files instead of the expected "
        err_msg += f"{n_expected}; check the options."
        raise RuntimeError(err_msg)

    # Construct veto list from veto filelist
    if veto_files:
        for veto_file in veto_files:
            ifo = os.path.basename(veto_file)[:2]
            if ifo in ifos:
                # This returns a coalesced list of the vetoes
                tmp_veto_segs = load_segments_from_xml(veto_file)
                for entry in tmp_veto_segs:
                    vetoes[ifo].append(entry)
    for ifo in ifos:
        vetoes[ifo].coalesce()

    return vetoes
self.axes.set_title("Time-Frequency Plane\n(%d Triggers)" % self.nevents) for seg in ~self.seglist & segments.segmentlist([segments.segment(self.axes.get_xlim())]): self.axes.axvspan(float(seg[0]), float(seg[1]), facecolor = "k", alpha = 0.2) # # ============================================================================= # # Load Data # # ============================================================================= # summary = {} seglists = segments.segmentlistdict() for n, filename in enumerate(ligolw_utils.sort_files_by_size(filenames, options.verbose, reverse = True)): if options.verbose: print("%d/%d:" % (n + 1, len(filenames)), end=' ', file=sys.stderr) xmldoc = ligolw_utils.load_filename(filename, verbose = options.verbose, contenthandler = ligolw.LIGOLWContentHandler) seglists |= ligolw_search_summary.segmentlistdict_fromsearchsummary(xmldoc, options.livetime_program).coalesce() xmldoc.unlink() # # ============================================================================= # # Plot #
def __init__(self, flist, opts): self.far = {} self.segments = segments.segmentlistdict() self.non_inj_fnames = [] self.inj_fnames = [] #self.der_fit = None self.twoDMassBins = None #self.dBin = {} self.gw = None self.found = {} self.missed = {} self.wnfunc = None self.opts = opts if opts.bootstrap_iterations: self.bootnum = int(opts.bootstrap_iterations) else: self.bootnum = 100 self.veto_segments = segments.segmentlistdict() self.zero_lag_segments = {} self.instruments = [] self.livetime = {} self.minmass = None self.maxmass = None self.mintotal = None self.maxtotal = None for f in flist: if opts.verbose: print("Gathering stats from: %s...." % (f, ), file=sys.stderr) working_filename = dbtables.get_connection_filename( f, verbose=opts.verbose) connection = sqlite3.connect(working_filename) dbtables.DBTable_set_connection(connection) xmldoc = dbtables.get_xml(connection) # look for a sim table try: sim_inspiral_table = dbtables.lsctables.SimInspiralTable.get_table( xmldoc) self.inj_fnames.append(f) sim = True except ValueError: self.non_inj_fnames.append(f) sim = False if not sim: if opts.veto_segments_name is not None: self.veto_segments = db_thinca_rings.get_veto_segments( connection, opts.veto_segments_name) self.get_instruments(connection) self.segments += db_thinca_rings.get_thinca_zero_lag_segments( connection, program_name=opts.live_time_program) self.get_far_thresholds(connection) else: self.get_mass_ranges(connection) #connection.close() dbtables.discard_connection_filename(f, working_filename, verbose=opts.verbose) dbtables.DBTable_set_connection(None) # FIXME Do these have to be done by instruments? self.segments -= self.veto_segments # compute far, segments and livetime by instruments for i in self.instruments: self.far[i] = min(self.far[i]) # FIXME this bombs if any of the FARS are zero. maybe it should continue # and just remove that instrument combo from the calculation if self.far[i] == 0: print("Encountered 0 FAR in %s, ABORTING" % (i, ), file=sys.stderr) sys.exit(1) self.zero_lag_segments[i] = self.segments.intersection( i) - self.segments.union(set(self.segments.keys()) - i) # Livetime must have playground removed self.livetime[i] = float( abs(self.zero_lag_segments[i] - segmentsUtils.S2playground(self.segments.extent_all()))) if opts.verbose: print( "%s FAR %e, livetime %f" % (",".join(sorted(list(i))), self.far[i], self.livetime[i]), file=sys.stderr) # get a 2D mass binning self.twoDMassBins = self.get_2d_mass_bins(self.minmass, self.maxmass, opts.mass_bins)
def segmentlistdict(self):
    return segments.segmentlistdict((key, value.segmentlist()) for key, value in self.items())
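# The segmentlistdict returned above supports the dictionary-of-segmentlists
# arithmetic used throughout these snippets. A minimal sketch with toy values,
# assuming only ligo.segments:
from ligo import segments

d = segments.segmentlistdict()
d["H1"] = segments.segmentlist([segments.segment(0, 10)])
d["L1"] = segments.segmentlist([segments.segment(5, 15)])
print(d.intersection(["H1", "L1"]))  # [segment(5, 10)]
print(d.union(["H1", "L1"]))         # [segment(0, 15)]
print(d.extent_all())                # segment(0, 15)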
def __init__(self, opts, flist): self.segments = segments.segmentlistdict() self.non_inj_fnames = [] self.inj_fnames = [] self.found = {} self.missed = {} self.opts = opts self.veto_segments = segments.segmentlistdict() self.zero_lag_segments = {} self.instruments = [] self.livetime = {} self.multi_burst_table = None self.coinc_inspiral_table = None for f in flist: if opts.verbose: print("Gathering stats from: %s...." % (f,), file=sys.stderr) working_filename = dbtables.get_connection_filename(f, tmp_path=opts.tmp_space, verbose = opts.verbose) connection = sqlite3.connect(working_filename) dbtables.DBTable_set_connection(connection) xmldoc = dbtables.get_xml(connection) # look for a sim table try: sim_inspiral_table = dbtables.lsctables.SimInspiralTable.get_table(xmldoc) self.inj_fnames.append(f) sim = True except ValueError: self.non_inj_fnames.append(f) sim = False # FIGURE OUT IF IT IS A BURST OR INSPIRAL RUN try: self.multi_burst_table = dbtables.lsctables.MultiBurstTable.get_table(xmldoc) except ValueError: self.multi_burst_table = None try: self.coinc_inspiral_table = dbtables.lsctables.CoincInspiralTable.get_table(xmldoc) except ValueError: self.coinc_inspiral_table = None if self.multi_burst_table and self.coinc_inspiral_table: print("both burst and inspiral tables found. Aborting", file=sys.stderr) raise ValueError if not sim: self.get_instruments(connection) self.segments += self.get_segments(connection,xmldoc) #FIXME, don't assume veto segments are the same in every file! self.veto_segments = self.get_veto_segments(connection) dbtables.discard_connection_filename(f, working_filename, verbose = opts.verbose) dbtables.DBTable_set_connection(None) # remove redundant instruments self.instruments = list(set(self.instruments)) # FIXME Do these have to be done by instruments? self.segments -= self.veto_segments # segments and livetime by instruments for i in self.instruments: self.zero_lag_segments[i] = self.segments.intersection(i) - self.segments.union(set(self.segments.keys()) - i) self.livetime[i] = float(abs(self.zero_lag_segments[i]))
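# Hedged sketch of the "exactly this instrument combination" zero-lag livetime
# calculation used above, assuming only ligo.segments; the GPS values and the
# frozenset of "on" instruments are toy inputs.
from ligo import segments

seglists = segments.segmentlistdict(
    H1=segments.segmentlist([segments.segment(0, 100)]),
    L1=segments.segmentlist([segments.segment(50, 150)]),
    V1=segments.segmentlist([segments.segment(90, 200)]))
on = frozenset(["H1", "L1"])
zero_lag = seglists.intersection(on) - seglists.union(set(seglists.keys()) - on)
print(zero_lag)              # [segment(50, 90)]: H1 and L1 on, V1 off
print(float(abs(zero_lag)))  # 40.0 seconds of exclusive H1,L1 livetime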
# =============================================================================
#


#
# Command line.
#

options, filenames, paramdict = parse_command_line()

#
# Clear the statistics book-keeping object.
#

# FIXME: don't hard-code instruments
distributions = stringutils.StringCoincParamsDistributions(["H1", "L1", "V1"])
segs = segments.segmentlistdict()

#
# Start output document
#

xmldoc = ligolw.Document()
xmldoc.appendChild(ligolw.LIGO_LW())
process = ligolw_process.register_to_xmldoc(
    xmldoc,
    program=u"lalapps_string_meas_likelihood",
    paramdict=paramdict,
    version=__version__,
    cvs_repository="lscsoft",
    cvs_entry_time=__date__,
    comment=u"")
def get_triggered_coherent_segment(workflow, sciencesegs): """ Construct the coherent network on and off source segments. Can switch to construction of segments for a single IFO search when coherent segments are insufficient for a search. Parameters ----------- workflow : pycbc.workflow.core.Workflow The workflow instance that the calculated segments belong to. sciencesegs : dict Dictionary of all science segments within analysis time. Returns -------- onsource : ligo.segments.segmentlistdict A dictionary containing the on source segments for network IFOs offsource : ligo.segments.segmentlistdict A dictionary containing the off source segments for network IFOs """ # Load parsed workflow config options cp = workflow.cp triggertime = int(os.path.basename(cp.get('workflow', 'trigger-time'))) minduration = int( os.path.basename(cp.get('workflow-exttrig_segments', 'min-duration'))) maxduration = int( os.path.basename(cp.get('workflow-exttrig_segments', 'max-duration'))) onbefore = int( os.path.basename(cp.get('workflow-exttrig_segments', 'on-before'))) onafter = int( os.path.basename(cp.get('workflow-exttrig_segments', 'on-after'))) padding = int( os.path.basename(cp.get('workflow-exttrig_segments', 'pad-data'))) if cp.has_option("workflow-condition_strain", "do-gating"): padding += int(os.path.basename(cp.get("condition_strain", "pad-data"))) quanta = int( os.path.basename(cp.get('workflow-exttrig_segments', 'quanta'))) # Check available data segments meet criteria specified in arguments commonsegs = sciencesegs.extract_common(sciencesegs.keys()) offsrclist = commonsegs[tuple(commonsegs.keys())[0]] if len(offsrclist) > 1: logging.info("Removing network segments that do not contain trigger " "time") for seg in offsrclist: if triggertime in seg: offsrc = seg else: offsrc = offsrclist[0] if abs(offsrc) < minduration + 2 * padding: fail = segments.segment([ triggertime - minduration / 2. - padding, triggertime + minduration / 2. + padding ]) logging.warning("Available network segment shorter than minimum " "allowed duration.") return None, fail # Will segment duration be the maximum desired length or not? if abs(offsrc) >= maxduration + 2 * padding: logging.info("Available network science segment duration (%ds) is " "greater than the maximum allowed segment length (%ds). " "Truncating..." % (abs(offsrc), maxduration)) else: logging.info("Available network science segment duration (%ds) is " "less than the maximum allowed segment length (%ds)." % (abs(offsrc), maxduration)) logging.info("%ds of padding applied at beginning and end of segment." % padding) # Construct on-source onstart = triggertime - onbefore onend = triggertime + onafter oncentre = onstart + ((onbefore + onafter) / 2) onsrc = segments.segment(onstart, onend) logging.info("Constructed ON-SOURCE: duration %ds (%ds before to %ds after" " trigger)." 
% (abs(onsrc), triggertime - onsrc[0], onsrc[1] - triggertime)) onsrc = segments.segmentlist([onsrc]) # Maximal, centred coherent network segment idealsegment = segments.segment( int(oncentre - padding - 0.5 * maxduration), int(oncentre + padding + 0.5 * maxduration)) # Construct off-source if (idealsegment in offsrc): offsrc = idealsegment elif idealsegment[1] not in offsrc: offsrc &= segments.segment(offsrc[1] - maxduration - 2 * padding, offsrc[1]) elif idealsegment[0] not in offsrc: offsrc &= segments.segment(offsrc[0], offsrc[0] + maxduration + 2 * padding) # Trimming off-source excess = (abs(offsrc) - 2 * padding) % quanta if excess != 0: logging.info("Trimming %ds excess time to make OFF-SOURCE duration a " "multiple of %ds" % (excess, quanta)) offset = (offsrc[0] + abs(offsrc) / 2.) - oncentre if 2 * abs(offset) > excess: if offset < 0: offsrc &= segments.segment(offsrc[0] + excess, offsrc[1]) elif offset > 0: offsrc &= segments.segment(offsrc[0], offsrc[1] - excess) assert abs(offsrc) % quanta == 2 * padding else: logging.info("This will make OFF-SOURCE symmetrical about trigger " "time.") start = int(offsrc[0] - offset + excess / 2) end = int(offsrc[1] - offset - round(float(excess) / 2)) offsrc = segments.segment(start, end) assert abs(offsrc) % quanta == 2 * padding logging.info("Constructed OFF-SOURCE: duration %ds (%ds before to %ds " "after trigger)." % (abs(offsrc) - 2 * padding, triggertime - offsrc[0] - padding, offsrc[1] - triggertime - padding)) offsrc = segments.segmentlist([offsrc]) # Put segments into segmentlistdicts onsource = segments.segmentlistdict() offsource = segments.segmentlistdict() ifos = '' for iifo in sciencesegs.keys(): ifos += str(iifo) onsource[iifo] = onsrc offsource[iifo] = offsrc return onsource, offsource
def pull(self, rankingstat, fapfar = None, zerolag_rankingstatpdf = None, coinc_sieve = None, flush = False, cluster = False, cap_singles = False, FAR_trialsfactor = 1.0): # NOTE: rankingstat is not used to compute the ranking # statistic, it supplies the detector livetime segment # lists to determine which triggers are eligible for # inclusion in the background model and is the destination # for triggers identified for inclusion in the background # model. self.ln_lr_from_triggers is the ranking statistic # function (if set). # extract times when instruments were producing SNR. used # to define "on instruments" for coinc tables, as a safety # check for impossible triggers, and to identify triggers # suitable for use in defining the background PDFs. will # only need segment information for the times for which the # queues will yield triggers, so use a bisection search to # clip the lists to reduce subsequent operation count. age = float(self.time_slide_graph.age) snr_segments = segments.segmentlistdict((instrument, ratebinlist[ratebinlist.value_slice_to_index(slice(age, None))].segmentlist()) for instrument, ratebinlist in rankingstat.denominator.triggerrates.items()) # # iterate over coincidences # gps_time_now = float(lal.UTCToGPS(time.gmtime())) newly_reported = [] flushed = [] flushed_unused = [] self.last_coincs.clear() max_last_coinc_snr = {} for node, events in self.time_slide_graph.pull(newly_reported = newly_reported, flushed = flushed, flushed_unused = flushed_unused, coinc_sieve = coinc_sieve, event_collector = self.backgroundcollector, flush = flush): # construct row objects for coinc tables. coinc, coincmaps, coinc_inspiral = self.coinc_tables.coinc_rows(self.process_id, node.time_slide_id, events, seglists = snr_segments) # some tasks for zero-lag candidates if node.is_zero_lag: # populate ranking statistic's zero-lag # PDFs with triggers from all zero-lag # candidates for event in events: rankingstat.zerolag.increment(event) # latency goes in minimum_duration column. NOTE: # latency is nonsense unless running live. FIXME: # add a proper column for latency coinc_inspiral.minimum_duration = gps_time_now - float(coinc_inspiral.end) # finally, append coinc to tables if cluster: max_last_coinc_snr.setdefault(node, None) if max_last_coinc_snr[node] is None or coinc_inspiral.snr > max_last_coinc_snr[node][3].snr: max_last_coinc_snr[node] = (events, coinc, coincmaps, coinc_inspiral) else: self.coinc_tables.append_coinc(coinc, coincmaps, coinc_inspiral) # add events to the zero-lag ranking # statistic histogram if zerolag_rankingstatpdf is not None and coinc.likelihood is not None: zerolag_rankingstatpdf.zero_lag_lr_lnpdf.count[coinc.likelihood,] += 1 self.last_coincs.add(events, coinc, coincmaps, coinc_inspiral) for node in max_last_coinc_snr: if max_last_coinc_snr[node] is not None: events, coinc, coincmaps, coinc_inspiral = max_last_coinc_snr[node] # assign ranking statistic, FAP and FAR if self.ln_lr_from_triggers is not None: coinc.likelihood = self.ln_lr_from_triggers(events, node.offset_vector) if fapfar is not None: # FIXME: add proper columns to # store these values in coinc_inspiral.combined_far = fapfar.far_from_rank(coinc.likelihood) * FAR_trialsfactor if len(events) == 1 and cap_singles and coinc_inspiral.combined_far < 1. / fapfar.livetime: coinc_inspiral.combined_far = 1. 
/ fapfar.livetime coinc_inspiral.false_alarm_rate = fapfar.fap_from_rank(coinc.likelihood) if zerolag_rankingstatpdf is not None and coinc.likelihood is not None: zerolag_rankingstatpdf.zero_lag_lr_lnpdf.count[coinc.likelihood,] += 1 self.coinc_tables.append_coinc(coinc, coincmaps, coinc_inspiral) self.last_coincs.add(events, coinc, coincmaps, coinc_inspiral) self.sngl_inspiral_table.extend([sngl_trigger for sngl_trigger in events if sngl_trigger.event_id not in self.clustered_sngl_ids]) self.clustered_sngl_ids |= set(e.event_id for e in events) # add selected singles to the noise model if flushed: # times when at least 2 instruments were generating # SNR. used to select zero-lag singles for # inclusion in the denominator. two_or_more_instruments = segmentsUtils.vote(snr_segments.values(), 2) # FIXME: this is needed to work around rounding # problems in safety checks below, trying to # compare GPS trigger times to float segment # boundaries (the boundaries don't have enough # precision to know if triggers near the edge are # in or out). it would be better not to have to # screw around like this. two_or_more_instruments.protract(1e-3) # 1 ms for event in self.backgroundcollector.pull(rankingstat.snr_min, two_or_more_instruments, flushed): rankingstat.denominator.increment(event) # add any triggers that have been used in coincidences for # the first time to the sngl_inspiral table # FIXME: because this information comes from the # coincidence code, which is not aware of the clustering, # we record a lot of singles that aren't really used for # any (retained) coincs. if not cluster: self.sngl_inspiral_table.extend(newly_reported) # save all sngls above the requested sngls SNR threshold. # all sngls that participated in coincs are already in the # document, so only need to check for ones being flushed # and that were never used. if self.sngls_snr_threshold is not None: self.sngl_inspiral_table.extend(event for event in flushed_unused if event.snr >= self.sngls_snr_threshold) # return the triggers that have been flushed return flushed
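# Hedged sketch of the segmentsUtils.vote() call used above to identify times
# when at least two detectors were producing SNR; assumes glue.segmentsUtils
# is importable (ligo.segments.utils provides an equivalent vote()). Toy
# segments only.
from ligo import segments
from glue import segmentsUtils

on_times = [segments.segmentlist([segments.segment(0, 10)]),   # e.g. H1
            segments.segmentlist([segments.segment(5, 15)]),   # e.g. L1
            segments.segmentlist([segments.segment(8, 20)])]   # e.g. V1
print(segmentsUtils.vote(on_times, 2))
# [segment(5, 15)]: intervals during which two or more detectors were on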
power.init_job_types(config_parser)


#
# Using time slide information, construct segment lists describing times
# requiring trigger construction.
#

if options.verbose:
    print("Computing segments for which lalapps_power jobs are required ...", file=sys.stderr)

background_time_slides = {}
background_seglistdict = segments.segmentlistdict()
if options.do_noninjections:
    for filename in options.background_time_slides:
        cache_entry = CacheEntry(None, None, None, "file://localhost" + os.path.abspath(filename))
        background_time_slides[cache_entry] = timeslides.load_time_slides(filename, verbose = options.verbose, gz = filename.endswith(".gz")).values()
        background_seglistdict |= compute_segment_lists(seglistdict, background_time_slides[cache_entry], options.minimum_gap, options.timing_params, full_segments = options.full_segments, verbose = options.verbose)

injection_time_slides = {}
injection_seglistdict = segments.segmentlistdict()
if options.do_injections:
    for filename in options.injection_time_slides:
        cache_entry = CacheEntry(None, None, None, "file://localhost" + os.path.abspath(filename))
        injection_time_slides[cache_entry] = timeslides.load_time_slides(filename, verbose = options.verbose, gz = filename.endswith(".gz")).values()
        injection_seglistdict |= compute_segment_lists(seglistdict, injection_time_slides[cache_entry], options.minimum_gap, options.timing_params, full_segments = options.full_segments, verbose = options.verbose)
mass_bins = 11
dist_bins = 50

opts, filenames = parse_command_line()

if opts.veto_segments_name is not None:
    working_filename = dbtables.get_connection_filename(opts.full_data_file, verbose = opts.verbose)
    connection = sqlite3.connect(working_filename)
    dbtables.DBTable_set_connection(connection)
    veto_segments = db_thinca_rings.get_veto_segments(connection, opts.veto_segments_name)
    connection.close()
    dbtables.discard_connection_filename(opts.full_data_file, working_filename, verbose = opts.verbose)
    dbtables.DBTable_set_connection(None)
else:
    veto_segments = segments.segmentlistdict()

if not opts.burst_found and not opts.burst_missed:
    FAR, seglists = get_far_threshold_and_segments(opts.full_data_file, opts.live_time_program, instruments = lsctables.ifos_from_instrument_set(opts.instruments), verbose = opts.verbose)

    # times when only exactly the required instruments are on
    seglists -= veto_segments
    zero_lag_segments = seglists.intersection(opts.instruments) - seglists.union(set(seglists.keys()) - opts.instruments)

    live_time = float(abs(zero_lag_segments))
    print(FAR, live_time)

    Found, Missed = get_injections(opts.injfnames, FAR, zero_lag_segments, verbose = opts.verbose)
else:
def __init__(self):
    packing.Bin.__init__(self)
    self.size = segments.segmentlistdict()
    self.extent = None
def setup_datafind_workflow(workflow, scienceSegs, outputDir, seg_file=None, tags=None): """ Setup datafind section of the workflow. This section is responsible for generating, or setting up the workflow to generate, a list of files that record the location of the frame files needed to perform the analysis. There could be multiple options here, the datafind jobs could be done at run time or could be put into a dag. The subsequent jobs will know what was done here from the OutFileList containing the datafind jobs (and the Dagman nodes if appropriate. For now the only implemented option is to generate the datafind files at runtime. This module can also check if the frameFiles actually exist, check whether the obtained segments line up with the original ones and update the science segments to reflect missing data files. Parameters ---------- workflow: pycbc.workflow.core.Workflow The workflow class that stores the jobs that will be run. scienceSegs : Dictionary of ifo keyed ligo.segments.segmentlist instances This contains the times that the workflow is expected to analyse. outputDir : path All output files written by datafind processes will be written to this directory. seg_file : SegFile, optional (default=None) The file returned by get_science_segments containing the science segments and the associated segment_summary. This will be used for the segment_summary test and is required if, and only if, performing that test. tags : list of string, optional (default=None) Use this to specify tags. This can be used if this module is being called more than once to give call specific configuration (by setting options in [workflow-datafind-${TAG}] rather than [workflow-datafind]). This is also used to tag the Files returned by the class to uniqueify the Files and uniqueify the actual filename. FIXME: Filenames may not be unique with current codes! Returns -------- datafindOuts : OutGroupList List of all the datafind output files for use later in the pipeline. sci_avlble_file : SegFile SegFile containing the analysable time after checks in the datafind module are applied to the input segment list. For production runs this is expected to be equal to the input segment list. scienceSegs : Dictionary of ifo keyed ligo.segments.segmentlist instances This contains the times that the workflow is expected to analyse. If the updateSegmentTimes kwarg is given this will be updated to reflect any instances of missing data. sci_avlble_name : string The name with which the analysable time is stored in the sci_avlble_file. 
""" if tags is None: tags = [] logging.info("Entering datafind module") make_analysis_dir(outputDir) cp = workflow.cp # Parse for options in ini file datafind_method = cp.get_opt_tags("workflow-datafind", "datafind-method", tags) if cp.has_option_tags("workflow-datafind", "datafind-check-segment-gaps", tags): checkSegmentGaps = cp.get_opt_tags("workflow-datafind", "datafind-check-segment-gaps", tags) else: checkSegmentGaps = "no_test" if cp.has_option_tags("workflow-datafind", "datafind-check-frames-exist", tags): checkFramesExist = cp.get_opt_tags("workflow-datafind", "datafind-check-frames-exist", tags) else: checkFramesExist = "no_test" if cp.has_option_tags("workflow-datafind", "datafind-check-segment-summary", tags): checkSegmentSummary = cp.get_opt_tags("workflow-datafind", "datafind-check-segment-summary", tags) else: checkSegmentSummary = "no_test" logging.info("Starting datafind with setup_datafind_runtime_generated") if datafind_method == "AT_RUNTIME_MULTIPLE_CACHES": datafindcaches, datafindouts = \ setup_datafind_runtime_cache_multi_calls_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafind_method == "AT_RUNTIME_SINGLE_CACHES": datafindcaches, datafindouts = \ setup_datafind_runtime_cache_single_call_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafind_method == "AT_RUNTIME_MULTIPLE_FRAMES": datafindcaches, datafindouts = \ setup_datafind_runtime_frames_multi_calls_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafind_method == "AT_RUNTIME_SINGLE_FRAMES": datafindcaches, datafindouts = \ setup_datafind_runtime_frames_single_call_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafind_method == "AT_RUNTIME_FAKE_DATA": pass elif datafind_method == "FROM_PREGENERATED_LCF_FILES": ifos = scienceSegs.keys() datafindcaches, datafindouts = \ setup_datafind_from_pregenerated_lcf_files(cp, ifos, outputDir, tags=tags) else: msg = """Entry datafind-method in [workflow-datafind] does not have " expected value. Valid values are AT_RUNTIME_MULTIPLE_FRAMES, AT_RUNTIME_SINGLE_FRAMES AT_RUNTIME_MULTIPLE_CACHES, AT_RUNTIME_SINGLE_CACHES, FROM_PREGENERATED_LCF_FILES, or AT_RUNTIME_FAKE_DATA. Consult the documentation for more info.""" raise ValueError(msg) using_backup_server = False if datafind_method == "AT_RUNTIME_MULTIPLE_FRAMES" or \ datafind_method == "AT_RUNTIME_SINGLE_FRAMES": if cp.has_option_tags("workflow-datafind", "datafind-backup-datafind-server", tags): using_backup_server = True backup_server = cp.get_opt_tags("workflow-datafind", "datafind-backup-datafind-server", tags) cp_new = copy.deepcopy(cp) cp_new.set("workflow-datafind", "datafind-ligo-datafind-server", backup_server) cp_new.set('datafind', 'urltype', 'gsiftp') backup_datafindcaches, backup_datafindouts =\ setup_datafind_runtime_frames_single_call_perifo(cp_new, scienceSegs, outputDir, tags=tags) backup_datafindouts = datafind_keep_unique_backups(\ backup_datafindouts, datafindouts) datafindcaches.extend(backup_datafindcaches) datafindouts.extend(backup_datafindouts) logging.info("setup_datafind_runtime_generated completed") # If we don't have frame files covering all times we can update the science # segments. 
if checkSegmentGaps in ['warn','update_times','raise_error']: logging.info("Checking science segments against datafind output....") newScienceSegs = get_science_segs_from_datafind_outs(datafindcaches) logging.info("New segments calculated from data find output.....") missingData = False for ifo in scienceSegs.keys(): # If no science segments in input then do nothing if not scienceSegs[ifo]: msg = "No science segments are present for ifo %s, " %(ifo) msg += "the segment metadata indicates there is no analyzable" msg += " strain data between the selected GPS start and end " msg += "times." logging.warning(msg) continue if ifo not in newScienceSegs: msg = "No data frames were found corresponding to the science " msg += "segments for ifo %s" %(ifo) logging.error(msg) missingData = True if checkSegmentGaps == 'update_times': scienceSegs[ifo] = segments.segmentlist() continue missing = scienceSegs[ifo] - newScienceSegs[ifo] if abs(missing): msg = "From ifo %s we are missing frames covering:" %(ifo) msg += "\n%s" % "\n".join(map(str, missing)) missingData = True logging.error(msg) if checkSegmentGaps == 'update_times': # Remove missing time, so that we can carry on if desired logging.info("Updating science segments for ifo %s." %(ifo)) scienceSegs[ifo] = scienceSegs[ifo] - missing if checkSegmentGaps == 'raise_error' and missingData: raise ValueError("Workflow cannot find needed data, exiting.") logging.info("Done checking, any discrepancies are reported above.") elif checkSegmentGaps == 'no_test': pass else: errMsg = "checkSegmentGaps kwarg must take a value from 'no_test', " errMsg += "'warn', 'update_times' or 'raise_error'." raise ValueError(errMsg) # Do all of the frame files that were returned actually exist? if checkFramesExist in ['warn','update_times','raise_error']: logging.info("Verifying that all frames exist on disk.") missingFrSegs, missingFrames = \ get_missing_segs_from_frame_file_cache(datafindcaches) missingFlag = False for ifo in missingFrames.keys(): # If no data in the input then do nothing if not scienceSegs[ifo]: continue # If using a backup server, does the frame exist remotely? if using_backup_server: # WARNING: This will be slow, but hopefully it will not occur # for too many frames. This could be optimized if # it becomes necessary. new_list = [] for frame in missingFrames[ifo]: for dfout in datafindouts: dfout_pfns = list(dfout.pfns) dfout_urls = [a.url for a in dfout_pfns] if frame.url in dfout_urls: pfn = dfout_pfns[dfout_urls.index(frame.url)] dfout.removePFN(pfn) if len(dfout.pfns) == 0: new_list.append(frame) else: msg = "Frame %s not found locally. "\ %(frame.url,) msg += "Replacing with remote url(s) %s." \ %(str([a.url for a in dfout.pfns]),) logging.info(msg) break else: new_list.append(frame) missingFrames[ifo] = new_list if missingFrames[ifo]: msg = "From ifo %s we are missing the following frames:" %(ifo) msg +='\n'.join([a.url for a in missingFrames[ifo]]) missingFlag = True logging.error(msg) if checkFramesExist == 'update_times': # Remove missing times, so that we can carry on if desired logging.info("Updating science times for ifo %s." %(ifo)) scienceSegs[ifo] = scienceSegs[ifo] - missingFrSegs[ifo] if checkFramesExist == 'raise_error' and missingFlag: raise ValueError("Workflow cannot find all frames, exiting.") logging.info("Finished checking frames.") elif checkFramesExist == 'no_test': pass else: errMsg = "checkFramesExist kwarg must take a value from 'no_test', " errMsg += "'warn', 'update_times' or 'raise_error'." 
raise ValueError(errMsg) # Check if there are cases where frames exist, but no entry in the segment # summary table are present. if checkSegmentSummary in ['warn', 'raise_error']: logging.info("Checking the segment summary table against frames.") dfScienceSegs = get_science_segs_from_datafind_outs(datafindcaches) missingFlag = False # NOTE: Should this be overrideable in the config file? sci_seg_name = "SCIENCE" if seg_file is None: err_msg = "You must provide the science segments SegFile object " err_msg += "if using the datafind-check-segment-summary option." raise ValueError(err_msg) if seg_file.seg_summ_dict is None: err_msg = "The provided science segments SegFile object must " err_msg += "contain a valid segment_summary table if using the " err_msg += "datafind-check-segment-summary option." raise ValueError(err_msg) seg_summary_times = seg_file.seg_summ_dict for ifo in dfScienceSegs.keys(): curr_seg_summ_times = seg_summary_times[ifo + ":" + sci_seg_name] missing = (dfScienceSegs[ifo] & seg_file.valid_segments) missing.coalesce() missing = missing - curr_seg_summ_times missing.coalesce() scienceButNotFrame = scienceSegs[ifo] - dfScienceSegs[ifo] scienceButNotFrame.coalesce() missing2 = scienceSegs[ifo] - scienceButNotFrame missing2.coalesce() missing2 = missing2 - curr_seg_summ_times missing2.coalesce() if abs(missing): msg = "From ifo %s the following times have frames, " %(ifo) msg += "but are not covered in the segment summary table." msg += "\n%s" % "\n".join(map(str, missing)) logging.error(msg) missingFlag = True if abs(missing2): msg = "From ifo %s the following times have frames, " %(ifo) msg += "are science, and are not covered in the segment " msg += "summary table." msg += "\n%s" % "\n".join(map(str, missing2)) logging.error(msg) missingFlag = True if checkSegmentSummary == 'raise_error' and missingFlag: errMsg = "Segment_summary discrepancy detected, exiting." raise ValueError(errMsg) elif checkSegmentSummary == 'no_test': pass else: errMsg = "checkSegmentSummary kwarg must take a value from 'no_test', " errMsg += "'warn', or 'raise_error'." raise ValueError(errMsg) # Now need to create the file for SCIENCE_AVAILABLE sci_avlble_dict = segments.segmentlistdict() # NOTE: Should this be overrideable in the config file? sci_avlble_name = "SCIENCE_AVAILABLE" for ifo in scienceSegs.keys(): sci_avlble_dict[ifo + ':' + sci_avlble_name] = scienceSegs[ifo] sci_avlble_file = SegFile.from_segment_list_dict('SCIENCE_AVAILABLE', sci_avlble_dict, ifo_list = scienceSegs.keys(), valid_segment=workflow.analysis_time, extension='.xml', tags=tags, directory=outputDir) logging.info("Leaving datafind module") if datafind_method == "AT_RUNTIME_FAKE_DATA": datafindouts = None else: datafindouts = FileList(datafindouts) return datafindouts, sci_avlble_file, scienceSegs, sci_avlble_name
#
# Command line.
#

options, filenames, paramdict = parse_command_line()

#
# Clear the statistics book-keeping object.
#

# FIXME: don't hard-code instruments
distributions = stringutils.StringCoincParamsDistributions(["H1", "L1", "V1"])
segs = segments.segmentlistdict()

#
# Start output document
#

xmldoc = ligolw.Document()
xmldoc.appendChild(ligolw.LIGO_LW())
process = ligolw_process.register_to_xmldoc(xmldoc, program = u"lalapps_string_meas_likelihood", paramdict = paramdict, version = __version__, cvs_repository = "lscsoft", cvs_entry_time = __date__, comment = u"")

#
# Iterate over files
#
def get_segments_tile(config_struct, observatory, radec, segmentlist): observer = ephem.Observer() observer.lat = str(config_struct["latitude"]) observer.lon = str(config_struct["longitude"]) observer.horizon = str(config_struct["horizon"]) observer.elevation = config_struct["elevation"] fxdbdy = ephem.FixedBody() fxdbdy._ra = ephem.degrees(str(radec.ra.degree)) fxdbdy._dec = ephem.degrees(str(radec.dec.degree)) observer.date = ephem.Date(Time(segmentlist[0][0], format='mjd', scale='utc').iso) fxdbdy.compute(observer) date_start = ephem.Date(Time(segmentlist[0][0], format='mjd', scale='utc').iso) date_end = ephem.Date(Time(segmentlist[-1][1], format='mjd', scale='utc').iso) tilesegmentlist = segments.segmentlist() while date_start < date_end: try: date_rise = observer.next_rising(fxdbdy, start=observer.date) date_set = observer.next_setting(fxdbdy, start=observer.date) if date_rise > date_set: date_rise = observer.previous_rising(fxdbdy, start=observer.date) except ephem.AlwaysUpError: date_rise = date_start date_set = date_end except ephem.NeverUpError: date_rise = ephem.Date(0.0) date_set = ephem.Date(0.0) break astropy_rise = Time(date_rise.datetime(), scale='utc') astropy_set = Time(date_set.datetime(), scale='utc') astropy_rise_mjd = astropy_rise.mjd astropy_set_mjd = astropy_set.mjd # Alt/az reference frame at observatory, now #frame_rise = astropy.coordinates.AltAz(obstime=astropy_rise, location=observatory) #frame_set = astropy.coordinates.AltAz(obstime=astropy_set, location=observatory) # Transform grid to alt/az coordinates at observatory, now #altaz_rise = radec.transform_to(frame_rise) #altaz_set = radec.transform_to(frame_set) segment = segments.segment(astropy_rise_mjd,astropy_set_mjd) tilesegmentlist = tilesegmentlist + segments.segmentlist([segment]) tilesegmentlist.coalesce() date_start = date_set observer.date = date_set #moonsegmentlist = get_skybrightness(\ # config_struct,segmentlist,observer,fxdbdy,radec) halist = get_ha_segments(\ config_struct,segmentlist,observer,fxdbdy,radec) moonsegmentlist = get_moon_segments(\ config_struct,segmentlist,observer,fxdbdy,radec) tilesegmentlistdic = segments.segmentlistdict() tilesegmentlistdic["observations"] = segmentlist tilesegmentlistdic["tile"] = tilesegmentlist tilesegmentlistdic["moon"] = moonsegmentlist tilesegmentlistdic["halist"] = halist tilesegmentlist = tilesegmentlistdic.intersection(["observations","tile","moon","halist"]) #tilesegmentlist = tilesegmentlistdic.intersection(["observations","tile"]) tilesegmentlist.coalesce() return tilesegmentlist
def __init__(self, options): """! Initialize a DataSourceInfo class instance from command line options specified by append_options() """ ## A list of possible, valid data sources ("frames", "framexmit", "lvshm", "white", "silence") self.data_sources = set( ("framexmit", "lvshm", "frames", "white", "silence", "white_live")) self.live_sources = set(("framexmit", "lvshm", "white_live")) assert self.live_sources <= self.data_sources # Sanity check the options if options.data_source not in self.data_sources: raise ValueError("--data-source must be one of %s" % ", ".join(self.data_sources)) if options.data_source == "frames" and options.frame_cache is None: raise ValueError( "--frame-cache must be specified when using --data-source=frames" ) if options.frame_segments_file is not None and options.data_source != "frames": raise ValueError( "can only give --frame-segments-file if --data-source=frames") if options.frame_segments_name is not None and options.frame_segments_file is None: raise ValueError( "can only specify --frame-segments-name if --frame-segments-file is given" ) if not (options.channel_list or options.channel_name): raise ValueError( "must specify a channel list in the form --channel-list=/path/to/file or --channel-name=H1:AUX-CHANNEL-NAME:RATE --channel-name=H1:SOMETHING-ELSE:RATE" ) if (options.channel_list and options.channel_name): raise ValueError( "must specify a channel list in the form --channel-list=/path/to/file or --channel-name=H1:AUX-CHANNEL-NAME:RATE --channel-name=H1:SOMETHING-ELSE:RATE" ) ## Generate a dictionary of requested channels from channel INI file # known/permissible values of safety and fidelity flags self.known_safety = set( ("safe", "unsafe", "unsafeabove2kHz", "unknown")) self.known_fidelity = set(("clean", "flat", "glitchy", "unknown")) # ensure safety and fidelity options are valid options.safety_include = set(options.safety_include) options.fidelity_exclude = set(options.fidelity_exclude) for safety in options.safety_include: assert safety in self.known_safety, '--safety-include=%s is not understood. Must be one of %s' % ( safety, ", ".join(self.known_safety)) for fidelity in options.fidelity_exclude: assert fidelity in self.known_fidelity, '--fidelity-exclude=%s is not understood. Must be one of %s' % ( fidelity, ", ".join(self.known_fidelity)) # dictionary of the requested channels, e.g., {"H1:LDAS-STRAIN": 16384, "H1:ODC-LARM": 2048} if options.channel_list: name, self.extension = options.channel_list.rsplit('.', 1) if self.extension == 'ini': self.channel_dict = channel_dict_from_channel_ini(options) else: self.channel_dict = channel_dict_from_channel_file( options.channel_list) elif options.channel_name: self.extension = 'none' self.channel_dict = channel_dict_from_channel_list( options.channel_name) # set instrument; it is assumed all channels from a given channel list are from the same instrument self.instrument = self.channel_dict[next(iter( self.channel_dict))]['ifo'] # set the maximum number of streams to be run by a single pipeline. self.max_streams = options.max_streams # set the frequency ranges considered by channels with splitting into multiple frequency bands. # If channel sampling rate doesn't fall within this range, it will not be split into multiple bands. 
self.max_sample_rate = options.max_sample_rate self.min_sample_rate = options.min_sample_rate # split up channels requested into partitions for serial processing if options.equal_subsets: self.channel_subsets = partition_channels_to_equal_subsets( self.channel_dict, self.max_streams, self.min_sample_rate, self.max_sample_rate) else: self.channel_subsets = partition_channels_to_subsets( self.channel_dict, self.max_streams, self.min_sample_rate, self.max_sample_rate) ## A dictionary for shared memory partition, e.g., {"H1": "LHO_Data", "H2": "LHO_Data", "L1": "LLO_Data", "V1": "VIRGO_Data"} self.shm_part_dict = { "H1": "LHO_Data", "H2": "LHO_Data", "L1": "LLO_Data", "V1": "VIRGO_Data" } if options.shared_memory_partition is not None: self.shm_part_dict.update( datasource.channel_dict_from_channel_list( options.shared_memory_partition)) ## options for shared memory self.shm_assumed_duration = options.shared_memory_assumed_duration self.shm_block_size = options.shared_memory_block_size # NOTE: should this be incorporated into options.block_size? currently only used for offline data sources ## A dictionary of framexmit addresses self.framexmit_addr = framexmit_ports["CIT"] if options.framexmit_addr is not None: self.framexmit_addr.update( datasource.framexmit_dict_from_framexmit_list( options.framexmit_addr)) self.framexmit_iface = options.framexmit_iface ## Analysis segment. Default is None self.seg = None ## Set latency output self.latency_output = options.latency_output if options.gps_start_time is not None: if options.gps_end_time is None: raise ValueError( "must provide both --gps-start-time and --gps-end-time") try: start = LIGOTimeGPS(options.gps_start_time) except ValueError: raise ValueError("invalid --gps-start-time '%s'" % options.gps_start_time) try: end = LIGOTimeGPS(options.gps_end_time) except ValueError: raise ValueError("invalid --gps-end-time '%s'" % options.gps_end_time) if start >= end: raise ValueError( "--gps-start-time must be < --gps-end-time: %s < %s" % (options.gps_start_time, options.gps_end_time)) ## Segment from gps start and stop time if given self.seg = segments.segment(LIGOTimeGPS(options.gps_start_time), LIGOTimeGPS(options.gps_end_time)) elif options.gps_end_time is not None: raise ValueError( "must provide both --gps-start-time and --gps-end-time") elif options.data_source not in self.live_sources: raise ValueError( "--gps-start-time and --gps-end-time must be specified when --data-source not one of %s" % ", ".join(sorted(self.live_sources))) if options.frame_segments_file is not None: ## Frame segments from a user defined file self.frame_segments = ligolw_segments.segmenttable_get_by_name( ligolw_utils.load_filename( options.frame_segments_file, contenthandler=ligolw_segments.LIGOLWContentHandler), options.frame_segments_name).coalesce() if self.seg is not None: # Clip frame segments to seek segment if it # exists (not required, just saves some # memory and I/O overhead) self.frame_segments = segments.segmentlistdict( (instrument, seglist & segments.segmentlist([self.seg])) for instrument, seglist in self.frame_segments.items()) else: ## if no frame segments provided, set them to an empty segment list dictionary self.frame_segments = segments.segmentlistdict( {self.instrument: None}) ## frame cache file self.frame_cache = options.frame_cache ## block size in bytes to read data from disk self.block_size = options.block_size ## Data source, one of python.datasource.DataSourceInfo.data_sources self.data_source = options.data_source # FIXME: this is ugly, but we 
have to protect against busted shared memory partitions if self.data_source == "lvshm": import subprocess subprocess.call([ "smrepair", "--bufmode", "5", self.shm_part_dict[self.instrument] ])
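# The GPS boundary handling above can be exercised in isolation.  A minimal
# sketch (not the pipeline's own helper), assuming only the ligo.segments
# package (the older glue.segments API is identical):
from ligo import segments

def make_analysis_segment(gps_start, gps_end):
    # Return a segments.segment spanning [gps_start, gps_end), or None if
    # neither bound was supplied; both-or-neither is enforced as above.
    if (gps_start is None) != (gps_end is None):
        raise ValueError("must provide both --gps-start-time and --gps-end-time")
    if gps_start is None:
        return None
    start, end = float(gps_start), float(gps_end)
    if start >= end:
        raise ValueError("--gps-start-time must be < --gps-end-time: %s < %s" % (gps_start, gps_end))
    return segments.segment(start, end)

# e.g. make_analysis_segment(1187008882, 1187008892) -> segment(1187008882.0, 1187008892.0)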
def get_skybrightness(config_struct,segmentlist,observer,fxdbdy,radec): moonsegmentlist = segments.segmentlist() if config_struct["filt"] == "c": passband = "g" else: passband = config_struct["filt"] # Moon phase data (from Coughlin, Stubbs, and Claver Table 2) moon_phases = [2,10,45,90] moon_data = {'u':[2.7,3.1,4.2,5.7], 'g':[2.4,2.8,3.8,5.2], 'r':[2.1,2.5,3.4,4.9], 'i':[1.9,2.3,3.3,4.7], 'z':[1.9,2.2,3.2,4.6], 'y':[1.8,2.2,3.1,4.5]} # Determine moon data for this phase moon_data_passband = moon_data[passband] # Fits to solar sky brightness (from Coughlin, Stubbs, and Claver Table 4) sun_data = {'u':[88.5,-0.5,-0.5,0.4], 'g':[386.5,-2.2,-2.4,0.8], 'r':[189.0,-1.4,-1.1,0.8], 'i':[164.8,-1.5,-0.7,0.6], 'z':[231.2,-2.8,-0.7,1.4], 'zs':[131.1,-1.4,-0.5,0.2], 'y':[92.0,-1.3,-0.2,0.9]} sun_data_error = {'u':[6.2,0.1,0.1,0.1], 'g':[34.0,0.2,0.2,0.5], 'r':[32.7,0.2,0.2,0.5], 'i':[33.1,0.2,0.2,0.5], 'z':[62.3,0.3,0.4,0.9], 'zs':[45.6,0.2,0.3,0.6], 'y':[32.7,0.2,0.2,0.5]} # Determine sun data for this phase sun_data_passband = sun_data[passband] dt = 1.0/24.0 tt = np.arange(segmentlist[0][0],segmentlist[-1][1]+dt,dt) ra2 = radec.ra.radian d2 = radec.dec.radian # Where is the moon? moon = ephem.Moon() for ii in range(len(tt)-1): observer.date = ephem.Date(Time(tt[ii], format='mjd', scale='utc').iso) moon.compute(observer) fxdbdy.compute(observer) alt_target = float(repr(fxdbdy.alt)) * (360/(2*np.pi)) az_target = float(repr(fxdbdy.az)) * (360/(2*np.pi)) #print("Altitude / Azimuth of target: %.5f / %.5f"%(alt_target,az_target)) alt_moon = float(repr(moon.alt)) * (360/(2*np.pi)) az_moon = float(repr(moon.az)) * (360/(2*np.pi)) #print("Altitude / Azimuth of moon: %.5f / %.5f"%(alt_moon,az_moon)) if (alt_target < 30.0) or (alt_moon < 30.0): total_mag, total_mag_error, flux_mag, flux_mag_error = np.inf, np.inf, np.inf, np.inf else: ra_moon = (180/np.pi)*float(repr(moon.ra)) dec_moon = (180/np.pi)*float(repr(moon.dec)) # Coverting both target and moon ra and dec to radians ra1 = float(repr(moon.ra)) d1 = float(repr(moon.dec)) # Calculate angle between target and moon cosA = np.sin(d1)*np.sin(d2) + np.cos(d1)*np.cos(d2)*np.cos(ra1-ra2) angle = np.arccos(cosA)*(360/(2*np.pi)) #print("Angle between moon and target: %.5f"%(angle)) delta_mag = np.interp(moon.moon_phase*100.0,moon_phases,moon_data_passband) delta_mag_error = 0.1*delta_mag flux = sun_data_passband[0] + sun_data_passband[1]*angle +\ sun_data_passband[2]*alt_target + sun_data_passband[3]*alt_moon flux_zp = sun_data_passband[0] + sun_data_passband[1]*90.0 +\ sun_data_passband[2]*90.0 + sun_data_passband[3]*45.0 # check if flux < 0: too small to fit if flux < 0: flux = 1e-10 flux = flux* (10**11) flux_zp = flux_zp* (10**11) flux_mag = -2.5 * (np.log10(flux) - np.log10(flux_zp)) sun_data_passband_error = sun_data_error[passband] flux_error = np.sqrt(sun_data_passband_error[0]**2 + sun_data_passband_error[1]**2 * angle**2 +\ sun_data_passband_error[2]**2 * alt_target**2 + sun_data_passband_error[3]**2 * alt_moon**2) flux_error = flux_error * (10**11) flux_mag_error = 1.08574 * flux_error / flux # Determine total magnitude contribution total_mag = delta_mag + flux_mag total_mag_error = np.sqrt(delta_mag_error**2 + flux_mag_error**2) #print(tt[ii], angle, alt_target, alt_moon, total_mag, total_mag_error) if total_mag > 0.0: segment = segments.segment(tt[ii],tt[ii+1]) moonsegmentlist = moonsegmentlist + segments.segmentlist([segment]) moonsegmentlist.coalesce() #else: # print(tt[ii], angle, alt_target, alt_moon, total_mag, total_mag_error) 
moonsegmentlistdic = segments.segmentlistdict() moonsegmentlistdic["observations"] = segmentlist moonsegmentlistdic["moon"] = moonsegmentlist moonsegmentlist = moonsegmentlistdic.intersection(["observations","moon"]) moonsegmentlist.coalesce() #print("Keeping %.2f %% of data"%(100.0*np.sum(np.diff(moonsegmentlist))/np.sum(np.diff(segmentlist)))) return moonsegmentlist
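# The sky-brightness routine above hinges on the angular separation between
# the target and the moon, computed with the spherical law of cosines.  A
# hedged, numpy-only sketch of that step (inputs in radians, output in degrees):
import numpy as np

def angular_separation_deg(ra1, dec1, ra2, dec2):
    cos_a = (np.sin(dec1) * np.sin(dec2)
             + np.cos(dec1) * np.cos(dec2) * np.cos(ra1 - ra2))
    # clip guards against round-off pushing |cos_a| marginally above 1
    return np.degrees(np.arccos(np.clip(cos_a, -1.0, 1.0)))

# e.g. a target 6 h of right ascension away from the moon on the equator:
# angular_separation_deg(0.0, 0.0, np.pi / 2, 0.0) -> 90.0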
def __init__(self, flist, opts): self.far = {} self.segments = segments.segmentlistdict() self.non_inj_fnames = [] self.inj_fnames = [] #self.der_fit = None self.twoDMassBins = None #self.dBin = {} self.gw = None self.found = {} self.missed = {} self.wnfunc = None self.opts = opts if opts.bootstrap_iterations: self.bootnum = int(opts.bootstrap_iterations) else: self.bootnum = 100 self.veto_segments = segments.segmentlistdict() self.zero_lag_segments = {} self.instruments = [] self.livetime = {} self.minmass = None self.maxmass = None self.mintotal = None self.maxtotal = None for f in flist: if opts.verbose: print("Gathering stats from: %s...." % (f,), file=sys.stderr) working_filename = dbtables.get_connection_filename(f, verbose = opts.verbose) connection = sqlite3.connect(working_filename) dbtables.DBTable_set_connection(connection) xmldoc = dbtables.get_xml(connection) # look for a sim table try: sim_inspiral_table = dbtables.lsctables.SimInspiralTable.get_table(xmldoc) self.inj_fnames.append(f) sim = True except ValueError: self.non_inj_fnames.append(f) sim = False if not sim: if opts.veto_segments_name is not None: self.veto_segments = db_thinca_rings.get_veto_segments(connection, opts.veto_segments_name) self.get_instruments(connection) self.segments += db_thinca_rings.get_thinca_zero_lag_segments(connection, program_name = opts.live_time_program) self.get_far_thresholds(connection) else: self.get_mass_ranges(connection) #connection.close() dbtables.discard_connection_filename(f, working_filename, verbose = opts.verbose) dbtables.DBTable_set_connection(None) # FIXME Do these have to be done by instruments? self.segments -= self.veto_segments # compute far, segments and livetime by instruments for i in self.instruments: self.far[i] = min(self.far[i]) # FIXME this bombs if any of the FARS are zero. maybe it should continue # and just remove that instrument combo from the calculation if self.far[i] == 0: print("Encountered 0 FAR in %s, ABORTING" % (i,), file=sys.stderr) sys.exit(1) self.zero_lag_segments[i] = self.segments.intersection(i) - self.segments.union(set(self.segments.keys()) - i) # Livetime must have playground removed self.livetime[i] = float(abs(self.zero_lag_segments[i] - segmentsUtils.S2playground(self.segments.extent_all()))) if opts.verbose: print("%s FAR %e, livetime %f" % (",".join(sorted(list(i))), self.far[i], self.livetime[i]), file=sys.stderr) # get a 2D mass binning self.twoDMassBins = self.get_2d_mass_bins(self.minmass, self.maxmass, opts.mass_bins)
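# The zero-lag segments above use an "intersection minus union of the rest"
# idiom to isolate times when exactly a given instrument combination was
# observing.  A toy example (made-up times), assuming ligo.segments:
from ligo import segments

seglists = segments.segmentlistdict({
    "H1": segments.segmentlist([segments.segment(0, 100)]),
    "L1": segments.segmentlist([segments.segment(50, 150)]),
    "V1": segments.segmentlist([segments.segment(120, 200)]),
})
combo = frozenset(["H1", "L1"])
on_together = seglists.intersection(combo)                        # both H1 and L1 on
exclusive = on_together - seglists.union(set(seglists) - combo)   # and nothing else on
print(exclusive)   # [segment(50, 100)]: H1+L1 only, V1 still off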
def finish(self): fig, axes = SnglBurstUtils.make_burst_plot(r"Injection Amplitude (\(\mathrm{s}^{-\frac{1}{3}}\))", "Detection Efficiency", width = 108.0) axes.set_title(r"Detection Efficiency vs.\ Amplitude") axes.semilogx() axes.set_position([0.10, 0.150, 0.86, 0.77]) # set desired yticks axes.set_yticks((0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)) axes.set_yticklabels((r"\(0\)", r"\(0.1\)", r"\(0.2\)", r"\(0.3\)", r"\(0.4\)", r"\(0.5\)", r"\(0.6\)", r"\(0.7\)", r"\(0.8\)", r"\(0.9\)", r"\(1.0\)")) axes.xaxis.grid(True, which = "major,minor") axes.yaxis.grid(True, which = "major,minor") # put made and found injections in the denominators and # numerators of the efficiency bins bins = rate.NDBins((rate.LogarithmicBins(min(sim.amplitude for sim in self.all), max(sim.amplitude for sim in self.all), 400),)) efficiency_num = rate.BinnedArray(bins) efficiency_den = rate.BinnedArray(bins) for sim in self.found: efficiency_num[sim.amplitude,] += 1 for sim in self.all: efficiency_den[sim.amplitude,] += 1 # generate and plot trend curves. adjust window function # normalization so that denominator array correctly # represents the number of injections contributing to each # bin: make w(0) = 1.0. note that this factor has no # effect on the efficiency because it is common to the # numerator and denominator arrays. we do this for the # purpose of computing the Poisson error bars, which # requires us to know the counts for the bins windowfunc = rate.gaussian_window(self.filter_width) windowfunc /= windowfunc[len(windowfunc) / 2 + 1] rate.filter_array(efficiency_num.array, windowfunc) rate.filter_array(efficiency_den.array, windowfunc) # regularize: adjust unused bins so that the efficiency is # 0, not NaN assert (efficiency_num.array <= efficiency_den.array).all() efficiency_den.array[(efficiency_num.array == 0) & (efficiency_den.array == 0)] = 1 line1, A50, A50_err = render_data_from_bins(file("string_efficiency.dat", "w"), axes, efficiency_num, efficiency_den, self.cal_uncertainty, self.filter_width, colour = "k", linestyle = "-", erroralpha = 0.2) print("Pipeline's 50%% efficiency point for all detections = %g +/- %g%%\n" % (A50, A50_err * 100), file=sys.stderr) # add a legend to the axes axes.legend((line1,), (r"\noindent Injections recovered with $\log \Lambda > %.2f$" % self.detection_threshold,), loc = "lower right") # adjust limits axes.set_xlim([3e-22, 3e-19]) axes.set_ylim([0.0, 1.0]) # # dump some information about the highest-amplitude missed # and quietest-amplitude found injections # self.loudest_missed.sort(reverse = True) self.quietest_found.sort(reverse = True) f = file("string_loud_missed_injections.txt", "w") print("Highest Amplitude Missed Injections", file=f) print("===================================", file=f) for amplitude, sim, offsetvector, filename, ln_likelihood_ratio in self.loudest_missed: print(file=f) print("%s in %s:" % (str(sim.simulation_id), filename), file=f) if ln_likelihood_ratio is None: print("Not recovered", file=f) else: print("Recovered with \\log \\Lambda = %.16g, detection threshold was %.16g" % (ln_likelihood_ratio, self.detection_threshold), file=f) for instrument in self.seglists: print("In %s:" % instrument, file=f) print("\tInjected amplitude:\t%.16g" % SimBurstUtils.string_amplitude_in_instrument(sim, instrument, offsetvector), file=f) print("\tTime of injection:\t%s s" % sim.time_at_instrument(instrument, offsetvector), file=f) print("Amplitude in waveframe:\t%.16g" % sim.amplitude, file=f) t = sim.get_time_geocent() print("Time at 
geocentre:\t%s s" % t, file=f) print("Segments within 60 seconds:\t%s" % segmentsUtils.segmentlistdict_to_short_string(self.seglists & segments.segmentlistdict((instrument, segments.segmentlist([segments.segment(t-offsetvector[instrument]-60, t-offsetvector[instrument]+60)])) for instrument in self.seglists)), file=f) print("Vetoes within 60 seconds:\t%s" % segmentsUtils.segmentlistdict_to_short_string(self.vetoseglists & segments.segmentlistdict((instrument, segments.segmentlist([segments.segment(t-offsetvector[instrument]-60, t-offsetvector[instrument]+60)])) for instrument in self.vetoseglists)), file=f) f = file("string_quiet_found_injections.txt", "w") print("Lowest Amplitude Found Injections", file=f) print("=================================", file=f) for inv_amplitude, sim, offsetvector, filename, ln_likelihood_ratio in self.quietest_found: print(file=f) print("%s in %s:" % (str(sim.simulation_id), filename), file=f) if ln_likelihood_ratio is None: print("Not recovered", file=f) else: print("Recovered with \\log \\Lambda = %.16g, detection threshold was %.16g" % (ln_likelihood_ratio, self.detection_threshold), file=f) for instrument in self.seglists: print("In %s:" % instrument, file=f) print("\tInjected amplitude:\t%.16g" % SimBurstUtils.string_amplitude_in_instrument(sim, instrument, offsetvector), file=f) print("\tTime of injection:\t%s s" % sim.time_at_instrument(instrument, offsetvector), file=f) print("Amplitude in waveframe:\t%.16g" % sim.amplitude, file=f) t = sim.get_time_geocent() print("Time at geocentre:\t%s s" % t, file=f) print("Segments within 60 seconds:\t%s" % segmentsUtils.segmentlistdict_to_short_string(self.seglists & segments.segmentlistdict((instrument, segments.segmentlist([segments.segment(t-offsetvector[instrument]-60, t-offsetvector[instrument]+60)])) for instrument in self.seglists)), file=f) print("Vetoes within 60 seconds:\t%s" % segmentsUtils.segmentlistdict_to_short_string(self.vetoseglists & segments.segmentlistdict((instrument, segments.segmentlist([segments.segment(t-offsetvector[instrument]-60, t-offsetvector[instrument]+60)])) for instrument in self.vetoseglists)), file=f) # # done # return fig,
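# The efficiency curve above is a smoothed ratio of two binned counts: found
# injections over made injections, with a Gaussian window whose central sample
# is normalised to 1 so the denominator still counts injections (needed for the
# Poisson error bars).  A schematic, numpy-only sketch under those assumptions:
import numpy as np

def binned_efficiency(found_amps, all_amps, nbins=50, sigma_bins=2.0):
    edges = np.logspace(np.log10(min(all_amps)), np.log10(max(all_amps)), nbins + 1)
    num, _ = np.histogram(found_amps, bins=edges)
    den, _ = np.histogram(all_amps, bins=edges)
    half = int(4 * sigma_bins)
    window = np.exp(-0.5 * (np.arange(-half, half + 1) / sigma_bins) ** 2)  # w(0) = 1
    num = np.convolve(num, window, mode="same")
    den = np.convolve(den, window, mode="same")
    den[(num == 0) & (den == 0)] = 1.0   # regularize unused bins: efficiency 0, not NaN
    return edges, num / den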
def generate_triggered_segment(workflow, out_dir, sciencesegs): cp = workflow.cp if cp.has_option("workflow", "allow-single-ifo-search"): min_ifos = 1 else: min_ifos = 2 triggertime = int(os.path.basename(cp.get('workflow', 'trigger-time'))) minbefore = int( os.path.basename(cp.get('workflow-exttrig_segments', 'min-before'))) minafter = int( os.path.basename(cp.get('workflow-exttrig_segments', 'min-after'))) minduration = int( os.path.basename(cp.get('workflow-exttrig_segments', 'min-duration'))) onbefore = int( os.path.basename(cp.get('workflow-exttrig_segments', 'on-before'))) onafter = int( os.path.basename(cp.get('workflow-exttrig_segments', 'on-after'))) padding = int( os.path.basename(cp.get('workflow-exttrig_segments', 'pad-data'))) if cp.has_option("workflow-condition_strain", "do-gating"): padding += int(os.path.basename(cp.get("condition_strain", "pad-data"))) # How many IFOs meet minimum data requirements? min_seg = segments.segment(triggertime - onbefore - minbefore - padding, triggertime + onafter + minafter + padding) scisegs = segments.segmentlistdict({ ifo: sciencesegs[ifo] for ifo in sciencesegs.keys() if min_seg in sciencesegs[ifo] and abs(sciencesegs[ifo]) >= minduration }) # Find highest number of IFOs that give an acceptable coherent segment num_ifos = len(scisegs.keys()) while num_ifos >= min_ifos: # Consider all combinations for a given number of IFOs ifo_combos = itertools.combinations(scisegs.keys(), num_ifos) onsource = {} offsource = {} for ifo_combo in ifo_combos: ifos = "".join(ifo_combo) logging.info("Calculating optimal segment for %s.", ifos) segs = segments.segmentlistdict( {ifo: scisegs[ifo] for ifo in ifo_combo}) onsource[ifos], offsource[ifos] = get_triggered_coherent_segment(\ workflow, segs) # Which combination gives the longest coherent segment? 
valid_combs = [ iifos for iifos in onsource.keys() if onsource[iifos] is not None ] if len(valid_combs) == 0: # If none, offsource dict will contain segments showing criteria # that have not been met, for use in plotting if len(offsource.keys()) > 1: seg_lens = { ifos: abs(list(offsource[ifos].values())[0]) for ifos in offsource.keys() } best_comb = max(seg_lens, key=seg_lens.get) else: best_comb = tuple(offsource.keys())[0] logging.info( "No combination of %d IFOs with suitable science " "segment.", num_ifos) else: # Identify best analysis segment if len(valid_combs) > 1: seg_lens = { ifos: abs(list(offsource[ifos].values())[0]) for ifos in valid_combs } best_comb = max(seg_lens, key=seg_lens.get) else: best_comb = valid_combs[0] logging.info("Calculated science segments.") offsourceSegfile = os.path.join(out_dir, "offSourceSeg.txt") segmentsUtils.tosegwizard(open(offsourceSegfile, "w"), list(offsource[best_comb].values())[0]) onsourceSegfile = os.path.join(out_dir, "onSourceSeg.txt") segmentsUtils.tosegwizard(open(onsourceSegfile, "w"), list(onsource[best_comb].values())[0]) bufferleft = int( cp.get('workflow-exttrig_segments', 'num-buffer-before')) bufferright = int( cp.get('workflow-exttrig_segments', 'num-buffer-after')) onlen = onbefore + onafter bufferSegment = segments.segment(\ triggertime - onbefore - bufferleft * onlen, triggertime + onafter + bufferright * onlen) bufferSegfile = os.path.join(out_dir, "bufferSeg.txt") segmentsUtils.tosegwizard(open(bufferSegfile, "w"), segments.segmentlist([bufferSegment])) return onsource[best_comb], offsource[best_comb] num_ifos -= 1 logging.warning("No suitable science segments available.") try: return None, offsource[best_comb] except UnboundLocalError: return None, min_seg
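# The IFO selection above boils down to a containment test: the on-source
# window (plus minimum padding) must sit wholly inside one of the ifo's
# science segments, and the ifo must have at least the minimum duration of
# science time.  A toy illustration with hypothetical numbers:
from ligo import segments

triggertime = 1000000000
min_seg = segments.segment(triggertime - 70, triggertime + 70)
sciencesegs = {
    "H1": segments.segmentlist([segments.segment(triggertime - 4000, triggertime + 4000)]),
    "L1": segments.segmentlist([segments.segment(triggertime - 30, triggertime + 4000)]),
}
minduration = 600
usable = {ifo: segs for ifo, segs in sciencesegs.items()
          if min_seg in segs and abs(segs) >= minduration}
print(sorted(usable))   # ['H1']: L1's science segment starts too close to the trigger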
def get_coincident_segmentlistdict(seglistdict, offset_vectors): """ Compute the segments for which data is required in order to perform a complete coincidence analysis given the segments for which data is available and the list of offset vectors to be applied to the data during the coincidence analysis. seglistdict is a segmentlistdict object defining the instruments and times for which data is available. offset_vectors is a list of offset vectors to be applied to the data --- dictionaries of instrument/offset pairs. The offset vectors in offset_vectors are applied to the input segments one by one and the intersection of the shifted segments is computed. The segments surviving the intersection are unshifted to their original positions and stored. The return value is the union of the results of this operation. In all cases all pair-wise intersections are computed, that is, if an offset vector lists three instruments then this function returns the times when any two of those instruments are on, including times when all three are on. For example, let us say that "input" is a segmentlistdict object containing segment lists for three instruments, "H1", "H2" and "L1". And let us say that "slides" is a list of dictionaries, and is equal to [{"H1":0, "H2":0, "L1":0}, {"H1":0, "H2":10}]. Then if output = get_coincident_segmentlistdict(input, slides) output will contain, for each of the three instruments, the segments (or parts thereof) from the original lists that are required in order to perform triple- and double-coincident analyses at zero lag with the three instruments, *and* a double-coincident analysis between H1 and H2 with H2 offset by 10 seconds. The segmentlistdict object returned by this function has its offsets set to those of the input segmentlistdict. """ # don't modify original seglistdict = seglistdict.copy() all_instruments = set(seglistdict) # save original offsets origoffsets = dict(seglistdict.offsets) # compute result coincseglists = segments.segmentlistdict() for offset_vector in offsetvector.component_offsetvectors(offset_vectors, 2): if set(offset_vector).issubset(all_instruments): seglistdict.offsets.update(offset_vector) intersection = seglistdict.extract_common(offset_vector.keys()) intersection.offsets.clear() coincseglists |= intersection # restore original offsets coincseglists.offsets.update(origoffsets) # done return coincseglists
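# A short usage sketch matching the docstring's example (toy times; the
# offsetvector module referenced above ships with lalburst in recent releases):
from ligo import segments

available = segments.segmentlistdict({
    "H1": segments.segmentlist([segments.segment(0, 100)]),
    "H2": segments.segmentlist([segments.segment(0, 100)]),
    "L1": segments.segmentlist([segments.segment(40, 200)]),
})
slides = [{"H1": 0.0, "H2": 0.0, "L1": 0.0}, {"H1": 0.0, "H2": 10.0}]
needed = get_coincident_segmentlistdict(available, slides)
# "needed" retains, per instrument, only the times that can participate in some
# two-detector coincidence under at least one of the offset vectors in "slides".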
def get_veto_segments(self, connection): if self.coinc_inspiral_table: if self.opts.veto_segments_name is not None: return db_thinca_rings.get_veto_segments(connection, self.opts.veto_segments_name) # FIXME BURST CASE VETOS NOT HANDLED else: return segments.segmentlistdict()
opts, filenames = parse_command_line() if opts.veto_segments_name is not None: working_filename = dbtables.get_connection_filename(opts.full_data_file, verbose=opts.verbose) connection = sqlite3.connect(working_filename) dbtables.DBTable_set_connection(connection) veto_segments = db_thinca_rings.get_veto_segments(connection, opts.veto_segments_name) connection.close() dbtables.discard_connection_filename(opts.full_data_file, working_filename, verbose=opts.verbose) dbtables.DBTable_set_connection(None) else: veto_segments = segments.segmentlistdict() if not opts.burst_found and not opts.burst_missed: FAR, seglists = get_far_threshold_and_segments( opts.full_data_file, opts.live_time_program, instruments=lsctables.ifos_from_instrument_set(opts.instruments), verbose=opts.verbose) # times when only exactly the required instruments are on seglists -= veto_segments zero_lag_segments = seglists.intersection( opts.instruments) - seglists.union( set(seglists.keys()) - opts.instruments) live_time = float(abs(zero_lag_segments))
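# Livetime here is just the total duration of the zero-lag segments once the
# playground times are excised.  A hedged sketch with illustrative GPS times,
# assuming glue.segmentsUtils (whose S2playground() builds the standard
# playground segments spanning a given extent) and ligo.segments:
from ligo import segments
from glue import segmentsUtils

zero_lag = segments.segmentlist([segments.segment(729273000, 729280000)])
playground = segmentsUtils.S2playground(zero_lag.extent())
live_time = float(abs(zero_lag - playground))   # seconds of non-playground analysable time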
def setup_datafind_workflow(workflow, scienceSegs, outputDir, seg_file=None, tags=None): """ Setup datafind section of the workflow. This section is responsible for generating, or setting up the workflow to generate, a list of files that record the location of the frame files needed to perform the analysis. There could be multiple options here, the datafind jobs could be done at run time or could be put into a dag. The subsequent jobs will know what was done here from the OutFileList containing the datafind jobs (and the Dagman nodes if appropriate. For now the only implemented option is to generate the datafind files at runtime. This module can also check if the frameFiles actually exist, check whether the obtained segments line up with the original ones and update the science segments to reflect missing data files. Parameters ---------- workflow: pycbc.workflow.core.Workflow The workflow class that stores the jobs that will be run. scienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances This contains the times that the workflow is expected to analyse. outputDir : path All output files written by datafind processes will be written to this directory. seg_file : SegFile, optional (default=None) The file returned by get_science_segments containing the science segments and the associated segment_summary. This will be used for the segment_summary test and is required if, and only if, performing that test. tags : list of string, optional (default=None) Use this to specify tags. This can be used if this module is being called more than once to give call specific configuration (by setting options in [workflow-datafind-${TAG}] rather than [workflow-datafind]). This is also used to tag the Files returned by the class to uniqueify the Files and uniqueify the actual filename. FIXME: Filenames may not be unique with current codes! Returns -------- datafindOuts : OutGroupList List of all the datafind output files for use later in the pipeline. sci_avlble_file : SegFile SegFile containing the analysable time after checks in the datafind module are applied to the input segment list. For production runs this is expected to be equal to the input segment list. scienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances This contains the times that the workflow is expected to analyse. If the updateSegmentTimes kwarg is given this will be updated to reflect any instances of missing data. sci_avlble_name : string The name with which the analysable time is stored in the sci_avlble_file. 
""" if tags is None: tags = [] logging.info("Entering datafind module") make_analysis_dir(outputDir) cp = workflow.cp # Parse for options in ini file datafind_method = cp.get_opt_tags("workflow-datafind", "datafind-method", tags) if cp.has_option_tags("workflow-datafind", "datafind-check-segment-gaps", tags): checkSegmentGaps = cp.get_opt_tags("workflow-datafind", "datafind-check-segment-gaps", tags) else: checkSegmentGaps = "no_test" if cp.has_option_tags("workflow-datafind", "datafind-check-frames-exist", tags): checkFramesExist = cp.get_opt_tags("workflow-datafind", "datafind-check-frames-exist", tags) else: checkFramesExist = "no_test" if cp.has_option_tags("workflow-datafind", "datafind-check-segment-summary", tags): checkSegmentSummary = cp.get_opt_tags("workflow-datafind", "datafind-check-segment-summary", tags) else: checkSegmentSummary = "no_test" logging.info("Starting datafind with setup_datafind_runtime_generated") if datafind_method == "AT_RUNTIME_MULTIPLE_CACHES": datafindcaches, datafindouts = \ setup_datafind_runtime_cache_multi_calls_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafind_method == "AT_RUNTIME_SINGLE_CACHES": datafindcaches, datafindouts = \ setup_datafind_runtime_cache_single_call_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafind_method == "AT_RUNTIME_MULTIPLE_FRAMES": datafindcaches, datafindouts = \ setup_datafind_runtime_frames_multi_calls_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafind_method == "AT_RUNTIME_SINGLE_FRAMES": datafindcaches, datafindouts = \ setup_datafind_runtime_frames_single_call_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafind_method == "AT_RUNTIME_FAKE_DATA": pass elif datafind_method == "FROM_PREGENERATED_LCF_FILES": ifos = scienceSegs.keys() datafindcaches, datafindouts = \ setup_datafind_from_pregenerated_lcf_files(cp, ifos, outputDir, tags=tags) else: msg = """Entry datafind-method in [workflow-datafind] does not have " expected value. Valid values are AT_RUNTIME_MULTIPLE_FRAMES, AT_RUNTIME_SINGLE_FRAMES AT_RUNTIME_MULTIPLE_CACHES, AT_RUNTIME_SINGLE_CACHES, FROM_PREGENERATED_LCF_FILES, or AT_RUNTIME_FAKE_DATA. Consult the documentation for more info.""" raise ValueError(msg) using_backup_server = False if datafind_method == "AT_RUNTIME_MULTIPLE_FRAMES" or \ datafind_method == "AT_RUNTIME_SINGLE_FRAMES": if cp.has_option_tags("workflow-datafind", "datafind-backup-datafind-server", tags): using_backup_server = True backup_server = cp.get_opt_tags("workflow-datafind", "datafind-backup-datafind-server", tags) cp_new = copy.deepcopy(cp) cp_new.set("workflow-datafind", "datafind-ligo-datafind-server", backup_server) cp_new.set('datafind', 'urltype', 'gsiftp') backup_datafindcaches, backup_datafindouts =\ setup_datafind_runtime_frames_single_call_perifo(cp_new, scienceSegs, outputDir, tags=tags) backup_datafindouts = datafind_keep_unique_backups(\ backup_datafindouts, datafindouts) datafindcaches.extend(backup_datafindcaches) datafindouts.extend(backup_datafindouts) logging.info("setup_datafind_runtime_generated completed") # If we don't have frame files covering all times we can update the science # segments. 
if checkSegmentGaps in ['warn','update_times','raise_error']: logging.info("Checking science segments against datafind output....") newScienceSegs = get_science_segs_from_datafind_outs(datafindcaches) logging.info("New segments calculated from data find output.....") missingData = False for ifo in scienceSegs.keys(): # If no science segments in input then do nothing if not scienceSegs[ifo]: msg = "No science segments are present for ifo %s, " %(ifo) msg += "the segment metadata indicates there is no analyzable" msg += " strain data between the selected GPS start and end " msg += "times." logging.warning(msg) continue if ifo not in newScienceSegs: msg = "No data frames were found corresponding to the science " msg += "segments for ifo %s" %(ifo) logging.error(msg) missingData = True if checkSegmentGaps == 'update_times': scienceSegs[ifo] = segments.segmentlist() continue missing = scienceSegs[ifo] - newScienceSegs[ifo] if abs(missing): msg = "From ifo %s we are missing frames covering:" %(ifo) msg += "\n%s" % "\n".join(map(str, missing)) missingData = True logging.error(msg) if checkSegmentGaps == 'update_times': # Remove missing time, so that we can carry on if desired logging.info("Updating science segments for ifo %s." %(ifo)) scienceSegs[ifo] = scienceSegs[ifo] - missing if checkSegmentGaps == 'raise_error' and missingData: raise ValueError("Workflow cannot find needed data, exiting.") logging.info("Done checking, any discrepancies are reported above.") elif checkSegmentGaps == 'no_test': pass else: errMsg = "checkSegmentGaps kwarg must take a value from 'no_test', " errMsg += "'warn', 'update_times' or 'raise_error'." raise ValueError(errMsg) # Do all of the frame files that were returned actually exist? if checkFramesExist in ['warn','update_times','raise_error']: logging.info("Verifying that all frames exist on disk.") missingFrSegs, missingFrames = \ get_missing_segs_from_frame_file_cache(datafindcaches) missingFlag = False for ifo in missingFrames.keys(): # If no data in the input then do nothing if not scienceSegs[ifo]: continue # If using a backup server, does the frame exist remotely? if using_backup_server: # WARNING: This will be slow, but hopefully it will not occur # for too many frames. This could be optimized if # it becomes necessary. new_list = [] for frame in missingFrames[ifo]: for dfout in datafindouts: dfout_pfns = list(dfout.pfns) dfout_urls = [a.url for a in dfout_pfns] if frame.url in dfout_urls: pfn = dfout_pfns[dfout_urls.index(frame.url)] dfout.removePFN(pfn) if len(dfout.pfns) == 0: new_list.append(frame) else: msg = "Frame %s not found locally. "\ %(frame.url,) msg += "Replacing with remote url(s) %s." \ %(str([a.url for a in dfout.pfns]),) logging.info(msg) break else: new_list.append(frame) missingFrames[ifo] = new_list if missingFrames[ifo]: msg = "From ifo %s we are missing the following frames:" %(ifo) msg +='\n'.join([a.url for a in missingFrames[ifo]]) missingFlag = True logging.error(msg) if checkFramesExist == 'update_times': # Remove missing times, so that we can carry on if desired logging.info("Updating science times for ifo %s." %(ifo)) scienceSegs[ifo] = scienceSegs[ifo] - missingFrSegs[ifo] if checkFramesExist == 'raise_error' and missingFlag: raise ValueError("Workflow cannot find all frames, exiting.") logging.info("Finished checking frames.") elif checkFramesExist == 'no_test': pass else: errMsg = "checkFramesExist kwarg must take a value from 'no_test', " errMsg += "'warn', 'update_times' or 'raise_error'." 
raise ValueError(errMsg) # Check if there are cases where frames exist, but no entry in the segment # summary table are present. if checkSegmentSummary in ['warn', 'raise_error']: logging.info("Checking the segment summary table against frames.") dfScienceSegs = get_science_segs_from_datafind_outs(datafindcaches) missingFlag = False # NOTE: Should this be overrideable in the config file? sci_seg_name = "SCIENCE" if seg_file is None: err_msg = "You must provide the science segments SegFile object " err_msg += "if using the datafind-check-segment-summary option." raise ValueError(err_msg) if seg_file.seg_summ_dict is None: err_msg = "The provided science segments SegFile object must " err_msg += "contain a valid segment_summary table if using the " err_msg += "datafind-check-segment-summary option." raise ValueError(err_msg) seg_summary_times = seg_file.seg_summ_dict for ifo in dfScienceSegs.keys(): curr_seg_summ_times = seg_summary_times[ifo + ":" + sci_seg_name] missing = (dfScienceSegs[ifo] & seg_file.valid_segments) missing.coalesce() missing = missing - curr_seg_summ_times missing.coalesce() scienceButNotFrame = scienceSegs[ifo] - dfScienceSegs[ifo] scienceButNotFrame.coalesce() missing2 = scienceSegs[ifo] - scienceButNotFrame missing2.coalesce() missing2 = missing2 - curr_seg_summ_times missing2.coalesce() if abs(missing): msg = "From ifo %s the following times have frames, " %(ifo) msg += "but are not covered in the segment summary table." msg += "\n%s" % "\n".join(map(str, missing)) logging.error(msg) missingFlag = True if abs(missing2): msg = "From ifo %s the following times have frames, " %(ifo) msg += "are science, and are not covered in the segment " msg += "summary table." msg += "\n%s" % "\n".join(map(str, missing2)) logging.error(msg) missingFlag = True if checkSegmentSummary == 'raise_error' and missingFlag: errMsg = "Segment_summary discrepancy detected, exiting." raise ValueError(errMsg) elif checkSegmentSummary == 'no_test': pass else: errMsg = "checkSegmentSummary kwarg must take a value from 'no_test', " errMsg += "'warn', or 'raise_error'." raise ValueError(errMsg) # Now need to create the file for SCIENCE_AVAILABLE sci_avlble_dict = segments.segmentlistdict() # NOTE: Should this be overrideable in the config file? sci_avlble_name = "SCIENCE_AVAILABLE" for ifo in scienceSegs.keys(): sci_avlble_dict[ifo + ':' + sci_avlble_name] = scienceSegs[ifo] sci_avlble_file = SegFile.from_segment_list_dict('SCIENCE_AVAILABLE', sci_avlble_dict, ifo_list = scienceSegs.keys(), valid_segment=workflow.analysis_time, extension='.xml', tags=tags, directory=outputDir) logging.info("Leaving datafind module") if datafind_method == "AT_RUNTIME_FAKE_DATA": datafindouts = None else: datafindouts = FileList(datafindouts) return datafindouts, sci_avlble_file, scienceSegs, sci_avlble_name
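# Schematic version (toy segments) of the 'update_times' behaviour used in the
# checks above: science time not covered by the datafind output is simply
# subtracted from that ifo's science segments before the workflow carries on.
from ligo import segments

science = segments.segmentlist([segments.segment(100, 500)])
frames_found = segments.segmentlist([segments.segment(100, 300), segments.segment(350, 500)])
missing = science - frames_found
if abs(missing):
    science = science - missing
print(science)   # [segment(100, 300), segment(350, 500)]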