def make_ligolw_add(orig_cache, pattern, outfile, logdir, cp, ligolw_job=None):
    """
    Take a cache, sieve it for pattern, create a cache-file for ligolw_add,
    and create a LigolwAddNode that will create a new output cache with the
    matching files replaced by the single ligolw_added file.
    """
    # determine the files to ligolw_add
    sub_cache = orig_cache.sieve(description=pattern)
    if len(sub_cache) == 0:
        print("warning: no files on which to run ligolw_add", file=sys.stderr)
        return None

    # create the cache-file, replacing outfile's .xml extension with .cache
    cachefile = os.path.basename(outfile)[:-3] + 'cache'
    with open(cachefile, 'w') as f:
        sub_cache.tofile(f)

    if ligolw_job is None:
        ligolw_job = pipeline.LigolwAddJob(logdir, cp)
        ligolw_job.set_universe("local")
    node = pipeline.LigolwAddNode(ligolw_job)
    node.add_output_file(outfile)
    node.add_var_opt("input", cachefile)
    node.add_var_opt("output", outfile)

    # return the cache without the entries just extracted above,
    # plus the single ligolw_added output file
    new_cache = lal.Cache([entry for entry in orig_cache
                           if entry not in sub_cache])
    new_cache.extend(lal.Cache.from_urls([outfile]))
    return node, new_cache
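# A minimal usage sketch (hypothetical names: 'full_cache', 'config', the
# pattern and the output file name are assumptions, not defined here):
#
#   result = make_ligolw_add(full_cache, "TMPLTBANK",
#                            "H1-BANKS_COMBINED-900000000-1000.xml",
#                            "logs", config)
#   if result is not None:
#       lladd_node, full_cache = result
#       dag.add_node(lladd_node)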
def GenerateCache(fileList):
    """
    Generate a lal.Cache for the list of files

    @param fileList: a list of files
    @return: cache
    """
    cache = lal.Cache()
    for filename in fileList:
        AddFileToCache(filename, cache)
    return cache
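# Example (the file names are hypothetical and assumed to follow the
# T050017 naming convention that AddFileToCache expects):
#
#   cache = GenerateCache(["H1-INSPIRAL-900000000-2048.xml",
#                          "L1-INSPIRAL-900000000-2048.xml"])
#   print(len(cache))   # -> 2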
def find_latest(self, site, frametype, urltype=None, on_missing="warn"):
    """Query for the most recent framefile of a given type.

    @param site: single-character name of site to match
    @param frametype: name of frametype to match
    @param urltype: file scheme to search for (e.g. 'file')
    @param on_missing: what to do when the requested frame isn't found,
        one of:
            - C{'warn'} (default): print a warning,
            - C{'error'}: raise an L{RuntimeError}, or
            - C{'ignore'}: do nothing
    @type site: L{str}
    @type frametype: L{str}
    @type urltype: L{str}
    @type on_missing: L{str}

    @returns: L{Cache<glue.lal.Cache>} with one
        L{entry<glue.lal.CacheEntry>}

    @raises RuntimeError: if no frames are found and C{on_missing='error'}
    """
    if on_missing not in ('warn', 'error', 'ignore'):
        raise ValueError(
            "on_missing must be 'warn', 'error', or 'ignore'.")
    url = "%s/gwf/%s/%s/latest" % (_url_prefix, site, frametype)
    # if a URL type is specified append it to the path
    if urltype:
        url += "/%s" % urltype
    # request JSON output
    url += ".json"
    response = self._requestresponse("GET", url)
    urllist = decode(response.read())
    if len(urllist) == 0:
        if on_missing == "warn":
            sys.stderr.write("No files found!\n")
        elif on_missing == "error":
            raise RuntimeError("No files found!")
    return lal.Cache([
        lal.CacheEntry.from_T050017(x, coltype=self.LIGOTimeGPSType)
        for x in urllist])
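# A hedged usage sketch ('conn' standing in for an instance of the datafind
# client class these methods belong to is an assumption):
#
#   latest = conn.find_latest("H", "H1_R", urltype="file", on_missing="warn")
#   if len(latest):
#       print(latest[0].path)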
def find_frame(self, framefile, urltype=None, on_missing="warn"):
    """Query the LDR host for a single framefile

    @param framefile: name of the framefile to find
    @param urltype: file scheme to search for (e.g. 'file')
    @param on_missing: what to do when the requested frame isn't found,
        one of:
            - C{'warn'} (default): print a warning,
            - C{'error'}: raise an L{RuntimeError}, or
            - C{'ignore'}: do nothing
    @type framefile: L{str}
    @type urltype: L{str}
    @type on_missing: L{str}

    @returns: L{Cache<glue.lal.Cache>}

    @raises RuntimeError: if given framefile is malformed
    @raises RuntimeError: if no frames are found and C{on_missing='error'}
    """
    if on_missing not in ("warn", "error", "ignore"):
        raise ValueError(
            "on_missing must be 'warn', 'error', or 'ignore'.")
    framefile = os.path.basename(framefile)
    # parse file name for site, frame type
    try:
        site, frametype, _, _ = framefile.split("-")
    except Exception as e:
        raise RuntimeError("Error parsing filename %s: %s" % (framefile, e))
    url = ("%s/gwf/%s/%s/%s.json"
           % (_url_prefix, site, frametype, framefile))
    response = self._requestresponse("GET", url)
    urllist = decode(response.read())
    if len(urllist) == 0:
        if on_missing == "warn":
            sys.stderr.write("No files found!\n")
        elif on_missing == "error":
            raise RuntimeError("No files found!")
    # verify urltype is what we want
    cache = lal.Cache(e for e in [
        lal.CacheEntry.from_T050017(x, coltype=self.LIGOTimeGPSType)
        for x in urllist] if not urltype or e.scheme == urltype)
    return cache
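# Example query for a single named file ('conn' is again a stand-in, and the
# file name is illustrative of the site-frametype-start-duration convention):
#
#   cache = conn.find_frame("H-H1_R-900000000-32.gwf", urltype="file",
#                           on_missing="error")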
def get_missing_segs_from_frame_file_cache(datafindcaches):
    """
    This function will use os.path.isfile to determine if all the frame files
    returned by the local datafind server actually exist on the disk. This can
    then be used to update the science times if needed.

    Parameters
    ----------
    datafindcaches : OutGroupList
        List of all the datafind output files.

    Returns
    -------
    missingFrameSegs : dict of ifo-keyed glue.segments.segmentlist instances
        The times corresponding to missing frames found in datafindOuts.
    missingFrames : dict of ifo-keyed lal.Cache instances
        The list of missing frames
    """
    missingFrameSegs = {}
    missingFrames = {}
    for cache in datafindcaches:
        if len(cache) > 0:
            # Don't bother if these are not file:// urls, assume all urls in
            # one cache file must be the same type
            if not cache[0].scheme == 'file':
                warn_msg = "We have %s entries in the " % (cache[0].scheme,)
                warn_msg += "cache file. I do not check if these exist."
                logging.info(warn_msg)
                continue
            _, currMissingFrames = cache.checkfilesexist(on_missing="warn")
            missingSegs = segments.segmentlist(
                e.segment for e in currMissingFrames).coalesce()
            ifo = cache.ifo
            if ifo not in missingFrameSegs:
                missingFrameSegs[ifo] = missingSegs
                missingFrames[ifo] = lal.Cache(currMissingFrames)
            else:
                missingFrameSegs[ifo].extend(missingSegs)
                # NOTE: This .coalesce probably isn't needed as the segments
                # should be disjoint. If speed becomes an issue maybe
                # remove it?
                missingFrameSegs[ifo].coalesce()
                missingFrames[ifo].extend(currMissingFrames)
    return missingFrameSegs, missingFrames
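# Sketch of how the return values might be used to trim science segments
# ('sci_segs', an ifo-keyed dict of segmentlists, is an assumption):
#
#   missing_segs, missing_frames = \
#       get_missing_segs_from_frame_file_cache(datafindcaches)
#   for ifo, segs in missing_segs.items():
#       sci_segs[ifo] -= segs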
print "done" ############################################################################## # Step 12: Write out the LAL cache files for the various output data if gps_start_time is not None and gps_end_time is not None: print "generating cache files for output data products...", cache_fname = '' for ifo in ifo_analyze: cache_fname += ifo cache_fname += '-INSPIRAL_HIPE' if usertag: cache_fname += '_' + usertag cache_fname += '-' + str(gps_start_time) + '-' + \ str(gps_end_time - gps_start_time) + '.cache' output_data_cache = lal.Cache() for node in dag.get_nodes(): if isinstance(node, pipeline.LSCDataFindNode): # ignore datafind nodes, as their output is a cache file continue # add the data generated by the job to the output data cache output_file = node.get_output() if output_file.__class__.__name__ == 'list': output_data_cache.append(lal.Cache.from_urls(output_file)[0]) else: output_data_cache.append(lal.Cache.from_urls([output_file])[0]) if (isinstance(node,inspiral.CoireNode) or \ isinstance(node,inspiral.SireNode)) and \
def find_frame_urls(self, site, frametype, gpsstart, gpsend,
                    match=None, urltype=None, on_gaps="warn"):
    """Find the framefiles for the given type in the [start, end) interval

    @param site: single-character name of site to match
    @param frametype: name of frametype to match
    @param gpsstart: integer GPS start time of query
    @param gpsend: integer GPS end time of query
    @param match: regular expression to match against
    @param urltype: file scheme to search for (e.g. 'file')
    @param on_gaps: what to do when the requested frame isn't found,
        one of:
            - C{'warn'} (default): print a warning,
            - C{'error'}: raise an L{RuntimeError}, or
            - C{'ignore'}: do nothing
    @type site: L{str}
    @type frametype: L{str}
    @type gpsstart: L{int}
    @type gpsend: L{int}
    @type match: L{str}
    @type urltype: L{str}
    @type on_gaps: L{str}

    @returns: L{Cache<glue.lal.Cache>}

    @raises RuntimeError: if gaps are found and C{on_gaps='error'}
    """
    if on_gaps not in ("warn", "error", "ignore"):
        raise ValueError("on_gaps must be 'warn', 'error', or 'ignore'.")
    url = ("%s/gwf/%s/%s/%s,%s"
           % (_url_prefix, site, frametype, gpsstart, gpsend))
    # if a URL type is specified append it to the path
    if urltype:
        url += "/%s" % urltype
    # request JSON output
    url += ".json"
    # append a regex if input
    if match:
        url += "?match=%s" % match
    # make query
    response = self._requestresponse("GET", url)
    urllist = decode(response.read())
    out = lal.Cache([
        lal.CacheEntry.from_T050017(x, coltype=self.LIGOTimeGPSType)
        for x in urllist])
    if on_gaps == "ignore":
        return out
    span = segments.segment(gpsstart, gpsend)
    seglist = segments.segmentlist(e.segment for e in out).coalesce()
    missing = (segments.segmentlist([span]) - seglist).coalesce()
    if span in seglist:
        return out
    msg = "Missing segments:\n%s" % "\n".join(map(str, missing))
    if on_gaps == "warn":
        sys.stderr.write("%s\n" % msg)
        return out
    raise RuntimeError(msg)
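# Usage sketch: find all H1_R files overlapping a GPS interval and fail
# loudly on gaps ('conn' is a stand-in for the client instance; the
# frametype and times are illustrative):
#
#   cache = conn.find_frame_urls("H", "H1_R", 900000000, 900002048,
#                                urltype="file", on_gaps="error")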
##############################################################################
# create the DAG writing the log to the specified directory
dag = pipeline.CondorDAG(logfile)
dag.set_dag_file(basename)

##############################################################################
# Open the ihope cache and create THINCA cache
print("Parsing the ihope cache...")

coinc_tag = cp.get('pipeline', 'coinc-file-tag')
with open(options.ihope_cache) as cachefp:
    ihope_cache = [line for line in cachefp
                   if coinc_tag in line or " INJECTIONS" in line
                   or " PREGEN_INJFILE" in line]

thinca_cache = lal.Cache([lal.CacheEntry(entry) for entry in ihope_cache
                          if coinc_tag in entry])
inj_cache = lal.Cache([lal.CacheEntry(entry) for entry in ihope_cache
                       if " INJECTIONS" in entry
                       or " PREGEN_INJFILE" in entry])

del ihope_cache

# get the USERTAGS from the thinca_cache
# for single stage runs with ssipe, the thinca's output is of the form
# IFOs_THINCA_UserTag_StartTime_Duration.xml.gz
# where UserTag = TiSiNum_RunName_CAT_X_VETO
skip_tags = 2
user_tags = set('_'.join(entry.description.split('_')[skip_tags:])
                for entry in thinca_cache)
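# Illustration of the tag extraction above (the description string is made
# up for this example):
#
#   parts = "THINCA_TISI0_S6_CAT_2_VETO".split('_')
#   '_'.join(parts[skip_tags:])   # -> 'S6_CAT_2_VETO' with skip_tags = 2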
def followup(self, inj, selectIFO, description=None):
    """
    Do the followup procedure for the missed injection 'inj' and create
    the several time-series for INSPIRAL and THINCA.
    The return value is the name of the created html file.

    @param inj: sim_inspiral table of the injection that needs to be
        followed up
    @param selectIFO: the IFO that is investigated
    @param description: Can be used to sieve further this pattern from
        the description field.
    """

    def fill_table(page, contents):
        """
        Making life easier...
        """
        page.add('<tr>')
        for content in contents:
            page.add('<td>')
            page.add(str(content))
            page.add('</td>')
        page.add('</tr>')

    # get the ID corresponding to this injection
    injection_id = self.findInjection(inj)

    # increase internal number:
    self.number += 1

    ## create the web-page and add a table
    page = markup.page()
    page.h1("Followup missed injection #" + str(self.number) +
            " in " + selectIFO)
    page.hr()
    page.add('<table border="3" ><tr><td>')
    page.add('<table border="2" >')
    fill_table(page, ['<b>parameter', '<b>value'])
    fill_table(page, ['Number', self.number])
    fill_table(page, ['inj ID', injection_id])
    fill_table(page, ['mass1', '%.2f' % inj.mass1])
    fill_table(page, ['mass2', '%.2f' % inj.mass2])
    fill_table(page, ['mtotal', '%.2f' % (inj.mass1 + inj.mass2)])
    fill_table(page, ['mchirp', '%.2f' % inj.mchirp])
    fill_table(page, ['end_time', inj.geocent_end_time])
    fill_table(page, ['end_time_ns', inj.geocent_end_time_ns])
    fill_table(page, ['distance', '%.1f' % inj.distance])
    fill_table(page, ['eff_dist_h', '%.1f' % inj.eff_dist_h])
    fill_table(page, ['eff_dist_l', '%.1f' % inj.eff_dist_l])
    fill_table(page, ['eff_dist_v', '%.1f' % inj.eff_dist_v])
    fill_table(page, ['eff_dist_g', '%.1f' % inj.eff_dist_g])
    fill_table(page, ['playground',
                      '%s' % pipeline.s2play(inj.geocent_end_time)])
    page.add('</table></td>')

    # print infos to screen if required
    if self.opts.verbose:
        self.print_inj(inj, injection_id)

    # sieve the cache for the required INSPIRAL and THINCA files
    invest_dict = {}
    for stage, cache in self.triggerCache.items():

        trig_cache = lal.Cache()
        for c in cache:
            # check the time and the injection ID
            if inj.geocent_end_time in c.segment:
                if self.get_injection_id(url=c.url) == injection_id:
                    trig_cache.append(c)

        # create a filelist
        file_list = trig_cache.sieve(description=description).pfnlist()

        # check if the pfnlist is empty
        if len(file_list) == 0:
            print("Error: No files found for stage %s in the "
                  "cache for ID %s and time %d; probably mismatch of a "
                  "pattern in the options." %
                  (stage, injection_id, inj.geocent_end_time),
                  file=sys.stderr)
            continue

        # if the stage is THINCA_SECOND...
        if 'THINCA_SECOND' in stage:

            # ... need to loop over the four categories
            for cat in [1, 2, 3, 4]:

                select_list = self.select_category(file_list, cat)
                if len(select_list) == 0:
                    print("WARNING: No THINCA_SECOND files found for "
                          "category", cat)
                    continue

                modstage = stage + '_CAT_' + str(cat)
                invest_dict[modstage] = self.investigateTimeseries(
                    select_list, inj, selectIFO, modstage, self.number)

        else:
            invest_dict[stage] = self.investigateTimeseries(
                file_list, inj, selectIFO, stage, self.number)

    ## print out the result for this particular injection
    page.add('<td><table border="2" >')
    fill_table(page, ['<b>step', '<b>F/M', '<b>Rec. SNR',
                      '<b>Rec. mchirp', '<b>Rec. eff_dist',
                      '<b>Rec. chisq', '<b>Veto ON/OFF'])

    # loop over the stages and create the table with
    # the various data in it (when available)
    for stage in self.orderLabels:
        if stage in invest_dict:
            result = invest_dict[stage]

            # Fill in the details of the loudest found coinc.
            found_ifo = ''
            loudest_snr = ''
            loudest_mchirp = ''
            loudest_eff_dist = ''
            loudest_chisq = ''
            veto_onoff = ''

            # add all the IFO's for this coincident
            for ifo in result['foundset']:
                found_ifo += ifo + ' '

                # Parameters of the loudest trigger, taken from the
                # 'loudest-details' dictionary, created in
                # 'investigateTimeseries'
                loudest_snr += ifo + ': ' + \
                    str(result['loudest_details'][ifo]['snr']) + '<br>'
                loudest_mchirp += ifo + ': ' + \
                    str(result['loudest_details'][ifo]['mchirp']) + '<br>'
                loudest_eff_dist += ifo + ': ' + \
                    str(result['loudest_details'][ifo]['eff_dist']) + '<br>'
                loudest_chisq += ifo + ': ' + \
                    str(result['loudest_details'][ifo]['chisq']) + '<br>'

                # Check whether some of the ifo times is vetoed
                timeTrigger = float(
                    result['loudest_details'][ifo]['timeTrigger'])
                if self.vetodict[ifo]:
                    veto = self.isThereVeto(timeTrigger, ifo)
                    veto_txt = 'ON' if veto else 'OFF'
                    veto_onoff += ifo + ': ' + veto_txt + '<br>'
                else:
                    veto_onoff += ifo + ': No info<br>'

            # Fill the table whether something is found or not
            if len(result['foundset']) > 0:
                fill_table(page, [stage, 'FOUND in ' + found_ifo,
                                  'loudest<br>' + loudest_snr,
                                  'loudest<br>' + loudest_mchirp,
                                  'loudest<br>' + loudest_eff_dist,
                                  'loudest<br>' + loudest_chisq,
                                  veto_onoff])
            else:
                fill_table(page, [stage, '<font color="red">MISSED'])

    page.add('</table>')
    page.add('</td></tr></table><br><br>')

    ## add the pictures to the webpage
    for stage in self.orderLabels:
        if stage in invest_dict:
            result = invest_dict[stage]
            fname = result['filename']
            page.a(extra.img(src=[fname], width=400,
                             alt=fname, border="2"),
                   title=fname, href=[fname])

    # add version information
    page.add('<hr>Page created with %s Version %s' %
             (__prog__, git_version.verbose_msg))

    # and write the html file
    htmlfilename = self.opts.prefix + "_" + selectIFO + "_followup_" + \
        str(self.number) + self.opts.suffix + '.html'
    with open(self.opts.output_path + htmlfilename, 'w') as html_file:
        html_file.write(page(False))

    # store html file in fnameList
    self.fnameList.append(htmlfilename)

    # supply the output
    return htmlfilename
def thinca_coinc(ifo_list, single_data_analyzed, cafe_caches, cafe_base,
                 lladd_job, tisi_file_name, lladd_veto_file, coinc_job, dag,
                 do_coinc, do_insp, usertag=None, inspinjNode=None):
    """
    Run thinca on the coincident times from each of the sets of IFOs.
    Since the way we treat all this data is the same, this function is the
    same for all.

    ifo_list = a list of the ifos we are to analyze
    single_data_analyzed = dictionary of single ifo data analyzed
    cafe_caches = the caches from ligolw_cafe.ligolw_cafe()
    cafe_base = the base name for the cafe caches
    lladd_job = the condor job to do ligolw_add
    tisi_file_name = the name of the tisi file to add
    lladd_veto_file = the name of the veto file to add or None
    coinc_job = the condor job to do thinca
    dag = the DAG to attach the nodes to
    do_coinc = whether we should add the thinca jobs to the dag
    do_insp = whether previous inspiral jobs are in the dag
    usertag = the usertag to add to the output file name
    inspinjNode = the inspinj node to be added as a parent to ligolw_add jobs
    """
    # create caches using ligolw_cafe
    cache_names = ligolw_cafe.write_caches(cafe_base, cafe_caches,
                                           set(ifo_list))

    coinc_analyzed = []

    # loop over caches
    for idx in range(len(cafe_caches)):
        if len(cafe_caches[idx].objects):
            cache = cafe_caches[idx]
            cachename = cache_names[idx]
            thincabase = cafe_base.split('.')[0].replace('CAFE_', '')
            ifos = set(cache_entry.observatory
                       for cache_entry in cache.objects)

            # extract segment information
            seg = power.cache_span(cache.objects)
            seg = pipeline.AnalysisChunk(seg[0], seg[1])

            # create node for ligolw_add to create xml file
            lladd = pipeline.LigolwAddNode(lladd_job)

            # add the tisi and veto files
            lladd.add_file_arg(tisi_file_name)
            if lladd_veto_file:
                lladd.add_file_arg(lladd_veto_file)

            # add the input xml files from the cafe cache
            with open(cachename, 'r') as cachefp:
                cacheobject = lal.Cache.fromfile(cachefp)
            for pfn in cacheobject.pfnlist():
                lladd.add_file_arg(pfn)

            # create node for ligolw_thinca to analyze xml file
            thinca = inspiral.ThincaNode(coinc_job)
            thinca.set_start(seg.start(), pass_to_command_line=False)
            thinca.set_end(seg.end(), pass_to_command_line=False)
            thinca.set_zip_output(True)
            if usertag:
                thinca.set_user_tag(thincabase, pass_to_command_line=False)

            # check if caches are adjacent
            coinc_end_time_segment = ''
            if idx and (cache.extent[0] == cafe_caches[idx - 1].extent[1]):
                coinc_end_time_segment += str(cache.extent[0])
            coinc_end_time_segment += ':'
            if idx + 1 < len(cafe_caches) and \
                    (cache.extent[1] == cafe_caches[idx + 1].extent[0]):
                coinc_end_time_segment += str(cache.extent[1])
            thinca.add_var_opt('coinc-end-time-segment',
                               coinc_end_time_segment)

            # scroll through ifos, adding the appropriate ones
            for ifo in ifo_list:
                if ifo in ifos:
                    thinca.set_ifo(ifo, pass_to_command_line=False)

            # add all inspiral jobs in this cache to input
            if do_insp:
                for cache_entry in cache.objects:
                    lladd.add_parent(single_data_analyzed[cache_entry])

            # add inspinj job as parent of each ligolw_add job
            if inspinjNode and opts.inspinj:
                lladd.add_parent(inspinjNode)

            # set output of ligolw_add jobs to follow thinca's convention
            lladd_outfile = re.sub('THINCA', 'LLWADD', thinca.get_output())
            lladd.set_output(lladd_outfile)
            thinca.set_input(lladd.get_output(), pass_to_command_line=False)
            thinca.add_file_arg(lladd.get_output())

            # check for condor settings
            if not opts.disable_dag_categories:
                lladd.set_category('ligolw_add')
                thinca.set_category('thinca')
            if not opts.disable_dag_priorities:
                lladd.set_priority(3)
                thinca.set_priority(3)

            # add ligolw_add and ligolw_thinca nodes to dag
            if do_coinc:
                dag.add_node(lladd)
                thinca.add_parent(lladd)
                dag.add_node(thinca)

            # add ligolw_thinca coincident segment
            coinc_analyzed.append(AnalyzedIFOData(seg, thinca))

    return coinc_analyzed
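# A hedged sketch of wiring thinca_coinc into a DAG (the cafe outputs, job
# objects, and file names are assumed to exist under these names):
#
#   coinc_segs = thinca_coinc(ifo_list, single_data_analyzed, cafe_caches,
#                             "CAFE_FULL_DATA", lladd_job, tisi_file,
#                             veto_file, thinca_job, dag,
#                             do_coinc=True, do_insp=True, usertag=usertag)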
                            gps_start_time, gps_end_time, inj_file=inj_file,
                            ifotag="SUMMARY_FIRST", usertag=usertag,
                            inspinjNode=inspinj)
print("done")

##############################################################################
# Step 6: Run thinca on each of the disjoint sets of coincident data

if opts.coincidence:
    print("setting up thinca jobs...")
    sys.stdout.flush()

    # create a cache of the inspiral jobs
    single_data_analyzed = {}
    inspiral_cache = lal.Cache()
    for ifo in ifo_list:
        for insp in chunks_analyzed[ifo]:
            output_file = insp.get_dag_node().get_output()
            output_cache_entry = lal.Cache.from_urls([output_file])[0]
            inspiral_cache.append(output_cache_entry)
            single_data_analyzed[output_cache_entry] = insp.get_dag_node()

    # get the ligolw_thinca command line arguments
    thinca_job.add_ini_opts(cp, 'thinca')

    # add the vetoes to the ligolw_add and ligolw_thinca jobs
    if cp.has_section("vetoes"):
        lladd_veto_file = cp.get("vetoes", "vetoes-file")
        thinca_job.add_opt("vetoes-name", cp.get("vetoes", "vetoes-name"))
    else: