Example #1
def make_ligolw_add(orig_cache, pattern, outfile, logdir, cp, ligolw_job=None):
  """
  Take a cache, sieve it for pattern, create a cache-file for ligolw_add,
  and create a LigolwAddNode that will create a new
  output cache with the matching files replaced by the single ligolw_added
  file.
  """
  # determine the files to ligolw_add
  sub_cache = orig_cache.sieve(description=pattern)
  if len(sub_cache) == 0:
    print("warning: no files on which to run ligolw_add", file=sys.stderr)
    return None

  # create the cache file (replace the trailing 'xml' of outfile with 'cache')
  cachefile = os.path.basename(outfile)[:-3] + 'cache'
  with open(cachefile, 'w') as cache_fh:
    sub_cache.tofile(cache_fh)

  if ligolw_job is None:
    ligolw_job = pipeline.LigolwAddJob(logdir, cp)
    ligolw_job.set_universe("local")

  node = pipeline.LigolwAddNode(ligolw_job)
  node.add_output_file(outfile)
  node.add_var_opt("input", cachefile)
  node.add_var_opt("output", outfile)

  # build the new cache: drop the entries just extracted and add the
  # ligolw_add output file in their place
  new_cache = lal.Cache([entry for entry in orig_cache if entry not in sub_cache])
  new_cache.extend(lal.Cache.from_urls([outfile]))
  return node, new_cache
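
A minimal usage sketch for the helper above. The input cache name, sieve pattern, output file name, and the surrounding cp/dag objects are assumptions for illustration, not taken from the original pipeline:

# Hypothetical illustration: sieve an existing cache for THINCA files and
# schedule a single ligolw_add job that merges them into one XML file.
orig_cache = lal.Cache.fromfile(open("ihope.cache"))          # assumed input cache
result = make_ligolw_add(orig_cache, "THINCA",                # assumed pattern
                         "H1L1-LLWADD-900000000-2048.xml",    # assumed output name
                         "logs", cp)                          # cp: ConfigParser from the script
if result is not None:
  node, new_cache = result
  dag.add_node(node)                                          # dag: pipeline.CondorDAG from the script
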
Example #2
def GenerateCache(fileList):
    """
    Generate a lal.Cache for the list of files.

    @param fileList: a list of files
    @return: the cache containing one entry per file
    """
    cache = lal.Cache()
    for filename in fileList:
        AddFileToCache(filename, cache)
    return cache
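
AddFileToCache is not shown here; when the file names follow the usual IFO-DESCRIPTION-GPSSTART-DURATION convention, an equivalent cache can be built directly with lal.Cache.from_urls. A sketch with made-up file names, assuming lal is glue.lal as in the snippets above:

file_list = ["H1-INSPIRAL_FIRST-900000000-2048.xml.gz",   # made-up example names
             "L1-INSPIRAL_FIRST-900000000-2048.xml.gz"]
cache = lal.Cache.from_urls(file_list)
print(len(cache.sieve(ifos="H1")))   # -> 1
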
Example #3
    def find_latest(self, site, frametype, urltype=None, on_missing="warn"):
        """Query for the most recent framefile of a given type.

        @param  site:
            single-character name of site to match
        @param frametype:
            name of frametype to match
        @param urltype:
            file scheme to search for (e.g. 'file')
        @param on_missing:
            what to do when the requested frame isn't found, one of:
                - C{'warn'} (default): print a warning,
                - C{'error'}: raise an L{RuntimeError}, or
                - C{'ignore'}: do nothing

        @type       site: L{str}
        @type  frametype: L{str}
        @type    urltype: L{str}
        @type on_missing: L{str}

        @returns: L{Cache<glue.lal.Cache>} with one
                  L{entry<glue.lal.CacheEntry>}

        @raises RuntimeError: if no frames are found and C{on_missing='error'}
        """
        if on_missing not in ('warn', 'error', 'ignore'):
            raise ValueError(
                "on_missing must be 'warn', 'error', or 'ignore'.")
        url = "%s/gwf/%s/%s/latest" % (_url_prefix, site, frametype)
        # if a URL type is specified append it to the path
        if urltype:
            url += "/%s" % urltype
        # request JSON output
        url += ".json"
        response = self._requestresponse("GET", url)
        urllist = decode(response.read())
        if len(urllist) == 0:
            if on_missing == "warn":
                sys.stderr.write("No files found!\n")
            elif on_missing == "error":
                raise RuntimeError("No files found!")
        return lal.Cache([
            lal.CacheEntry.from_T050017(x, coltype=self.LIGOTimeGPSType)
            for x in urllist
        ])
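
A sketch of calling find_latest. Here 'conn' stands for an instance of the datafind client class that defines this method, and the site and frame type are illustrative assumptions:

# Hypothetical usage: ask for the most recent H1_R frame visible as a local file.
latest = conn.find_latest("H", "H1_R", urltype="file", on_missing="warn")
if len(latest):
    print(latest[0].path)   # local path of the newest frame file
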
Example #4
    def find_frame(self, framefile, urltype=None, on_missing="warn"):
        """Query the LDR host for a single framefile

        @returns: L{Cache<glue.lal.Cache>}

        @param framefile:
            name of the framefile to find
        @param urltype:
            file scheme to search for (e.g. 'file')
        @param on_missing:
            what to do when the requested frame isn't found, one of:
                - C{'warn'} (default): print a warning,
                - C{'error'}: raise an L{RuntimeError}, or
                - C{'ignore'}: do nothing

        @type  framefile: L{str}
        @type    urltype: L{str}
        @type on_missing: L{str}

        @raises RuntimeError: if given framefile is malformed
        """
        if on_missing not in ("warn", "error", "ignore"):
            raise ValueError(
                "on_missing must be 'warn', 'error', or 'ignore'.")
        framefile = os.path.basename(framefile)
        # parse file name for site, frame type
        try:
            site, frametype, _, _ = framefile.split("-")
        except Exception as e:
            raise RuntimeError("Error parsing filename %s: %s" %
                               (framefile, e))
        url = ("%s/gwf/%s/%s/%s.json" %
               (_url_prefix, site, frametype, framefile))
        response = self._requestresponse("GET", url)
        urllist = decode(response.read())
        if len(urllist) == 0:
            if on_missing == "warn":
                sys.stderr.write("No files found!\n")
            elif on_missing == "error":
                raise RuntimeError("No files found!")
        # verify urltype is what we want
        cache = lal.Cache(e for e in [
            lal.CacheEntry.from_T050017(x, coltype=self.LIGOTimeGPSType)
            for x in urllist
        ] if not urltype or e.scheme == urltype)
        return cache
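
And a corresponding sketch for find_frame, again with an assumed client instance and a made-up frame file name:

# Hypothetical usage: look up a single, already-known frame file by name.
cache = conn.find_frame("H-H1_R-900000000-32.gwf", urltype="file")
for entry in cache:
    print(entry.url)
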
Example #5
def get_missing_segs_from_frame_file_cache(datafindcaches):
    """
    This function will use os.path.isfile to determine if all the frame files
    returned by the local datafind server actually exist on the disk. This can
    then be used to update the science times if needed.

    Parameters
    -----------
    datafindcaches : OutGroupList
        List of all the datafind output files.

    Returns
    --------
    missingFrameSegs : dict of glue.segments.segmentlist instances, keyed by ifo
        The times corresponding to missing frames found in datafindcaches.
    missingFrames : dict of lal.Cache instances, keyed by ifo
        The list of missing frames.
    """
    missingFrameSegs = {}
    missingFrames = {}
    for cache in datafindcaches:
        if len(cache) > 0:
            # Don't bother if these are not file:// urls, assume all urls in
            # one cache file must be the same type
            if not cache[0].scheme == 'file':
                warn_msg = "Cache contains %s URLs; " % (cache[0].scheme,)
                warn_msg += "not checking whether these files exist."
                logging.info(warn_msg)
                continue
            _, currMissingFrames = cache.checkfilesexist(on_missing="warn")
            missingSegs = segments.segmentlist(e.segment \
                                         for e in currMissingFrames).coalesce()
            ifo = cache.ifo
            if ifo not in missingFrameSegs:
                missingFrameSegs[ifo] = missingSegs
                missingFrames[ifo] = lal.Cache(currMissingFrames)
            else:
                missingFrameSegs[ifo].extend(missingSegs)
                # NOTE: This .coalesce probably isn't needed as the segments
                # should be disjoint. If speed becomes an issue maybe remove it?
                missingFrameSegs[ifo].coalesce()
                missingFrames[ifo].extend(currMissingFrames)
    return missingFrameSegs, missingFrames
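
A sketch of how this function might be used to report missing data per detector; 'datafindcaches' is assumed to come from the preceding datafind step, and abs() of a glue segmentlist gives its total duration:

missing_segs, missing_frames = \
    get_missing_segs_from_frame_file_cache(datafindcaches)
for ifo, seglist in missing_segs.items():
    logging.warning("%s: %s seconds of data covered only by missing frames",
                    ifo, abs(seglist))
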
Example #6
print "done"

##############################################################################
# Step 12: Write out the LAL cache files for the various output data

if gps_start_time is not None and gps_end_time is not None:
    print "generating cache files for output data products...",
    cache_fname = ''
    for ifo in ifo_analyze:
        cache_fname += ifo
    cache_fname += '-INSPIRAL_HIPE'
    if usertag: cache_fname += '_' + usertag
    cache_fname += '-' + str(gps_start_time) + '-' + \
      str(gps_end_time - gps_start_time) + '.cache'
    output_data_cache = lal.Cache()

    for node in dag.get_nodes():
        if isinstance(node, pipeline.LSCDataFindNode):
            # ignore datafind nodes, as their output is a cache file
            continue

        # add the data generated by the job to the output data cache
        output_file = node.get_output()

        if isinstance(output_file, list):
            output_data_cache.append(lal.Cache.from_urls(output_file)[0])
        else:
            output_data_cache.append(lal.Cache.from_urls([output_file])[0])
        if (isinstance(node,inspiral.CoireNode) or \
            isinstance(node,inspiral.SireNode)) and \
Example #7
    def find_frame_urls(self,
                        site,
                        frametype,
                        gpsstart,
                        gpsend,
                        match=None,
                        urltype=None,
                        on_gaps="warn"):
        """Find the framefiles for the given type in the [start, end) interval
        frame

        @param site:
            single-character name of site to match
        @param frametype:
            name of frametype to match
        @param gpsstart:
            integer GPS start time of query
        @param gpsend:
            integer GPS end time of query
        @param match:
            regular expression to match against
        @param urltype:
            file scheme to search for (e.g. 'file')
        @param on_gaps:
            what to do when the requested interval contains gaps, one of:
                - C{'warn'} (default): print a warning,
                - C{'error'}: raise an L{RuntimeError}, or
                - C{'ignore'}: do nothing

        @type       site: L{str}
        @type  frametype: L{str}
        @type   gpsstart: L{int}
        @type     gpsend: L{int}
        @type      match: L{str}
        @type    urltype: L{str}
        @type    on_gaps: L{str}

        @returns: L{Cache<glue.lal.Cache>}

        @raises RuntimeError: if gaps are found and C{on_gaps='error'}
        """
        if on_gaps not in ("warn", "error", "ignore"):
            raise ValueError("on_gaps must be 'warn', 'error', or 'ignore'.")
        url = ("%s/gwf/%s/%s/%s,%s" %
               (_url_prefix, site, frametype, gpsstart, gpsend))
        # if a URL type is specified append it to the path
        if urltype:
            url += "/%s" % urltype
        # request JSON output
        url += ".json"
        # append a regex if input
        if match:
            url += "?match=%s" % match
        # make query
        response = self._requestresponse("GET", url)
        urllist = decode(response.read())

        out = lal.Cache([
            lal.CacheEntry.from_T050017(x, coltype=self.LIGOTimeGPSType)
            for x in urllist
        ])

        if on_gaps == "ignore":
            return out
        else:
            span = segments.segment(gpsstart, gpsend)
            seglist = segments.segmentlist(e.segment for e in out).coalesce()
            missing = (segments.segmentlist([span]) - seglist).coalesce()
            if span in seglist:
                return out
            else:
                msg = "Missing segments: \n%s" % "\n".join(map(str, missing))
                if on_gaps == "warn":
                    sys.stderr.write("%s\n" % msg)
                    return out
                else:
                    raise RuntimeError(msg)
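
A usage sketch for find_frame_urls; the connection object, site, frame type, and GPS interval are assumptions:

# Hypothetical usage: find every H1_R frame overlapping a GPS interval,
# restrict to file:// URLs, and warn (rather than fail) on gaps.
cache = conn.find_frame_urls("H", "H1_R", 900000000, 900002048,
                             urltype="file", on_gaps="warn")
paths = cache.pfnlist()   # local paths, one per frame file
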
Example #8
##############################################################################
# create the DAG writing the log to the specified directory
dag = pipeline.CondorDAG(logfile)
dag.set_dag_file(basename)

##############################################################################
# Open the ihope cache and create THINCA cache

print "Parsing the ihope cache..."

coinc_tag = cp.get('pipeline', 'coinc-file-tag')
ihope_cache = [line for line in file(options.ihope_cache) \
  if coinc_tag in line or " INJECTIONS" in line or " PREGEN_INJFILE" in line]

thinca_cache = lal.Cache([lal.CacheEntry(entry) for entry in ihope_cache \
  if coinc_tag in entry])
inj_cache = lal.Cache([lal.CacheEntry(entry) for entry in ihope_cache if \
  " INJECTIONS" in entry or " PREGEN_INJFILE" in entry])

del ihope_cache

# get the USERTAGS from the thinca_cache
# for single stage runs with ssipe, the thinca's output is of the form
# IFOs_THINCA_UserTag_StartTime_Duration.xml.gz
# where UserTag = TiSiNum_RunName_CAT_X_VETO

skip_tags = 2
user_tags = set('_'.join(entry.description.split('_')[skip_tags:])
  for entry in thinca_cache)
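
The ihope cache read above is a plain LAL cache file with one entry per line in the form 'observatory description gps-start duration url'. A sketch of parsing one such line (the values are made up):

line = ("H1 THINCA_FULL_DATA_CAT_3 900000000 2048 "
        "file://localhost/data/H1-THINCA_FULL_DATA_CAT_3-900000000-2048.xml.gz")
entry = lal.CacheEntry(line)
print(entry.observatory, entry.description, entry.segment, entry.path)
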
Example #9
  def followup(self, inj, selectIFO, description = None):
    """
    Do the followup procedure for the missed injection 'inj'
    and create the several time-series for INSPIRAL and THINCA.
    The return value is the name of the created html file.
    @param inj: sim_inspiral table of the injection that needs to be
                followed up
    @param selectIFO: the IFO that is investigated
    @param description: optional pattern used to further sieve the cache
                        on its description field.
    """
    
    def fill_table(page, contents ):
      """
      Add one table row to 'page', with one cell per item in 'contents'.
      """
      page.add('<tr>')
      for content in contents:
        page.add('<td>')
        page.add( str(content) )
        page.add('</td>')
      page.add('</tr>')

   
    # get the ID corresponding to this injection
    injection_id = self.findInjection( inj )

    # increase internal number:
    self.number+=1

    ## create the web-page and add a table
    page = markup.page()
    page.h1("Followup missed injection #"+str(self.number)+" in "+selectIFO )
    page.hr()
    page.add('<table border="3" ><tr><td>')
    page.add('<table border="2" >')          
    fill_table( page, ['<b>parameter','<b>value'] )
    fill_table( page, ['Number', self.number] )
    fill_table( page, ['inj ID', injection_id] )
    fill_table( page, ['mass1', '%.2f'% inj.mass1] )
    fill_table( page, ['mass2', '%.2f'%inj.mass2] )
    fill_table( page, ['mtotal', '%.2f' % (inj.mass1+inj.mass2)] )
    fill_table( page, ['mchirp', '%.2f' % (inj.mchirp)] )
    fill_table( page, ['end_time', inj.geocent_end_time] )
    fill_table( page, ['end_time_ns', inj.geocent_end_time_ns] )    
    fill_table( page, ['distance', '%.1f' % inj.distance] )
    fill_table( page, ['eff_dist_h','%.1f' %  inj.eff_dist_h] )
    fill_table( page, ['eff_dist_l','%.1f' %  inj.eff_dist_l] )
    fill_table( page, ['eff_dist_v','%.1f' %  inj.eff_dist_v] )
    fill_table( page, ['eff_dist_g','%.1f' %  inj.eff_dist_g] )  
    fill_table( page, ['playground','%s' %  pipeline.s2play(inj.geocent_end_time)] )    
    page.add('</table></td>')
    
    # print infos to screen if required
    if self.opts.verbose:
      self.print_inj( inj,  injection_id)

    # sieve the cache for the required INSPIRAL and THINCA files
    invest_dict = {}
    for stage, cache in self.triggerCache.iteritems():

      trig_cache = lal.Cache()
      for c in cache:

        # check the time and the injection ID
        if inj.geocent_end_time in c.segment:
          if self.get_injection_id(url = c.url) == injection_id:
            trig_cache.append( c )

      # create a filelist
      file_list = trig_cache.sieve(description = description).pfnlist()
        
      # check if the pfnlist is empty
      if len(file_list)==0:
        print >>sys.stderr, "Error: No files found for stage %s in the "\
              "cache for ID %s and time %d; probably mismatch of a "\
              "pattern in the options. " % \
              ( stage, injection_id, inj.geocent_end_time)        
        continue

      # if the stage is THINCA_SECOND...
      if 'THINCA_SECOND' in stage:

        # ... need to loop over the four categories
        for cat in [1,2,3,4]:
          
          select_list=self.select_category( file_list, cat)
          if len(select_list)==0:
            print "WARNING: No THINCA_SECOND files found for category ", cat
            continue
          
          modstage = stage+'_CAT_' + str(cat)
          invest_dict[modstage] = self.investigateTimeseries( select_list, inj, selectIFO, modstage, self.number )

        #sys.exit(0)
      else:
        invest_dict[stage]=self.investigateTimeseries( file_list, inj, selectIFO, stage, self.number)

      
      
    ## print out the result for this particular injection
    page.add('<td><table border="2" >')
    fill_table( page, ['<b>step','<b>F/M', '<b>Rec. SNR', '<b>Rec. mchirp', \
                      '<b>Rec. eff_dist', '<b>Rec. chisq', '<b>Veto ON/OFF'] )

    # loop over the stages and create the table with
    # the various data in it (when available)
    for stage in self.orderLabels:
      if stage in invest_dict:
        result = invest_dict[stage]

        # Fill in the details of the loudest found coinc.
        #found_ifo=''
        #if "INSPIRAL" in stage or "THINCA" in stage:
        found_ifo=''
        loudest_snr=''
        loudest_mchirp=''
        loudest_eff_dist=''
        loudest_chisq=''
        veto_onoff=''

        # add all the IFOs for this coincidence
        for ifo in result['foundset']:
          found_ifo += ifo+' '
          
          # Parameters of the loudest trigger, taken from the
          # 'loudest-details' dictionary, created in 'investigateTimeseries'
          loudest_snr += ifo + ': ' + str(result['loudest_details'][ifo]['snr'])+'<br>'
          loudest_mchirp += ifo + ': ' + str(result['loudest_details'][ifo]['mchirp'])+'<br>'
          loudest_eff_dist += ifo + ': ' + str(result['loudest_details'][ifo]['eff_dist'])+'<br>'
          loudest_chisq += ifo + ': ' + str(result['loudest_details'][ifo]['chisq'])+'<br>'
          
          # Check whether the trigger time in this ifo is vetoed
          timeTrigger = float(result['loudest_details'][ifo]['timeTrigger'])
          if (self.vetodict[ifo]):
            veto = self.isThereVeto (timeTrigger, ifo)
            veto_txt = 'OFF'
            if veto:
              veto_txt = 'ON'              
            veto_onoff+=ifo+': '+veto_txt+'<br>'
          else: 
            veto_onoff+=ifo+': No info<br>'

        # Fill the table whether something is found or not
        if len(result['foundset'])>0:
          fill_table( page, [ stage,  'FOUND in '+found_ifo, 'loudest<br>'+loudest_snr, \
                              'loudest<br>'+loudest_mchirp, 'loudest<br>'+loudest_eff_dist,\
                              'loudest<br>'+loudest_chisq, veto_onoff])
        else:
          fill_table( page, [ stage,  '<font color="red">MISSED'])
          
    page.add('</table>')
    page.add('</td></tr></table><br><br>')


    ## add the pictures to the webpage
    for stage in self.orderLabels:
      if stage in invest_dict:
        result = invest_dict[stage]
      
        ##if stage!="TMPLTBANK":
        if True:
          fname = result['filename']
          page.a(extra.img(src=[fname], width=400, \
                           alt=fname, border="2"), title=fname, href=[ fname ])
          
    # add version information
    page.add('<hr>Page created with %s Version %s' % \
        (__prog__, git_version.verbose_msg))
    
    # and write the html file
    htmlfilename = self.opts.prefix + "_"+selectIFO+"_followup_"+str(self.number) +\
                         self.opts.suffix+'.html'
    with open(self.opts.output_path + htmlfilename, 'w') as html_file:
      html_file.write(page(False))

    # store html file in fnameList
    self.fnameList.append(htmlfilename)

    # supply the output
    return htmlfilename
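
The cache selection done inside the stage loop above boils down to a time check plus a description sieve. A compact sketch, where 'cache' stands for one of the per-stage trigger caches and the injection time and pattern are assumptions:

end_time = 900000123                                    # assumed injection GPS time
hits = lal.Cache(c for c in cache if end_time in c.segment)
file_list = hits.sieve(description="THINCA_SECOND").pfnlist()
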
Example #10
def thinca_coinc(ifo_list, single_data_analyzed, cafe_caches, cafe_base,
    lladd_job, tisi_file_name, lladd_veto_file, 
    coinc_job, dag, do_coinc, do_insp, usertag=None, inspinjNode=None):
  """
  Run thinca on the coincident times from each of the sets of IFOs. 
  Since all of these data sets are treated in the same way, a single function
  handles every combination of IFOs.

  ifo_list = a list of the ifos we are to analyze
  single_data_analyzed = dictionary of single ifo data analyzed
  cafe_caches = the caches from ligolw_cafe.ligolw_cafe()
  cafe_base = the base name for the cafe caches
  lladd_job = the condor job to do ligolw_add
  tisi_file_name = the name of the tisi file to add
  lladd_veto_file = the name of the veto file to add or None
  coinc_job = the condor job to do thinca
  dag = the DAG to attach the nodes to
  do_coinc = whether we should add the thinca jobs to the dag
  do_insp  = whether previous inspiral jobs are in the dag
  usertag = the usertag to add to the output file name
  inspinjNode = the inspinj node to be added as a parent to ligolw_add jobs
  """

  # create caches using ligolw_cafe
  cache_names = ligolw_cafe.write_caches(cafe_base, cafe_caches, set(ifo_list))
  coinc_analyzed = []

  # loop over caches
  for idx in range(len(cafe_caches)):
    if len(cafe_caches[idx].objects):
      cache = cafe_caches[idx]
      cachename = cache_names[idx]
      thincabase = cafe_base.split('.')[0].replace('CAFE_','')
      ifos = set(cache_entry.observatory for cache_entry in cache.objects)

      # extract segment information
      seg = power.cache_span(cache.objects)
      seg = pipeline.AnalysisChunk(seg[0],seg[1])

      # create node for ligolw_add to create xml file
      lladd = pipeline.LigolwAddNode(lladd_job)
      
      # add the tisi and veto files
      lladd.add_file_arg(tisi_file_name)
      if lladd_veto_file:
        lladd.add_file_arg(lladd_veto_file)

      # add the input xml files from the cafe cache
      with open(cachename, 'r') as cachefp:
        cacheobject = lal.Cache.fromfile(cachefp)
      cachepfns = cacheobject.pfnlist()
      for pfn in cachepfns:
        lladd.add_file_arg(pfn)

      # create node for ligolw_thinca to analyze xml file
      thinca = inspiral.ThincaNode(coinc_job)
      thinca.set_start(seg.start(), pass_to_command_line=False)
      thinca.set_end(seg.end(), pass_to_command_line=False)
      thinca.set_zip_output(True)
      if usertag: thinca.set_user_tag(thincabase, pass_to_command_line=False)

      # check if caches are adjacent
      coinc_end_time_segment = ''
      if idx and (cache.extent[0] == cafe_caches[idx-1].extent[1]):
        coinc_end_time_segment += str(cache.extent[0])
      coinc_end_time_segment += ':'
      if idx + 1 < len(cafe_caches) and (cache.extent[1] == cafe_caches[idx+1].extent[0]):
        coinc_end_time_segment += str(cache.extent[1])
      thinca.add_var_opt('coinc-end-time-segment',coinc_end_time_segment)

      # scroll through ifos, adding the appropriate ones
      for ifo in ifo_list:
        if ifo in ifos:
          thinca.set_ifo(ifo, pass_to_command_line=False)
                
      # add all inspiral jobs in this cache to input
      if do_insp:
        for cache_entry in cache.objects:
          lladd.add_parent(single_data_analyzed[cache_entry])

      # add inspinj job as parent of each ligolw_add job
      if inspinjNode and opts.inspinj: lladd.add_parent(inspinjNode)

      # set output of ligolw_add jobs to follow thinca's convention
      lladd_outfile = re.sub('THINCA','LLWADD',thinca.get_output())
      lladd.set_output(lladd_outfile)
      thinca.set_input(lladd.get_output(), pass_to_command_line=False)
      thinca.add_file_arg(lladd.get_output())

      # check for condor settings
      if not opts.disable_dag_categories:
        lladd.set_category('ligolw_add')
        thinca.set_category('thinca')
      if not opts.disable_dag_priorities:
        lladd.set_priority(3)
        thinca.set_priority(3)

      # add ligolw_add and ligolw_thinca nodes to dag
      if do_coinc:
        dag.add_node(lladd)
        thinca.add_parent(lladd)
        dag.add_node(thinca)

      # add ligolw_thinca coincident segment
      coinc_analyzed.append(AnalyzedIFOData(seg,thinca))

  return coinc_analyzed
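
power.cache_span is imported from elsewhere; a rough sketch of an equivalent span computation using glue.segments, assuming cache.objects is a non-empty list of CacheEntry objects:

seglist = segments.segmentlist(e.segment for e in cache.objects).coalesce()
span = seglist.extent()                   # smallest segment covering every entry
chunk = pipeline.AnalysisChunk(span[0], span[1])
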
Example #11
        gps_start_time, gps_end_time, inj_file = inj_file, 
        ifotag="SUMMARY_FIRST", usertag = usertag, inspinjNode=inspinj)

  print "done" 


##############################################################################
# Step 6: Run thinca on each of the disjoint sets of coincident data

if opts.coincidence:
  print "setting up thinca jobs..."
  sys.stdout.flush()

  # create a cache of the inspiral jobs
  single_data_analyzed = {}
  inspiral_cache = lal.Cache()
  for ifo in ifo_list:
    for insp in chunks_analyzed[ifo]:
      output_file = insp.get_dag_node().get_output()
      output_cache_entry = lal.Cache.from_urls([output_file])[0]
      inspiral_cache.append(output_cache_entry)
      single_data_analyzed[output_cache_entry] = insp.get_dag_node()

  # get the ligolw_thinca command line arguments
  thinca_job.add_ini_opts(cp, 'thinca')

  # add the vetoes to the ligolw_add and ligolw_thinca jobs
  if cp.has_section("vetoes"):
    lladd_veto_file = cp.get("vetoes","vetoes-file")
    thinca_job.add_opt("vetoes-name",cp.get("vetoes","vetoes-name"))
  else:
Example #12
            site, frametype, _, _ = framefile.split("-")
        except Exception, e:
            raise RuntimeError("Error parsing filename %s: %s" %
                               (framefile, e))
        url = ("%s/gwf/%s/%s/%s.json" %
               (_url_prefix, site, frametype, framefile))
        response = self._requestresponse("GET", url)
        urllist = decode(response.read())
        if len(urllist) == 0:
            if on_missing == "warn":
                sys.stderr.write("No files found!\n")
            elif on_missing == "error":
                raise RuntimeError("No files found!")
        # verify urltype is what we want
        cache = lal.Cache(e for e in [
            lal.CacheEntry.from_T050017(x, coltype=self.LIGOTimeGPSType)
            for x in urllist
        ] if not urltype or e.scheme == urltype)
        return cache

    def find_latest(self, site, frametype, urltype=None, on_missing="warn"):
        """Query for the most recent framefile of a given type.

        @param  site:
            single-character name of site to match
        @param frametype:
            name of frametype to match
        @param urltype:
            file scheme to search for (e.g. 'file')
        @param on_missing:
            what to do when the requested frame isn't found, one of:
                - C{'warn'} (default): print a warning,