Python LSCDataFindNode Examples, glue.pipeline.LSCDataFindNode Python Examples

Example #1

0

Show file

File: power.py Project: ADACS-Australia/ADACS-SS18A-RSmith

def make_datafind_fragment(dag, instrument, seg):
	"""
	Build a single LSCDataFindNode for seg, attach it to dag and return it.

	The node is named from the instrument, int(seg[0]) and int(abs(seg)).
	Its start is seg[0] minus the module-level datafind_pad, and its end is
	seg[1] + 1.  If the node reports no frame type, the type is read from
	the "type_<instrument>" option of the "datafind" section of the
	datafind job's config file.  The node is retried up to 3 times.

	Returns a set containing only the new node.
	"""
	df_node = pipeline.LSCDataFindNode(datafindjob)

	# name = instrument, integer start, integer abs(seg)
	# (abs(seg) is presumably the segment duration -- confirm against the
	# segment type used by the caller)
	name_fields = (instrument, int(seg[0]), int(abs(seg)))
	df_node.set_name("ligo_data_find-%s-%d-%d" % name_fields)

	# pad the start of the request, and extend the end by one
	df_node.set_start(seg[0] - datafind_pad)
	df_node.set_end(seg[1] + 1)

	# FIXME: the node has to know which instrument it is for, but
	# set_ifo() cannot be used because it also adds a --channel-name
	# command line argument, so the private attribute is written directly.
	df_node._AnalysisNode__ifo = instrument

	# the observatory is the first character of the instrument name
	df_node.set_observatory(instrument[0])

	# only fall back to the config file when no type has been set already
	if df_node.get_type() is None:
		config = datafindjob.get_config_file()
		type_option = "type_%s" % instrument
		df_node.set_type(config.get("datafind", type_option))

	df_node.set_retry(3)
	dag.add_node(df_node)
	return set([df_node])

Example #2

0

Show file

    split_job.add_opt('minimal-match', cp.get('tmpltbank', 'minimal-match'))

    # get the pad and chunk lengths from the values in the ini file
    pad = int(cp.get('data', 'pad-data'))
    n = int(cp.get('data', 'segment-length'))
    s = int(cp.get('data', 'number-of-segments'))
    r = int(cp.get('data', 'sample-rate'))
    o = int(cp.get('inspiral', 'segment-overlap'))
    length = (n * s - (s - 1) * o) / r
    overlap = o / r
    if doCohPTF:
        overlap = int(cp.get('coh_PTF_inspiral', 'segment-duration')) / 2
    job_analysis_time = length - overlap

    # find the data between the start time and the end time
    df = pipeline.LSCDataFindNode(df_job)
    df.set_start(gps_start_time)
    df.set_end(gps_end_time)
    df.set_observatory(ifo[0])
    if ifo == 'V1':
        df.set_type(type)
    else:
        df.set_type(ifo + '_' + type)
    dag.add_node(df)

    # modify the start and end time by pad seconds
    log_fh.write("gps_start_time = %d\n" % gps_start_time)
    log_fh.write("gps_end_time = %d\n" % gps_end_time)

    # Don't need to do these, since we'll pad each segment
    # gps_start_time += pad

Example #3

0

Show file

        segfile.close()
    else:   # If we skip the segdb step, just construct a large segment
        print 'Faking segment from %i to %i\n'%(datastart,dataend)
        segs[ifo]=segments.segmentlist([segments.segment(int(datastart),int(dataend))])


# For every IFO build the list of science segments.  IFOs whose frame type
# is one of fakeTypes get a single None placeholder; every other IFO gets
# one ScienceSegment per entry of segs[ifo], each with its own datafind node
# attached.  The nodes are not added to the dag here.
for ifo in ifos:
    science_segs[ifo]=[]
    if types[ifo] in fakeTypes:
        science_segs[ifo].append(None)
    else:
        # Setup find data jobs
        # enumerate() gives the position of each segment directly, instead of
        # searching the list again with .index() for every segment (which is
        # quadratic and returns the first equal entry, not the current one).
        for seg_index,seg in enumerate(segs[ifo]):
            sciseg=pipeline.ScienceSegment((seg_index,seg[0],seg[1],seg[1]-seg[0]))
            science_segs[ifo].append(sciseg)
            df_node=pipeline.LSCDataFindNode(datafind_job)
            df_node.set_start(int(sciseg.start()))
            df_node.set_end(int(sciseg.end()))
            df_node.set_observatory(ifo[0])
            df_node.set_type(types[ifo])
            sciseg.set_df_node(df_node)

os.chdir('../../')

# Now loop over times and add datafind nodes to the dag

# Keep only the times that fall inside the segment list of at least one IFO.
# any() stops at the first IFO that contains the time and returns False
# (instead of raising TypeError, as reduce() does) when ifos is empty.
filtered_time=filter(lambda t: any(t in segs[ifo] for ifo in ifos), times)
times=filtered_time
# single parenthesised argument: parses identically as a Python 2 print
# statement and as a Python 3 print() call
print('Found segments for %i times\n'%(len(times)))

df_nodes_by_time={}

Example #4

0

Show file

  epoch_data.append_from_tuple(epoch)
  # read science segs that are greater or equal to a chunk from the input file
  data = pipeline.ScienceData()
  data.read(opts.segment_filename,0)
  # intersect the science segments with the calibration epoch
  data.intersection(epoch_data)
  # create the chunks from the science segments
  data.make_chunks(length,0,0,0,0)
  data.make_short_chunks_from_unused(0,0,0,0,0)

  # create all the LSCdataFind jobs to run in sequence
  prev_df1 = None
  prev_df2 = None
  # only do data find jobs if requested
  # find all the h(t) data
  df1 = pipeline.LSCDataFindNode(df_job)
  df1.set_start(int(epoch[1])-df_pad)
  df1.set_end(int(epoch[2]) +df_pad)
  df1.set_observatory(ifo[0])
  df1.set_type(datatype_hoft)
  df1.set_name("df1_"+ifo+"_"+str(epoch_cnt))
  # see if the cache files lying around are still okay
  if opts.check_datafind_jobs:
    try: df1cache = lal.Cache.fromfile(open(df1.get_output(),'r'))
    except: df1cache = None
    if df1cache: found,missed = df1cache.checkfilesexist("ignore")
    else: missed = True
  else: missed = True

  if opts.data_find and missed and opts.write_dax: df1.add_parent(mkdir_node)
  if prev_df1 and opts.data_find and missed:

Example #5

0

Show file

def analyze_ifo(ifo_name,ifo_data,ifo_to_do,tmplt_job,insp_job,df_job,\
  prev_df,dag, usertag=None, inspinjNode = None, insp_ckpt_job = None):
  """
  Analyze the data from a single IFO.  Since the way we treat all this data is
  the same, this function is the same for all interferometers. Returns the last
  LSCdataFind job that was executed and the chunks analyzed.

  For every chunk of ifo_data that overlaps one of the segments in ifo_to_do,
  one template bank node and one inspiral node are built (plus an inspiral
  checkpoint node when opts.data_checkpoint is set).  Unless opts.read_cache
  is set, each science segment gets exactly one datafind node, shared by all
  of its chunks.  Which nodes are actually added to the DAG is controlled by
  opts.datafind, opts.template_bank and opts.inspiral.

  The configuration (cp) and the command line options (opts) are read from
  module-level names, not passed in.

  ifo_name = the name of the IFO
  ifo_data = the master science segs 
  ifo_to_do = the science segments we need to analyze
  tmplt_job = if not FixedBank: template bank job we should use
  insp_job = the condor job that we should use to analyze data
  df_job = the condor job to find the data
  prev_df = the previous LSCdataFind job that was executed
  dag = the DAG to attach the nodes to
  usertag = the usertag to add to the job names
  inspinjNode = the inspinj node to be added as a parent to inspirals
  insp_ckpt_job = a checkpoint restore job for the inspiral code

  Returns the tuple (prev_df, chunks_analyzed): prev_df is the last datafind
  node created here (None when opts.read_cache is set and a chunk was
  processed, otherwise the value passed in if no node was created), and
  chunks_analyzed is a list of AnalyzedIFOData, one per filtered chunk.
  """

  # add the non veto inspiral options
  if cp.has_section('no-veto-inspiral'): 
    insp_job.add_ini_opts(cp,'no-veto-inspiral')

  # lower-case IFO name, used to build the ifo specific section/option names
  ifo_lower = ifo_name.lower()

  # add the ifo specific options
  if cp.has_section(ifo_lower + '-inspiral'): 
    insp_job.add_ini_opts(cp,ifo_lower + '-inspiral')

  if cp.has_section(ifo_lower + '-tmpltbank'):
    tmplt_job.add_ini_opts(cp,ifo_lower + '-tmpltbank')

  # we may use a fixed bank specified in ini file
  # Any ordinary failure to read the option means "no fixed bank".  Catching
  # Exception rather than using a bare except keeps KeyboardInterrupt and
  # SystemExit from being swallowed here.
  try:
    FixedBank = cp.get('input','fixed-bank')
    print("For %s we use bank %s" % (ifo_name, FixedBank))
  except Exception:
    FixedBank = None

  # get datatype info from config file
  # (named data_type so that the builtin type() is not shadowed)
  data_opts, data_type, channel = inspiralutils.get_data_options(cp,ifo_name)
  
  if cp.has_section('tmpltbank-1'):
    tmplt_job.add_ini_opts(cp, 'tmpltbank-1')
  if cp.has_section(data_opts):
    tmplt_job.add_ini_opts(cp,data_opts)
    insp_job.add_ini_opts(cp,data_opts)

  tmplt_job.set_channel(channel)
  insp_job.set_channel(channel)

  # see if we are using calibrated data
  if cp.has_section(data_opts) and cp.has_option(data_opts,'calibrated-data'):
    calibrated = True
    print("we use calibrated data for %s" % (ifo_name,))
  else: calibrated = False

  # prepare the injection filename
  # NOTE(review): injectionFileTemplate is not referenced again inside this
  # function; it is kept so that the computation (and any error it raises on
  # unexpected segment values) is unchanged.
  if ifo_data:
    injStart = ifo_data[0].start()
    injDuration = ifo_data[-1].end()-injStart
    injectionFileTemplate = "HL-INJECTION_%%s-%d-%d.xml" % \
      (injStart, injDuration)

  chunks_analyzed = []
  # loop over the master science segments
  for seg in ifo_data:

    # loop over the master analysis chunks in the science segment
    for chunk in seg:
      done_this_chunk = False

      # now loop over all the data that we need to filter
      for seg_to_do in ifo_to_do:

        # if the current chunk is in one of the segments we need to filter
        if not done_this_chunk and inspiral.overlap_test(chunk,seg_to_do):

          # make sure we only filter the master chunk once
          done_this_chunk = True

          # make sure we have done one and only one datafind for the segment
          if not opts.read_cache:
            # Always take df from the segment itself.  If the segment already
            # carries a datafind node, df would otherwise be unbound (or left
            # over from a different segment) when the bank and inspiral nodes
            # below ask it for its output cache.
            df = seg.get_df_node()
            if not df:
              df = pipeline.LSCDataFindNode(df_job)
              if not opts.disable_dag_categories:
                df.set_category('datafind')
              if not opts.disable_dag_priorities:
                df.set_priority(100)
              df.set_observatory(ifo_name[0])
              # add a padding time to the start of the datafind call (but don't change datafind output name)
              if ifo_name == 'G1':
                dfsect = 'geo-data'
              elif ifo_name == 'V1':
                dfsect = 'virgo-data'
              else:
                dfsect = 'ligo-data'
              pad_option = ifo_lower + '-datafind-start-padding'
              if cp.has_option(dfsect,pad_option):
                # value is passed on exactly as read from the config file
                padding=cp.get(dfsect,pad_option)
              else:
                padding=0.
              df.set_start(seg.start(),padding)
              df.set_end(seg.end())
              seg.set_df_node(df)
              if data_type: df.set_type(data_type)
              # chain datafind nodes one after another only when DAG
              # categories are disabled
              if prev_df and opts.disable_dag_categories:
                df.add_parent(prev_df)
              if opts.datafind: dag.add_node(df)
              prev_df = df
          else:
            # NOTE(review): in this branch df is never assigned by this
            # function, so the "if opts.datafind: ...add_parent(df)" lines
            # below rely on opts.datafind being unset whenever
            # opts.read_cache is set -- confirm against the option parser.
            prev_df = None  

          # make a template bank job for the master chunk
          bank = inspiral.TmpltBankNode(tmplt_job)
          if not opts.disable_dag_categories:
            bank.set_category('tmpltbank')
          if not opts.disable_dag_priorities:
            bank.set_priority(1)
          bank.set_start(chunk.start())
          bank.set_end(chunk.end())
          bank.set_ifo(ifo_name)
          bank.set_vds_group(ifo_name[0] + str(chunk.start()))
          if not opts.read_cache: bank.set_cache(df.get_output())
          else: bank.set_cache(cp.get('datafind',ifo_name+"-cache"))
          if not calibrated: bank.calibration()
          if opts.datafind: bank.add_parent(df)
          if (opts.template_bank and not FixedBank): dag.add_node(bank)
                  
          # make an inspiral job for the master chunk
          insp = inspiral.InspiralNode(insp_job)
          if not opts.disable_dag_categories:
            insp.set_category('inspiral1')
          if not opts.disable_dag_priorities:
            insp.set_priority(2)
          if usertag:
            # only the part of the usertag before '_CAT' is used
            insp.set_user_tag(usertag.split('_CAT')[0])
          insp.set_start(chunk.start())
          insp.set_end(chunk.end())
          insp.set_trig_start(chunk.trig_start())
          insp.set_trig_end(chunk.trig_end())
          insp.set_ifo(ifo_name)
          insp.set_ifo_tag("FIRST")
          insp.set_vds_group(ifo_name[0] + str(chunk.start()))
          if not opts.read_cache: insp.set_cache(df.get_output())
          else:  insp.set_cache(cp.get('datafind',ifo_name+"-cache"))
          if not calibrated: insp.calibration()
          if FixedBank:
            insp.set_bank(FixedBank)
          else:
            insp.set_bank(bank.get_output())
          
          if opts.datafind: insp.add_parent(df)
          if inspinjNode and opts.inspinj: insp.add_parent(inspinjNode) 
          if (opts.template_bank and not FixedBank): insp.add_parent(bank)
          if opts.inspiral: dag.add_node(insp)

          if opts.data_checkpoint:
            # make an inspiral checkpoint restore job
            insp_job.set_universe("vanilla")
            insp.set_data_checkpoint()
            insp.set_post_script(cp.get('condor','checkpoint-post-script'))
            insp.add_post_script_arg(os.path.join(os.getcwd(),insp.get_checkpoint_image()))
            insp_ckpt = inspiral.InspiralCkptNode(insp_ckpt_job)
            insp_ckpt.set_output(insp.get_output())
            insp_ckpt.set_injections(insp.get_injections())
            insp_ckpt.set_checkpoint_image(insp.get_checkpoint_image())

            if cp.has_option('pipeline','condor-c-site'):
              # additional requirements to launch job on remote pool
              insp_ckpt_job.set_universe("grid")
              insp_ckpt.set_grid_start("pegasuslite")
              insp_ckpt.add_pegasus_profile("condor","grid_resource","condor %s" % cp.get('pipeline','condor-c-site'))
              insp_ckpt.add_pegasus_profile("condor","+remote_jobuniverse","5")
              insp_ckpt.add_pegasus_profile("condor","+remote_requirements","True")
              insp_ckpt.add_pegasus_profile("condor","+remote_ShouldTransferFiles","True")
              insp_ckpt.add_pegasus_profile("condor","+remote_WhenToTransferOutput","ON_EXIT")
              insp_ckpt.add_pegasus_profile("condor","+remote_TransferInputFiles",'"' + insp.get_checkpoint_image() + '"')
              insp_ckpt.add_pegasus_profile("condor","+remote_PeriodicRelease",'( JobStatus == 5 && HoldReasonCode == 13 && NumSystemHolds < 3 )')
            else:
              insp_ckpt_job.set_universe("vanilla")

            insp_ckpt.add_parent(insp)
            if opts.inspiral: dag.add_node(insp_ckpt)

            # ensure output is added to list of output files
            # (the returned name itself is not needed here)
            insp_ckpt.get_output()

            # store this chunk in the list of filtered data
            chunks_analyzed.append(AnalyzedIFOData(chunk,insp_ckpt))

          else:
            # XXX: ensure output is added to list of output files
            # (the returned name itself is not needed here)
            insp.get_output()

            # store this chunk in the list of filtered data
            chunks_analyzed.append(AnalyzedIFOData(chunk,insp))         

  return prev_df, chunks_analyzed