def make_datafind_fragment(dag, instrument, seg):
    node = pipeline.LSCDataFindNode(datafindjob)
    node.set_name("ligo_data_find-%s-%d-%d" % (instrument, int(seg[0]), int(abs(seg))))
    node.set_start(seg[0] - datafind_pad)
    node.set_end(seg[1] + 1)
    # FIXME: argh, I need the node to know what instrument it's for,
    # but can't call set_ifo() because that adds a --channel-name
    # command line argument (!?)
    node._AnalysisNode__ifo = instrument
    node.set_observatory(instrument[0])
    if node.get_type() is None:
        node.set_type(datafindjob.get_config_file().get("datafind", "type_%s" % instrument))
    node.set_retry(3)
    dag.add_node(node)
    return set([node])
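# Hypothetical usage sketch (not part of the original script): call the
# fragment once per (instrument, segment) pair and collect the returned
# datafind nodes so that downstream jobs can declare them as parents.
# `seglists` is an assumed dict mapping instrument name -> segmentlist.
datafind_nodes = set()
for instrument, seglist in seglists.items():
    for seg in seglist:
        datafind_nodes |= make_datafind_fragment(dag, instrument, seg)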
split_job.add_opt('minimal-match', cp.get('tmpltbank', 'minimal-match'))

# get the pad and chunk lengths from the values in the ini file
pad = int(cp.get('data', 'pad-data'))
n = int(cp.get('data', 'segment-length'))
s = int(cp.get('data', 'number-of-segments'))
r = int(cp.get('data', 'sample-rate'))
o = int(cp.get('inspiral', 'segment-overlap'))
length = (n * s - (s - 1) * o) / r
overlap = o / r
if doCohPTF:
    overlap = int(cp.get('coh_PTF_inspiral', 'segment-duration')) / 2
job_analysis_time = length - overlap

# find the data between the start time and the end time
df = pipeline.LSCDataFindNode(df_job)
df.set_start(gps_start_time)
df.set_end(gps_end_time)
df.set_observatory(ifo[0])
if ifo == 'V1':
    df.set_type(type)
else:
    df.set_type(ifo + '_' + type)
dag.add_node(df)

# modify the start and end time by pad seconds
log_fh.write("gps_start_time = %d\n" % gps_start_time)
log_fh.write("gps_end_time = %d\n" % gps_end_time)
# Don't need to do these, since we'll pad each segment
# gps_start_time += pad
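# Worked example of the length/overlap arithmetic above.  The numbers are
# illustrative assumptions, not values read from any particular ini file:
# 256 s segments at 4096 Hz (n = 1048576 samples), s = 15 segments and a
# 50% overlap (o = 524288 samples) give a 2048 s chunk, of which 1920 s is
# unique analysis time per job.
_n, _s, _o, _r = 1048576, 15, 524288, 4096
assert (_n * _s - (_s - 1) * _o) / _r == 2048            # length in seconds
assert (_n * _s - (_s - 1) * _o) / _r - _o / _r == 1920  # job_analysis_time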
        segfile.close()
    else:
        # If we skip the segdb step, just construct a large segment
        print 'Faking segment from %i to %i\n'%(datastart,dataend)
        segs[ifo]=segments.segmentlist([segments.segment(int(datastart),int(dataend))])

for ifo in ifos:
    science_segs[ifo]=[]
    if types[ifo] in fakeTypes:
        science_segs[ifo].append(None)
    else:
        # Setup find data jobs
        for seg in segs[ifo]:
            sciseg=pipeline.ScienceSegment((segs[ifo].index(seg),seg[0],seg[1],seg[1]-seg[0]))
            science_segs[ifo].append(sciseg)
            df_node=pipeline.LSCDataFindNode(datafind_job)
            df_node.set_start(int(sciseg.start()))
            df_node.set_end(int(sciseg.end()))
            df_node.set_observatory(ifo[0])
            df_node.set_type(types[ifo])
            sciseg.set_df_node(df_node)

os.chdir('../../')

# Now loop over times and add datafind nodes to the dag
filtered_time=filter(lambda t: reduce(lambda a,b:a or b, map(lambda ifo: t in segs[ifo],ifos)), times)
times=filtered_time
print 'Found segments for %i times\n'%(len(times))
df_nodes_by_time={}
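# The filter/reduce/map expression above keeps a time only if it falls inside
# at least one IFO's segment list.  A more explicit, equivalent spelling is
#   times = [t for t in times if any(t in segs[ifo] for ifo in ifos)]
# demonstrated here on a tiny assumed segment dictionary (illustrative sketch
# only, not part of the original script):
_demo_segs = {'H1': segments.segmentlist([segments.segment(0, 100)]),
              'L1': segments.segmentlist([segments.segment(50, 150)])}
_demo_times = [10, 120, 200]
assert [t for t in _demo_times
        if any(t in _demo_segs[i] for i in _demo_segs)] == [10, 120]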
epoch_data.append_from_tuple(epoch)

# read science segs that are greater or equal to a chunk from the input file
data = pipeline.ScienceData()
data.read(opts.segment_filename,0)

# intersect the science segments with the calibration epoch
data.intersection(epoch_data)

# create the chunks from the science segments
data.make_chunks(length,0,0,0,0)
data.make_short_chunks_from_unused(0,0,0,0,0)

# create all the LSCdataFind jobs to run in sequence
prev_df1 = None
prev_df2 = None

# only do data find jobs if requested
# find all the h(t) data
df1 = pipeline.LSCDataFindNode(df_job)
df1.set_start(int(epoch[1])-df_pad)
df1.set_end(int(epoch[2])+df_pad)
df1.set_observatory(ifo[0])
df1.set_type(datatype_hoft)
df1.set_name("df1_"+ifo+"_"+str(epoch_cnt))

# see if the cache files laying around are still okay
if opts.check_datafind_jobs:
    try:
        df1cache = lal.Cache.fromfile(open(df1.get_output(),'r'))
    except:
        df1cache = None
    if df1cache:
        found,missed = df1cache.checkfilesexist("ignore")
    else:
        missed = True
else:
    missed = True

if opts.data_find and missed and opts.write_dax:
    df1.add_parent(mkdir_node)
if prev_df1 and opts.data_find and missed:
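# The try/except above implements a "reuse the existing cache only if every
# file it lists is still present" check.  A minimal sketch of the same
# pattern as a helper (hypothetical function, using only the glue.lal Cache
# calls already appearing above):
def cache_is_complete(cache_path):
    """Return True if cache_path can be read and all files it lists exist."""
    try:
        cache = lal.Cache.fromfile(open(cache_path,'r'))
    except Exception:
        # unreadable or missing cache file: treat it as incomplete
        return False
    found, missed = cache.checkfilesexist("ignore")
    return len(missed) == 0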
def analyze_ifo(ifo_name,ifo_data,ifo_to_do,tmplt_job,insp_job,df_job,\
        prev_df,dag, usertag=None, inspinjNode = None, insp_ckpt_job = None):
    """
    Analyze the data from a single IFO.  Since the way we treat all this data
    is the same, this function is the same for all interferometers.  Returns
    the last LSCdataFind job that was executed and the chunks analyzed.

    ifo_name = the name of the IFO
    ifo_data = the master science segs
    ifo_to_do = the science segments we need to analyze
    tmplt_job = if not FixedBank: template bank job we should use
    insp_job = the condor job that we should use to analyze data
    df_job = the condor job to find the data
    prev_df = the previous LSCdataFind job that was executed
    dag = the DAG to attach the nodes to
    usertag = the usertag to add to the job names
    inspinjNode = the inspinj node to be added as a parent to inspirals
    insp_ckpt_job = a checkpoint restore job for the inspiral code
    """
    # add the non veto inspiral options
    if cp.has_section('no-veto-inspiral'):
        insp_job.add_ini_opts(cp,'no-veto-inspiral')

    # add the ifo specific options
    if cp.has_section(ifo_name.lower() + '-inspiral'):
        insp_job.add_ini_opts(cp,ifo_name.lower() + '-inspiral')

    if cp.has_section(ifo_name.lower() + '-tmpltbank'):
        tmplt_job.add_ini_opts(cp,ifo_name.lower() + '-tmpltbank')

    # we may use a fixed bank specified in ini file
    try:
        FixedBank = cp.get('input','fixed-bank')
        print "For %s we use bank %s"%(ifo_name, FixedBank)
    except:
        FixedBank = None

    # get datatype info from config file
    data_opts, type, channel = inspiralutils.get_data_options(cp,ifo_name)

    if cp.has_section('tmpltbank-1'):
        tmplt_job.add_ini_opts(cp, 'tmpltbank-1')
    if cp.has_section(data_opts):
        tmplt_job.add_ini_opts(cp,data_opts)
        insp_job.add_ini_opts(cp,data_opts)

    tmplt_job.set_channel(channel)
    insp_job.set_channel(channel)

    # see if we are using calibrated data
    if cp.has_section(data_opts) and cp.has_option(data_opts,'calibrated-data'):
        calibrated = True
        print "we use calibrated data for", ifo_name
    else:
        calibrated = False

    # prepare the injection filename
    if ifo_data:
        injStart = ifo_data[0].start()
        injDuration = ifo_data[-1].end()-injStart
        injectionFileTemplate = "HL-INJECTION_%%s-%d-%d.xml" % \
            (injStart, injDuration)

    chunks_analyzed = []

    # loop over the master science segments
    for seg in ifo_data:
        # loop over the master analysis chunks in the science segment
        for chunk in seg:
            done_this_chunk = False

            # now loop over all the data that we need to filter
            for seg_to_do in ifo_to_do:

                # if the current chunk is in one of the segments we need to filter
                if not done_this_chunk and inspiral.overlap_test(chunk,seg_to_do):

                    # make sure we only filter the master chunk once
                    done_this_chunk = True

                    # make sure we have done one and only one datafind for the segment
                    if not opts.read_cache:
                        if not seg.get_df_node():
                            df = pipeline.LSCDataFindNode(df_job)
                            if not opts.disable_dag_categories:
                                df.set_category('datafind')
                            if not opts.disable_dag_priorities:
                                df.set_priority(100)
                            df.set_observatory(ifo_name[0])
                            # add a padding time to the start of the datafind
                            # call (but don't change datafind output name)
                            if ifo_name == 'G1':
                                dfsect = 'geo-data'
                            elif ifo_name == 'V1':
                                dfsect = 'virgo-data'
                            else:
                                dfsect = 'ligo-data'
                            if cp.has_option(dfsect,ifo_name.lower() + '-datafind-start-padding'):
                                padding=cp.get(dfsect,ifo_name.lower()+'-datafind-start-padding')
                            else:
                                padding=0.
                            df.set_start(seg.start(),padding)
                            df.set_end(seg.end())
                            seg.set_df_node(df)
                            if type:
                                df.set_type(type)
                            if prev_df and opts.disable_dag_categories:
                                df.add_parent(prev_df)
                            if opts.datafind:
                                dag.add_node(df)
                            prev_df = df
                    else:
                        prev_df = None

                    # make a template bank job for the master chunk
                    bank = inspiral.TmpltBankNode(tmplt_job)
                    if not opts.disable_dag_categories:
                        bank.set_category('tmpltbank')
                    if not opts.disable_dag_priorities:
                        bank.set_priority(1)
                    bank.set_start(chunk.start())
                    bank.set_end(chunk.end())
                    bank.set_ifo(ifo_name)
                    bank.set_vds_group(ifo_name[0] + str(chunk.start()))
                    if not opts.read_cache:
                        bank.set_cache(df.get_output())
                    else:
                        bank.set_cache(cp.get('datafind',ifo_name+"-cache"))
                    if not calibrated:
                        bank.calibration()
                    if opts.datafind:
                        bank.add_parent(df)
                    if (opts.template_bank and not FixedBank):
                        dag.add_node(bank)

                    # make an inspiral job for the master chunk
                    insp = inspiral.InspiralNode(insp_job)
                    if not opts.disable_dag_categories:
                        insp.set_category('inspiral1')
                    if not opts.disable_dag_priorities:
                        insp.set_priority(2)
                    if usertag:
                        insp.set_user_tag(usertag.split('_CAT')[0])
                    insp.set_start(chunk.start())
                    insp.set_end(chunk.end())
                    insp.set_trig_start(chunk.trig_start())
                    insp.set_trig_end(chunk.trig_end())
                    insp.set_ifo(ifo_name)
                    insp.set_ifo_tag("FIRST")
                    insp.set_vds_group(ifo_name[0] + str(chunk.start()))
                    if not opts.read_cache:
                        insp.set_cache(df.get_output())
                    else:
                        insp.set_cache(cp.get('datafind',ifo_name+"-cache"))
                    if not calibrated:
                        insp.calibration()
                    if FixedBank:
                        insp.set_bank(FixedBank)
                    else:
                        insp.set_bank(bank.get_output())
                    if opts.datafind:
                        insp.add_parent(df)
                    if inspinjNode and opts.inspinj:
                        insp.add_parent(inspinjNode)
                    if (opts.template_bank and not FixedBank):
                        insp.add_parent(bank)
                    if opts.inspiral:
                        dag.add_node(insp)

                    if opts.data_checkpoint:
                        # make an inspiral checkpoint restore job
                        insp_job.set_universe("vanilla")
                        insp.set_data_checkpoint()
                        insp.set_post_script(cp.get('condor','checkpoint-post-script'))
                        insp.add_post_script_arg(os.path.join(os.getcwd(),insp.get_checkpoint_image()))
                        insp_ckpt = inspiral.InspiralCkptNode(insp_ckpt_job)
                        insp_ckpt.set_output(insp.get_output())
                        insp_ckpt.set_injections(insp.get_injections())
                        insp_ckpt.set_checkpoint_image(insp.get_checkpoint_image())
                        if cp.has_option('pipeline','condor-c-site'):
                            # additional requirements to launch job on remote pool
                            insp_ckpt_job.set_universe("grid")
                            insp_ckpt.set_grid_start("pegasuslite")
                            insp_ckpt.add_pegasus_profile("condor","grid_resource","condor %s" % cp.get('pipeline','condor-c-site'))
                            insp_ckpt.add_pegasus_profile("condor","+remote_jobuniverse","5")
                            insp_ckpt.add_pegasus_profile("condor","+remote_requirements","True")
                            insp_ckpt.add_pegasus_profile("condor","+remote_ShouldTransferFiles","True")
                            insp_ckpt.add_pegasus_profile("condor","+remote_WhenToTransferOutput","ON_EXIT")
                            insp_ckpt.add_pegasus_profile("condor","+remote_TransferInputFiles",'"' + insp.get_checkpoint_image() + '"')
                            insp_ckpt.add_pegasus_profile("condor","+remote_PeriodicRelease",'( JobStatus == 5 && HoldReasonCode == 13 && NumSystemHolds < 3 )')
                        else:
                            insp_ckpt_job.set_universe("vanilla")
                        insp_ckpt.add_parent(insp)
                        if opts.inspiral:
                            dag.add_node(insp_ckpt)
                        # ensure output is added to list of output files
                        output = insp_ckpt.get_output()
                        # store this chunk in the list of filtered data
                        chunks_analyzed.append(AnalyzedIFOData(chunk,insp_ckpt))
                    else:
                        # XXX: ensure output is added to list of output files
                        output = insp.get_output()
                        # store this chunk in the list of filtered data
                        chunks_analyzed.append(AnalyzedIFOData(chunk,insp))

    return tuple([prev_df,chunks_analyzed])
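# Hypothetical driver sketch (not part of the original script): call
# analyze_ifo() once per interferometer, threading prev_df through so the
# LSCdataFind jobs can be chained, and collect the analyzed chunks.  The
# ifo_data/ifo_to_do dictionaries and the *_job objects are assumed to have
# been built earlier in the pipeline; per-IFO job objects could equally be
# passed in.
prev_df = None
analyzed_chunks = {}
for ifo_name in ['H1', 'L1']:
    prev_df, chunks = analyze_ifo(ifo_name, ifo_data[ifo_name],
        ifo_to_do[ifo_name], tmplt_job, insp_job, df_job, prev_df, dag,
        usertag=usertag, inspinjNode=None, insp_ckpt_job=insp_ckpt_job)
    analyzed_chunks[ifo_name] = chunks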