def make_coinc_branch(dag, datafinds, seglistdict, time_slides, timing_params, psds_per_power, enable_clustering, tag, do_injections = False, verbose = False):
    # injection list
    if do_injections:
        assert len(time_slides) == 1
        if verbose:
            print >>sys.stderr, "Building lalapps_binj jobs ..."
        binjnodes = power.make_binj_fragment(dag, seglistdict.extent_all(), time_slides.keys()[0], tag, 0.0, float(power.powerjob.get_opts()["low-freq-cutoff"]), float(power.powerjob.get_opts()["low-freq-cutoff"]) + float(power.powerjob.get_opts()["bandwidth"]))
        # add binj nodes as parents of the datafinds to force the binj's to
        # be run first.  this ensures that once a datafind has run the
        # power jobs that follow it will immediately be able to run, which
        # helps depth-first dagman do smarter things.
        for node in datafinds:
            for binjnode in binjnodes:
                node.add_parent(binjnode)
    else:
        binjnodes = set()

    # single-instrument trigger generation
    trigger_nodes = power.make_single_instrument_stage(dag, datafinds, seglistdict, tag, timing_params, psds_per_power, binjnodes = binjnodes, verbose = verbose)
    if enable_clustering:
        if verbose:
            print >>sys.stderr, "building pre-lladd bucluster jobs ..."
        trigger_nodes = power.make_bucluster_fragment(dag, trigger_nodes, "PRELLADD_%s" % tag, verbose = verbose)

    # coincidence analysis
    coinc_nodes = set()
    binj_cache = set([cache_entry for node in binjnodes for cache_entry in node.get_output_cache()])
    # otherwise too many copies of the offset vector will be fed into
    # burca
    assert len(binj_cache) < 2
    for n, (time_slides_cache_entry, these_time_slides) in enumerate(time_slides.items()):
        if verbose:
            print >>sys.stderr, "%s %d/%d (%s):" % (tag, n + 1, len(time_slides), time_slides_cache_entry.path)
        tisi_cache = set([time_slides_cache_entry])
        if do_injections:
            # lalapps_binj has already copied the time slide
            # document into its own output
            extra_input_cache = set()
        else:
            # ligolw_add needs to copy the time slide document
            # into its output
            extra_input_cache = tisi_cache
        nodes = set()
        for seg, parents, cache, clipseg in power.group_coinc_parents(trigger_nodes, these_time_slides, verbose = verbose):
            nodes |= power.make_lladd_fragment(dag, parents | binjnodes, "%s_%d" % (tag, n), segment = seg, input_cache = cache | binj_cache, extra_input_cache = extra_input_cache, remove_input = do_injections, preserve_cache = binj_cache | tisi_cache)
        if enable_clustering:
            if verbose:
                print >>sys.stderr, "building post-lladd bucluster jobs ..."
            nodes = power.make_bucluster_fragment(dag, nodes, "POSTLLADD_%s_%d" % (tag, n), verbose = verbose)
        if verbose:
            print >>sys.stderr, "building burca jobs ..."
        coinc_nodes |= power.make_burca_fragment(dag, nodes, "%s_%d" % (tag, n), verbose = verbose)
        if verbose:
            print >>sys.stderr, "done %s %d/%d" % (tag, n + 1, len(time_slides))

    # injection identification
    if do_injections:
        if verbose:
            print >>sys.stderr, "building binjfind jobs ..."
        coinc_nodes = power.make_binjfind_fragment(dag, coinc_nodes, tag, verbose = verbose)

    # conversion to SQLite database files
    if verbose:
        print >>sys.stderr, "building sqlite jobs ..."
    coinc_nodes = power.make_sqlite_fragment(dag, coinc_nodes, tag, verbose = verbose)

    # done
    power.write_output_cache(coinc_nodes, "%s_%s_output.cache" % (os.path.splitext(dag.get_dag_file())[0], tag))
    return coinc_nodes
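# ---------------------------------------------------------------------------
# Illustration only, not part of the pipeline:  a minimal sketch of the shape
# of the ``time_slides`` argument consumed by make_coinc_branch().  It maps
# the cache entry of each time-slide XML document to the list of offset
# vectors that document defines, each offset vector mapping an instrument
# name to a time offset in seconds.  Plain dicts stand in for the pipeline's
# offset-vector objects, and the file name and GPS times below are made up.
# ---------------------------------------------------------------------------

from lal.utils import CacheEntry    # glue.lal.CacheEntry in older installs

example_time_slides = {
    CacheEntry("H1L1 TISI_INJ 871147516 86400 file://localhost/tmp/TISI_INJ.xml.gz"): [
        {"H1": 0.0, "L1": 0.0},     # injections use the zero-lag vector only
    ],
}

# an injection branch requires exactly one time-slide document ...
assert len(example_time_slides) == 1
# ... and the coincidence loop unpacks (cache entry, offset vector list) pairs
[(tisi_cache_entry, offset_vectors)] = example_time_slides.items()
assert tisi_cache_entry.path.endswith("TISI_INJ.xml.gz")
assert len(offset_vectors) == 1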
def make_coinc_branch(dag, datafinds, seglists, time_slides, min_segment_length, pad, overlap, short_segment_duration, tag, vetoes_cache = set(), do_injections = False, injections_offset = 0.0, verbose = False):
    #
    # injection job
    #

    binjnodes = set()
    if do_injections:
        # don't know what to do with more than one list of offset
        # vectors
        assert len(time_slides) == 1

        # get the largest injection offset's magnitude
        maxoffset = max(abs(offset) for offsetvectorlist in time_slides.values() for offsetvector in offsetvectorlist for offset in offsetvector.values())

        # to save disk space and speed the dag along we don't
        # generate a single injection list for the entire analysis
        # run;  instead a separate list is constructed for each
        # block of data to be analyzed.  we need to be careful that
        # two nearby injection lists don't contain injections for
        # the same time, so we protract the segments by the time
        # step and coalesce so that only gaps between segments
        # larger than twice the time step result in separate files
        # being generated.  we could allow smaller gaps to survive,
        # but this way we don't have to worry about it.

        # injections_offset is a number between 0 and 1 in units of
        # the period between injections

        for seg in seglists.union(seglists).protract(power.binjjob.time_step + maxoffset).coalesce().contract(power.binjjob.time_step + maxoffset):
            binjnodes |= power.make_binj_fragment(dag, seg.protract(maxoffset), time_slides.keys()[0], tag, offset = injections_offset)

        # artificial parent-child relationship to induce dagman to
        # submit binj jobs as the corresponding datafinds complete
        # instead of submitting all of one kind before any of the
        # next.  makes the dag run faster because it allows string
        # search jobs to start moving onto the cluster without
        # waiting for all the datafinds and/or all the binjs to
        # complete

        for datafindnode in datafinds:
            seg = segments.segment(datafindnode.get_start(), datafindnode.get_end())
            for binjnode in binjnodes:
                if seg.intersects(power.cache_span(binjnode.get_output_cache())):
                    binjnode.add_parent(datafindnode)

    #
    # trigger generator jobs
    #

    # set max job length to ~3600 s (will be clipped to an allowed
    # size)
    trigger_nodes = cosmicstring.make_single_instrument_stage(dag, datafinds, seglists, tag, min_segment_length, pad, overlap, short_segment_duration, max_job_length = 3600, binjnodes = binjnodes, verbose = verbose)

    #
    # coincidence analysis
    #

    coinc_nodes = []
    for n, (time_slides_cache_entry, these_time_slides) in enumerate(time_slides.items()):
        if verbose:
            print >>sys.stderr, "%s %d/%d (%s):" % (tag, n + 1, len(time_slides), time_slides_cache_entry.path)
        coinc_nodes.append(set())

        #
        # lalapps_cafe & ligolw_add
        #

        tisi_cache = set([time_slides_cache_entry])
        lladd_nodes = set()
        for seg, parents, cache, clipseg in power.group_coinc_parents(trigger_nodes, these_time_slides, extentlimit = 50000000.0 / (len(these_time_slides) or 1), verbose = verbose):
            binj_cache = set(cache_entry for node in binjnodes for cache_entry in node.get_output_cache() if cache_entry.segment.intersects(seg))
            # otherwise too many copies of the offset vector
            # will be fed into burca
            assert len(binj_cache) < 2
            if do_injections:
                # lalapps_binj has already copied the time
                # slide document into its own output
                extra_input_cache = vetoes_cache
            else:
                # ligolw_add needs to copy the time slide
                # document into its output
                extra_input_cache = tisi_cache | vetoes_cache
            these_lladd_nodes = power.make_lladd_fragment(dag, parents | binjnodes, "%s_%d" % (tag, n), segment = seg, input_cache = cache | binj_cache, extra_input_cache = extra_input_cache, remove_input = do_injections and clipseg is not None, preserve_cache = binj_cache | tisi_cache | vetoes_cache)
            if clipseg is not None:
                #
                # this is a fragment of a too-large burca
                # job, construct it specially and add the
                # command-line option needed to clip the
                # output
                #

                assert len(these_lladd_nodes) == 1
                coinc_nodes[-1] |= power.make_burca_fragment(dag, these_lladd_nodes, "%s_%d" % (tag, n), coincidence_segments = segments.segmentlist([clipseg]), verbose = verbose)
            else:
                #
                # this is not a fragment of a too-large
                # burca job, add it to the pool of files to
                # be processed by the burcas that don't
                # require special clipping command line
                # options
                #

                lladd_nodes |= these_lladd_nodes

        #
        # lalapps_burca pool.  these are the burca jobs that don't
        # require special clipping command line options, and so can
        # bulk-process many files with each job
        #

        if verbose:
            print >>sys.stderr, "building burca jobs ..."
        coinc_nodes[-1] |= power.make_burca_fragment(dag, lladd_nodes, "%s_%d" % (tag, n), verbose = verbose)
        if verbose:
            print >>sys.stderr, "done %s %d/%d" % (tag, n + 1, len(time_slides))

    #
    # lalapps_binjfind
    #

    if do_injections:
        if verbose:
            print >>sys.stderr, "building binjfind jobs ..."
        coinc_nodes = [power.make_binjfind_fragment(dag, these_coinc_nodes, "%s_%d" % (tag, n), verbose = verbose) for n, these_coinc_nodes in enumerate(coinc_nodes)]

    #
    # ligolw_sqlite and lalapps_run_sqlite
    #

    if verbose:
        print >>sys.stderr, "building sqlite jobs ..."
    coinc_nodes = [power.make_sqlite_fragment(dag, these_coinc_nodes, "%s_%d" % (tag, n), verbose = verbose) for n, these_coinc_nodes in enumerate(coinc_nodes)]
    coinc_nodes = [cosmicstring.make_run_sqlite_fragment(dag, these_coinc_nodes, "%s_%d" % (tag, n), clipsegments_sql_filename) for n, these_coinc_nodes in enumerate(coinc_nodes)]

    #
    # lalapps_string_meas_likelihood
    #

    if verbose:
        print >>sys.stderr, "building lalapps_string_meas_likelihood jobs ..."
    likelihood_nodes = [cosmicstring.make_meas_likelihood_fragment(dag, these_coinc_nodes, "%s_%d" % (tag, n)) for n, these_coinc_nodes in enumerate(coinc_nodes)]

    #
    # write output cache
    #

    if verbose:
        print >>sys.stderr, "writing output cache ..."
    for n, (these_coinc_nodes, these_likelihood_nodes) in enumerate(zip(coinc_nodes, likelihood_nodes)):
        power.write_output_cache(these_coinc_nodes | these_likelihood_nodes, "%s_%s_output.cache" % (os.path.splitext(dag.get_dag_file())[0], "%s_%d" % (tag, n)))

    #
    # done
    #

    return coinc_nodes, likelihood_nodes
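# ---------------------------------------------------------------------------
# Illustration only, not part of the pipeline:  why the injection segments
# above are protracted, coalesced and then contracted.  Padding each analysis
# segment by ``power.binjjob.time_step + maxoffset`` before coalescing merges
# any two segments separated by less than twice that pad, so the per-block
# injection lists built afterwards cannot cover the same time twice.  The GPS
# times and the 90 s pad below are made up;  the pad stands in for
# ``time_step + maxoffset``.  Uses the ligo-segments package (glue.segments
# in older installs).
# ---------------------------------------------------------------------------

from ligo import segments

pad = 90.0
seglist = segments.segmentlist([
    segments.segment(1000000000, 1000001000),
    segments.segment(1000001100, 1000002000),   # 100 s gap < 2 * pad:  merged
    segments.segment(1000003000, 1000004000),   # 1000 s gap > 2 * pad:  kept separate
])

merged = seglist.protract(pad).coalesce().contract(pad)
assert merged == segments.segmentlist([
    segments.segment(1000000000, 1000002000),
    segments.segment(1000003000, 1000004000),
])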
def make_coinc_branch(dag, datafinds, seglists, time_slides, min_segment_length, pad, overlap, short_segment_duration, tag, vetoes_cache=set(), do_injections=False, injections_offset=0.0, verbose=False):
    #
    # injection job
    #

    binjnodes = set()
    if do_injections:
        # don't know what to do with more than one list of offset
        # vectors
        assert len(time_slides) == 1

        # get the largest injection offset's magnitude
        maxoffset = max(abs(offset) for offsetvectorlist in time_slides.values() for offsetvector in offsetvectorlist for offset in offsetvector.values())

        # to save disk space and speed the dag along we don't
        # generate a single injection list for the entire analysis
        # run;  instead a separate list is constructed for each
        # block of data to be analyzed.  we need to be careful that
        # two nearby injection lists don't contain injections for
        # the same time, so we protract the segments by the time
        # step and coalesce so that only gaps between segments
        # larger than twice the time step result in separate files
        # being generated.  we could allow smaller gaps to survive,
        # but this way we don't have to worry about it.

        # injections_offset is a number between 0 and 1 in units of
        # the period between injections

        for seg in seglists.union(seglists).protract(power.binjjob.time_step + maxoffset).coalesce().contract(power.binjjob.time_step + maxoffset):
            binjnodes |= power.make_binj_fragment(dag, seg.protract(maxoffset), list(time_slides.keys())[0], tag, offset=injections_offset)

        # artificial parent-child relationship to induce dagman to
        # submit binj jobs as the corresponding datafinds complete
        # instead of submitting all of one kind before any of the
        # next.  makes the dag run faster because it allows string
        # search jobs to start moving onto the cluster without
        # waiting for all the datafinds and/or all the binjs to
        # complete

        for datafindnode in datafinds:
            seg = segments.segment(datafindnode.get_start(), datafindnode.get_end())
            for binjnode in binjnodes:
                if seg.intersects(power.cache_span(binjnode.get_output_cache())):
                    binjnode.add_parent(datafindnode)

    #
    # trigger generator jobs
    #

    # set max job length to ~3600 s (will be clipped to an allowed
    # size)
    trigger_nodes = cosmicstring.make_single_instrument_stage(dag, datafinds, seglists, tag, min_segment_length, pad, overlap, short_segment_duration, max_job_length=3600, binjnodes=binjnodes, verbose=verbose)

    #
    # coincidence analysis
    #

    coinc_nodes = []
    for n, (time_slides_cache_entry, these_time_slides) in enumerate(time_slides.items()):
        if verbose:
            print("%s %d/%d (%s):" % (tag, n + 1, len(time_slides), time_slides_cache_entry.path), file=sys.stderr)
        coinc_nodes.append(set())

        #
        # lalapps_cafe & ligolw_add
        #

        tisi_cache = set([time_slides_cache_entry])
        lladd_nodes = set()
        for segnum, (seg, parents, cache, clipseg) in enumerate(power.group_coinc_parents(trigger_nodes, these_time_slides, extentlimit=150000000.0 / (len(these_time_slides) or 1), verbose=verbose)):
            binj_cache = set(cache_entry for node in binjnodes for cache_entry in node.get_output_cache() if cache_entry.segment.intersects(seg))
            # otherwise too many copies of the offset vector
            # will be fed into burca
            assert len(binj_cache) < 2
            if do_injections:
                # lalapps_binj has already copied the time
                # slide document into its own output
                extra_input_cache = vetoes_cache
            else:
                # ligolw_add needs to copy the time slide
                # document into its output
                extra_input_cache = tisi_cache | vetoes_cache
            these_lladd_nodes = power.make_lladd_fragment(dag, parents | binjnodes, "%s_%d_%x" % (tag, n, segnum), segment=seg, input_cache=cache | binj_cache | segments_cache, extra_input_cache=extra_input_cache, remove_input=do_injections and clipseg is not None, preserve_cache=binj_cache | segments_cache | tisi_cache | vetoes_cache)
            if clipseg is not None:
                #
                # this is a fragment of a too-large burca
                # job, construct it specially and add the
                # command-line option needed to clip the
                # output
                #

                assert len(these_lladd_nodes) == 1
                coinc_nodes[-1] |= power.make_burca_fragment(dag, these_lladd_nodes, "%s_%d" % (tag, n), coincidence_segments=segments.segmentlist([clipseg]), verbose=verbose)
            else:
                #
                # this is not a fragment of a too-large
                # burca job, add it to the pool of files to
                # be processed by the burcas that don't
                # require special clipping command line
                # options
                #

                lladd_nodes |= these_lladd_nodes

        #
        # lalapps_burca pool.  these are the burca jobs that don't
        # require special clipping command line options, and so can
        # bulk-process many files with each job
        #

        if verbose:
            print("building burca jobs ...", file=sys.stderr)
        coinc_nodes[-1] |= power.make_burca_fragment(dag, lladd_nodes, "%s_%d" % (tag, n), verbose=verbose)
        if verbose:
            print("done %s %d/%d" % (tag, n + 1, len(time_slides)), file=sys.stderr)

    #
    # lalapps_binjfind
    #

    if do_injections:
        if verbose:
            print("building binjfind jobs ...", file=sys.stderr)
        coinc_nodes = [power.make_binjfind_fragment(dag, these_coinc_nodes, "%s_%d" % (tag, n), verbose=verbose) for n, these_coinc_nodes in enumerate(coinc_nodes)]

    #
    # ligolw_sqlite and lalapps_run_sqlite
    #

    if verbose:
        print("building sqlite jobs ...", file=sys.stderr)
    coinc_nodes = [power.make_sqlite_fragment(dag, these_coinc_nodes, "%s_%d" % (tag, n), verbose=verbose) for n, these_coinc_nodes in enumerate(coinc_nodes)]
    coinc_nodes = [cosmicstring.make_run_sqlite_fragment(dag, these_coinc_nodes, "%s_%d" % (tag, n), clipsegments_sql_filename) for n, these_coinc_nodes in enumerate(coinc_nodes)]

    #
    # lalapps_string_meas_likelihood
    #

    if verbose:
        print("building lalapps_string_meas_likelihood jobs ...", file=sys.stderr)
    likelihood_nodes = [cosmicstring.make_meas_likelihood_fragment(dag, these_coinc_nodes, "%s_%d" % (tag, n)) for n, these_coinc_nodes in enumerate(coinc_nodes)]

    #
    # write output cache
    #

    if verbose:
        print("writing output cache ...", file=sys.stderr)
    for n, (these_coinc_nodes, these_likelihood_nodes) in enumerate(zip(coinc_nodes, likelihood_nodes)):
        power.write_output_cache(these_coinc_nodes | these_likelihood_nodes, "%s_%s_output.cache" % (os.path.splitext(dag.get_dag_file())[0], "%s_%d" % (tag, n)))

    #
    # done
    #

    return coinc_nodes, likelihood_nodes
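# ---------------------------------------------------------------------------
# Illustration only, not part of the pipeline:  the role of the ``clipseg``
# returned by power.group_coinc_parents() when a chunk is too large and has
# to be split.  Each fragment's burca job is handed coincidence_segments
# containing just its own clip segment, presumably so that where the
# fragments' input files overlap no coincidence is reported twice.  The
# sketch below, with made-up GPS times, just checks the invariant that makes
# this work:  clip segments tile the chunk without intersecting.
# ---------------------------------------------------------------------------

from ligo import segments

chunk = segments.segment(1000000000, 1000300000)
clipsegs = segments.segmentlist([
    segments.segment(1000000000, 1000100000),
    segments.segment(1000100000, 1000200000),
    segments.segment(1000200000, 1000300000),
])

# no two clip segments intersect, so no coincidence can be kept by two
# fragments ...
for i, a in enumerate(clipsegs):
    for b in clipsegs[i + 1:]:
        assert not a.intersects(b)
# ... and together they cover the whole chunk
assert segments.segmentlist(clipsegs).coalesce() == segments.segmentlist([chunk])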