def make_coinc_branch(dag, datafinds, seglistdict, time_slides, timing_params, psds_per_power, enable_clustering, tag, do_injections = False, verbose = False):
	# injection list


	if do_injections:
		assert len(time_slides) == 1
		if verbose:
			print >>sys.stderr, "Building lalapps_binj jobs ..."
		binjnodes = power.make_binj_fragment(dag, seglistdict.extent_all(), time_slides.keys()[0], tag, 0.0, float(power.powerjob.get_opts()["low-freq-cutoff"]), float(power.powerjob.get_opts()["low-freq-cutoff"]) + float(power.powerjob.get_opts()["bandwidth"]))
		# add binj nodes as parents of the datafinds to force the binjs to
		# be run first.  this ensures that once a datafind has run the
		# power jobs that follow it will immediately be able to run, which
		# helps depth-first dagman do smarter things.
		for node in datafinds:
			for binjnode in binjnodes:
				node.add_parent(binjnode)
	else:
		binjnodes = set()


	# single-instrument trigger generation


	trigger_nodes = power.make_single_instrument_stage(dag, datafinds, seglistdict, tag, timing_params, psds_per_power, binjnodes = binjnodes, verbose = verbose)
	if enable_clustering:
		if verbose:
			print >>sys.stderr, "building pre-lladd bucluster jobs ..."
		trigger_nodes = power.make_bucluster_fragment(dag, trigger_nodes, "PRELLADD_%s" % tag, verbose = verbose)


	# coincidence analysis


	coinc_nodes = set()
	binj_cache = set([cache_entry for node in binjnodes for cache_entry in node.get_output_cache()])
	# otherwise too many copies of the offset vector will be fed into
	# burca
	assert len(binj_cache) < 2
	for n, (time_slides_cache_entry, these_time_slides) in enumerate(time_slides.items()):
		if verbose:
			print >>sys.stderr, "%s %d/%d (%s):" % (tag, n + 1, len(time_slides), time_slides_cache_entry.path)
		tisi_cache = set([time_slides_cache_entry])
		if do_injections:
			# lalapps_binj has already copied the time slide
			# document into its own output
			extra_input_cache = set()
		else:
			# ligolw_add needs to copy the time slide document
			# into its output
			extra_input_cache = tisi_cache
		nodes = set()
		for seg, parents, cache, clipseg in power.group_coinc_parents(trigger_nodes, these_time_slides, verbose = verbose):
			nodes |= power.make_lladd_fragment(dag, parents | binjnodes, "%s_%d" % (tag, n), segment = seg, input_cache = cache | binj_cache, extra_input_cache = extra_input_cache, remove_input = do_injections, preserve_cache = binj_cache | tisi_cache)
		if enable_clustering:
			if verbose:
				print >>sys.stderr, "building post-lladd bucluster jobs ..."
			nodes = power.make_bucluster_fragment(dag, nodes, "POSTLLADD_%s_%d" % (tag, n), verbose = verbose)
		if verbose:
			print >>sys.stderr, "building burca jobs ..."
		coinc_nodes |= power.make_burca_fragment(dag, nodes, "%s_%d" % (tag, n), verbose = verbose)
		if verbose:
			print >>sys.stderr, "done %s %d/%d" % (tag, n + 1, len(time_slides))


	# injection identification


	if do_injections:
		if verbose:
			print >>sys.stderr, "building binjfind jobs ..."
		coinc_nodes = power.make_binjfind_fragment(dag, coinc_nodes, tag, verbose = verbose)


	# conversion to SQLite database files


	if verbose:
		print >>sys.stderr, "building sqlite jobs ..."
	coinc_nodes = power.make_sqlite_fragment(dag, coinc_nodes, tag, verbose = verbose)


	# done


	power.write_output_cache(coinc_nodes, "%s_%s_output.cache" % (os.path.splitext(dag.get_dag_file())[0], tag))
	return coinc_nodes
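
The parent/child comment near the top of this example (binj nodes as parents of the datafinds) is plain glue.pipeline DAG wiring. Below is a minimal, self-contained sketch of that relationship, assuming the lscsoft-glue package is available; the universes, executables and node layout are placeholders, not the real power/datafind fragments.

# Hedged sketch of the binj -> datafind parent/child trick described above.
# Everything here is illustrative; the real nodes come from
# power.make_binj_fragment and the datafind stage of the pipeline.
from glue import pipeline

dag = pipeline.CondorDAG("demo.log")
dag.set_dag_file("demo")

binj_job = pipeline.CondorDAGJob("vanilla", "lalapps_binj")      # placeholder executables
datafind_job = pipeline.CondorDAGJob("vanilla", "gw_data_find")

binjnode = pipeline.CondorDAGNode(binj_job)
datafindnode = pipeline.CondorDAGNode(datafind_job)
dag.add_node(binjnode)
dag.add_node(datafindnode)

# same relationship as node.add_parent(binjnode) in the loop above: DAGMan will
# not release the datafind until the binj job has completed, so the power jobs
# downstream of a finished datafind are immediately runnable.
datafindnode.add_parent(binjnode)
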
def make_coinc_branch(dag, datafinds, seglists, time_slides, min_segment_length, pad, overlap, short_segment_duration, tag, vetoes_cache = set(), do_injections = False, injections_offset = 0.0, verbose = False):
	#
	# injection job
	#

	binjnodes = set()
	if do_injections:
		# don't know what to do with more than one list of offset
		# vectors
		assert len(time_slides) == 1

		# get the largest injection offset's magnitude
		maxoffset = max(abs(offset) for offsetvectorlist in time_slides.values() for offsetvector in offsetvectorlist for offset in offsetvector.values())

		# to save disk space and speed the dag along we don't
		# generate a single injection list for the entire analysis
		# run; instead, a separate list is constructed for each
		# block of data to be analyzed.  we need to be careful that
		# two nearby injection lists don't contain injections for
		# the same time, so we protract the segments by the time
		# step and coalesce so that only gaps between segments
		# larger than twice the time step result in separate files
		# being generated.  we could allow smaller gaps to survive,
		# but this way we don't have to worry about it.

		# injections_offset is a number between 0 and 1 in units of
		# the period between injections

		# seglists.union(seglists) is the union of the segment lists over all instruments
		for seg in seglists.union(seglists).protract(power.binjjob.time_step + maxoffset).coalesce().contract(power.binjjob.time_step + maxoffset):
			binjnodes |= power.make_binj_fragment(dag, seg.protract(maxoffset), time_slides.keys()[0], tag, offset = injections_offset)

		# artificial parent-child relationship to induce dagman to
		# submit binj jobs as the corresponding datafinds complete
		# instead of submitting all of one kind before any of the next.
		# makes the dag run faster because it allows string search jobs to
		# start moving onto the cluster without waiting for all the
		# datafinds and/or all the binjs to complete

		for datafindnode in datafinds:
			seg = segments.segment(datafindnode.get_start(), datafindnode.get_end())
			for binjnode in binjnodes:
				if seg.intersects(power.cache_span(binjnode.get_output_cache())):
					binjnode.add_parent(datafindnode)

	#
	# trigger generator jobs
	#

	# set max job length to ~3600 s (will be clipped to an allowed
	# size)
	trigger_nodes = cosmicstring.make_single_instrument_stage(dag, datafinds, seglists, tag, min_segment_length, pad, overlap, short_segment_duration, max_job_length = 3600, binjnodes = binjnodes, verbose = verbose)

	#
	# coincidence analysis
	#

	coinc_nodes = []
	for n, (time_slides_cache_entry, these_time_slides) in enumerate(time_slides.items()):
		if verbose:
			print >>sys.stderr, "%s %d/%d (%s):" % (tag, n + 1, len(time_slides), time_slides_cache_entry.path)
		coinc_nodes.append(set())

		#
		# lalapps_cafe & ligolw_add
		#

		tisi_cache = set([time_slides_cache_entry])
		lladd_nodes = set()
		for seg, parents, cache, clipseg in power.group_coinc_parents(trigger_nodes, these_time_slides, extentlimit = 50000000.0 / (len(these_time_slides) or 1), verbose = verbose):
			binj_cache = set(cache_entry for node in binjnodes for cache_entry in node.get_output_cache() if cache_entry.segment.intersects(seg))
			# otherwise too many copies of the offset vector
			# will be fed into burca
			assert len(binj_cache) < 2
			if do_injections:
				# lalapps_binj has already copied the time
				# slide document into its own output
				extra_input_cache = vetoes_cache
			else:
				# ligolw_add needs to copy the time slide
				# document into its output
				extra_input_cache = tisi_cache | vetoes_cache
			these_lladd_nodes = power.make_lladd_fragment(dag, parents | binjnodes, "%s_%d" % (tag, n), segment = seg, input_cache = cache | binj_cache, extra_input_cache = extra_input_cache, remove_input = do_injections and clipseg is not None, preserve_cache = binj_cache | tisi_cache | vetoes_cache)
			if clipseg is not None:
				#
				# this is a fragment of a too-large burca
				# job, construct it specially and add the
				# command-line option needed to clip the
				# output
				#

				assert len(these_lladd_nodes) == 1
				coinc_nodes[-1] |= power.make_burca_fragment(dag, these_lladd_nodes, "%s_%d" % (tag, n), coincidence_segments = segments.segmentlist([clipseg]), verbose = verbose)

			else:
				#
				# this is not a fragment of a too-large
				# burca job, add it to the pool of files to
				# be processed by the burcas that don't
				# require special clipping command line
				# options
				#

				lladd_nodes |= these_lladd_nodes

		#
		# lalapps_burca pool.  these are the burca jobs that don't
		# require special clipping command line options, and so can
		# bulk-process many files with each job
		#

		if verbose:
			print >>sys.stderr, "building burca jobs ..."
		coinc_nodes[-1] |= power.make_burca_fragment(dag, lladd_nodes, "%s_%d" % (tag, n), verbose = verbose)
		if verbose:
			print >>sys.stderr, "done %s %d/%d" % (tag, n + 1, len(time_slides))

	#
	# lalapps_binjfind
	#

	if do_injections:
		if verbose:
			print >>sys.stderr, "building binjfind jobs ..."
		coinc_nodes = [power.make_binjfind_fragment(dag, these_coinc_nodes, "%s_%d" % (tag, n), verbose = verbose) for n, these_coinc_nodes in enumerate(coinc_nodes)]

	#
	# ligolw_sqlite and lalapps_run_sqlite
	#

	if verbose:
		print >>sys.stderr, "building sqlite jobs ..."
	coinc_nodes = [power.make_sqlite_fragment(dag, these_coinc_nodes, "%s_%d" % (tag, n), verbose = verbose) for n, these_coinc_nodes in enumerate(coinc_nodes)]
	# clipsegments_sql_filename is not defined in this function; the original
	# pipeline script provides it at module scope
	coinc_nodes = [cosmicstring.make_run_sqlite_fragment(dag, these_coinc_nodes, "%s_%d" % (tag, n), clipsegments_sql_filename) for n, these_coinc_nodes in enumerate(coinc_nodes)]

	#
	# lalapps_string_meas_likelihood
	#

	if verbose:
		print >>sys.stderr, "building lalapps_string_meas_likelihood jobs ..."
	likelihood_nodes = [cosmicstring.make_meas_likelihood_fragment(dag, these_coinc_nodes, "%s_%d" % (tag, n)) for n, these_coinc_nodes in enumerate(coinc_nodes)]

	#
	# write output cache
	#

	if verbose:
		print >>sys.stderr, "writing output cache ..."
	for n, (these_coinc_nodes, these_likelihood_nodes) in enumerate(zip(coinc_nodes, likelihood_nodes)):
		power.write_output_cache(these_coinc_nodes | these_likelihood_nodes, "%s_%s_output.cache" % (os.path.splitext(dag.get_dag_file())[0], "%s_%d" % (tag, n)))

	#
	# done
	#

	return coinc_nodes, likelihood_nodes
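
The protract/coalesce/contract comment in the injection block above is easiest to see with numbers. This is a small worked sketch, assuming the ligo.segments package (historically glue.segments), which is the segment-arithmetic library these pipelines use; the values are invented.

# Gaps smaller than twice the padding collapse when the list is protracted and
# coalesced; larger gaps survive and produce separate injection files.
from ligo import segments

time_step = 100.0  # stand-in for power.binjjob.time_step + maxoffset

analysis = segments.segmentlist([
    segments.segment(0, 1000),
    segments.segment(1150, 2000),   # 150 s gap  < 2 * time_step -> merged
    segments.segment(2300, 3000),   # 300 s gap  > 2 * time_step -> separate block
])

blocks = analysis.protract(time_step).coalesce().contract(time_step)
print(blocks)
# roughly: [segment(0.0, 2000.0), segment(2300.0, 3000.0)]
# i.e. one lalapps_binj job covers the first two analysis blocks, a second
# covers the third, and no two injection lists can contain the same time.
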
Example #3
def make_coinc_branch(dag,
                      datafinds,
                      seglistdict,
                      time_slides,
                      timing_params,
                      psds_per_power,
                      enable_clustering,
                      tag,
                      do_injections=False,
                      verbose=False):
    # injection list

    if do_injections:
        assert len(time_slides) == 1
        if verbose:
            print >> sys.stderr, "Building lalapps_binj jobs ..."
        binjnodes = power.make_binj_fragment(
            dag, seglistdict.extent_all(),
            time_slides.keys()[0], tag, 0.0,
            float(power.powerjob.get_opts()["low-freq-cutoff"]),
            float(power.powerjob.get_opts()["low-freq-cutoff"]) +
            float(power.powerjob.get_opts()["bandwidth"]))
        # add binj nodes as parents of the datafinds to force the binjs to
        # be run first.  this ensures that once a datafind has run the
        # power jobs that follow it will immediately be able to run, which
        # helps depth-first dagman do smarter things.
        for node in datafinds:
            for binjnode in binjnodes:
                node.add_parent(binjnode)
    else:
        binjnodes = set()

    # single-instrument trigger generation

    trigger_nodes = power.make_single_instrument_stage(dag,
                                                       datafinds,
                                                       seglistdict,
                                                       tag,
                                                       timing_params,
                                                       psds_per_power,
                                                       binjnodes=binjnodes,
                                                       verbose=verbose)
    if enable_clustering:
        if verbose:
            print >> sys.stderr, "building pre-lladd bucluster jobs ..."
        trigger_nodes = power.make_bucluster_fragment(dag,
                                                      trigger_nodes,
                                                      "PRELLADD_%s" % tag,
                                                      verbose=verbose)

    # coincidence analysis

    coinc_nodes = set()
    binj_cache = set([
        cache_entry for node in binjnodes
        for cache_entry in node.get_output_cache()
    ])
    # otherwise too many copies of the offset vector will be fed into
    # burca
    assert len(binj_cache) < 2
    for n, (time_slides_cache_entry,
            these_time_slides) in enumerate(time_slides.items()):
        if verbose:
            print >> sys.stderr, "%s %d/%d (%s):" % (
                tag, n + 1, len(time_slides), time_slides_cache_entry.path)
        tisi_cache = set([time_slides_cache_entry])
        if do_injections:
            # lalapps_binj has already copied the time slide
            # document into its own output
            extra_input_cache = set()
        else:
            # ligolw_add needs to copy the time slide document
            # into its output
            extra_input_cache = tisi_cache
        nodes = set()
        for seg, parents, cache, clipseg in power.group_coinc_parents(
                trigger_nodes, these_time_slides, verbose=verbose):
            nodes |= power.make_lladd_fragment(
                dag,
                parents | binjnodes,
                "%s_%d" % (tag, n),
                segment=seg,
                input_cache=cache | binj_cache,
                extra_input_cache=extra_input_cache,
                remove_input=do_injections,
                preserve_cache=binj_cache | tisi_cache)
        if enable_clustering:
            if verbose:
                print >> sys.stderr, "building post-lladd bucluster jobs ..."
            nodes = power.make_bucluster_fragment(dag,
                                                  nodes,
                                                  "POSTLLADD_%s_%d" % (tag, n),
                                                  verbose=verbose)
        if verbose:
            print >> sys.stderr, "building burca jobs ..."
        coinc_nodes |= power.make_burca_fragment(dag,
                                                 nodes,
                                                 "%s_%d" % (tag, n),
                                                 verbose=verbose)
        if verbose:
            print >> sys.stderr, "done %s %d/%d" % (tag, n + 1,
                                                    len(time_slides))

    # injection identification

    if do_injections:
        if verbose:
            print >> sys.stderr, "building binjfind jobs ..."
        coinc_nodes = power.make_binjfind_fragment(dag,
                                                   coinc_nodes,
                                                   tag,
                                                   verbose=verbose)

    # conversion to SQLite database files

    if verbose:
        print >> sys.stderr, "building sqlite jobs ..."
    coinc_nodes = power.make_sqlite_fragment(dag,
                                             coinc_nodes,
                                             tag,
                                             verbose=verbose)

    # done

    power.write_output_cache(
        coinc_nodes,
        "%s_%s_output.cache" % (os.path.splitext(dag.get_dag_file())[0], tag))
    return coinc_nodes
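
power.write_output_cache at the end of this branch writes a LAL cache file listing the nodes' output files. A rough sketch of one cache entry follows, using lal.utils.CacheEntry (historically glue.lal.CacheEntry, the same class whose .path and .segment attributes appear in the loops above); the observatory, tag and URL are invented.

# One line of a "<dagname>_<tag>_output.cache" file, built from invented values.
from lal.utils import CacheEntry
from ligo import segments

entry = CacheEntry(
    "H1",                                      # observatory
    "POWER_EXAMPLE",                           # description / tag (invented)
    segments.segment(1000000000, 1000000064),  # GPS span covered by the file
    "file://localhost/data/H1-POWER_EXAMPLE-1000000000-64.sqlite",
)

print(str(entry))
# roughly: H1 POWER_EXAMPLE 1000000000 64 file://localhost/data/H1-POWER_EXAMPLE-1000000000-64.sqlite
print(entry.path, entry.segment)               # the attributes used above
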
Example #4
def make_coinc_branch(dag,
                      datafinds,
                      seglists,
                      time_slides,
                      min_segment_length,
                      pad,
                      overlap,
                      short_segment_duration,
                      tag,
                      vetoes_cache=set(),
                      do_injections=False,
                      injections_offset=0.0,
                      verbose=False):
    #
    # injection job
    #

    binjnodes = set()
    if do_injections:
        # don't know what to do with more than one list of offset
        # vectors
        assert len(time_slides) == 1

        # get the largest injection offset's magnitude
        maxoffset = max(
            abs(offset) for offsetvectorlist in time_slides.values()
            for offsetvector in offsetvectorlist
            for offset in offsetvector.values())

        # to save disk space and speed the dag along we don't
        # generate a single injection list for the entire analysis
        # run; instead, a separate list is constructed for each
        # block of data to be analyzed.  we need to be careful that
        # two nearby injection lists don't contain injections for
        # the same time, so we protract the segments by the time
        # step and coalesce so that only gaps between segments
        # larger than twice the time step result in separate files
        # being generated.  we could allow smaller gaps to survive,
        # but this way we don't have to worry about it.

        # injections_offset is a number between 0 and 1 in units of
        # the period between injections

        # seglists.union(seglists) is the union of the segment lists over all instruments
        for seg in seglists.union(seglists).protract(
                power.binjjob.time_step +
                maxoffset).coalesce().contract(power.binjjob.time_step +
                                               maxoffset):
            binjnodes |= power.make_binj_fragment(dag,
                                                  seg.protract(maxoffset),
                                                  time_slides.keys()[0],
                                                  tag,
                                                  offset=injections_offset)

        # artificial parent-child relationship to induce dagman to
        # submit binj jobs as the corresponding datafinds complete
        # instead of submitting all of one kind before any of the next.
        # makes the dag run faster because it allows string search jobs to
        # start moving onto the cluster without waiting for all the
        # datafinds and/or all the binjs to complete

        for datafindnode in datafinds:
            seg = segments.segment(datafindnode.get_start(),
                                   datafindnode.get_end())
            for binjnode in binjnodes:
                if seg.intersects(power.cache_span(
                        binjnode.get_output_cache())):
                    binjnode.add_parent(datafindnode)

    #
    # trigger generator jobs
    #

    # set max job length to ~3600 s (will be clipped to an allowed
    # size)
    trigger_nodes = cosmicstring.make_single_instrument_stage(
        dag,
        datafinds,
        seglists,
        tag,
        min_segment_length,
        pad,
        overlap,
        short_segment_duration,
        max_job_length=3600,
        binjnodes=binjnodes,
        verbose=verbose)

    #
    # coincidence analysis
    #

    coinc_nodes = []
    for n, (time_slides_cache_entry,
            these_time_slides) in enumerate(time_slides.items()):
        if verbose:
            print("%s %d/%d (%s):" %
                  (tag, n + 1, len(time_slides), time_slides_cache_entry.path),
                  file=sys.stderr)
        coinc_nodes.append(set())

        #
        # lalapps_cafe & ligolw_add
        #

        tisi_cache = set([time_slides_cache_entry])
        lladd_nodes = set()
        for segnum, (seg, parents, cache, clipseg) in enumerate(
                power.group_coinc_parents(trigger_nodes,
                                          these_time_slides,
                                          extentlimit=150000000.0 /
                                          (len(these_time_slides) or 1),
                                          verbose=verbose)):
            binj_cache = set(cache_entry for node in binjnodes
                             for cache_entry in node.get_output_cache()
                             if cache_entry.segment.intersects(seg))
            # otherwise too many copies of the offset vector
            # will be fed into burca
            assert len(binj_cache) < 2
            if do_injections:
                # lalapps_binj has already copied the time
                # slide document into its own output
                extra_input_cache = vetoes_cache
            else:
                # ligolw_add needs to copy the time slide
                # document into its output
                extra_input_cache = tisi_cache | vetoes_cache
            # note: segments_cache is not defined in this function; the
            # original pipeline script provides it at module scope
            these_lladd_nodes = power.make_lladd_fragment(
                dag,
                parents | binjnodes,
                "%s_%d_%x" % (tag, n, segnum),
                segment=seg,
                input_cache=cache | binj_cache | segments_cache,
                extra_input_cache=extra_input_cache,
                remove_input=do_injections and clipseg is not None,
                preserve_cache=binj_cache | segments_cache | tisi_cache
                | vetoes_cache)
            if clipseg is not None:
                #
                # this is a fragment of a too-large burca
                # job, construct it specially and add the
                # command-line option needed to clip the
                # output
                #

                assert len(these_lladd_nodes) == 1
                coinc_nodes[-1] |= power.make_burca_fragment(
                    dag,
                    these_lladd_nodes,
                    "%s_%d" % (tag, n),
                    coincidence_segments=segments.segmentlist([clipseg]),
                    verbose=verbose)

            else:
                #
                # this is not a fragment of a too-large
                # burca job, add it to the pool of files to
                # be processed by the burcas that don't
                # require special clipping command line
                # options
                #

                lladd_nodes |= these_lladd_nodes

        #
        # lalapps_burca pool.  these are the burca jobs that don't
        # require special clipping command line options, and so can
        # bulk-process many files with each job
        #

        if verbose:
            print("building burca jobs ...", file=sys.stderr)
        coinc_nodes[-1] |= power.make_burca_fragment(dag,
                                                     lladd_nodes,
                                                     "%s_%d" % (tag, n),
                                                     verbose=verbose)
        if verbose:
            print("done %s %d/%d" % (tag, n + 1, len(time_slides)),
                  file=sys.stderr)

    #
    # lalapps_binjfind
    #

    if do_injections:
        if verbose:
            print("building binjfind jobs ...", file=sys.stderr)
        coinc_nodes = [
            power.make_binjfind_fragment(dag,
                                         these_coinc_nodes,
                                         "%s_%d" % (tag, n),
                                         verbose=verbose)
            for n, these_coinc_nodes in enumerate(coinc_nodes)
        ]

    #
    # ligolw_sqlite and lalapps_run_sqlite
    #

    if verbose:
        print("building sqlite jobs ...", file=sys.stderr)
    coinc_nodes = [
        power.make_sqlite_fragment(dag,
                                   these_coinc_nodes,
                                   "%s_%d" % (tag, n),
                                   verbose=verbose)
        for n, these_coinc_nodes in enumerate(coinc_nodes)
    ]
    # clipsegments_sql_filename is not defined in this function; the original
    # pipeline script provides it at module scope
    coinc_nodes = [
        cosmicstring.make_run_sqlite_fragment(dag, these_coinc_nodes,
                                              "%s_%d" % (tag, n),
                                              clipsegments_sql_filename)
        for n, these_coinc_nodes in enumerate(coinc_nodes)
    ]

    #
    # lalapps_string_meas_likelihood
    #

    if verbose:
        print("building lalapps_string_meas_likelihood jobs ...",
              file=sys.stderr)
    likelihood_nodes = [
        cosmicstring.make_meas_likelihood_fragment(dag, these_coinc_nodes,
                                                   "%s_%d" % (tag, n))
        for n, these_coinc_nodes in enumerate(coinc_nodes)
    ]

    #
    # write output cache
    #

    if verbose:
        print("writing output cache ...", file=sys.stderr)
    for n, (these_coinc_nodes, these_likelihood_nodes) in enumerate(
            zip(coinc_nodes, likelihood_nodes)):
        power.write_output_cache(
            these_coinc_nodes | these_likelihood_nodes, "%s_%s_output.cache" %
            (os.path.splitext(dag.get_dag_file())[0], "%s_%d" % (tag, n)))

    #
    # done
    #

    return coinc_nodes, likelihood_nodes
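
The maxoffset generator expression near the top of this example just walks the time_slides mapping: cache entry -> list of offset vectors -> per-instrument offsets. Here is a worked sketch with plain Python objects standing in for the real CacheEntry keys and offsetvector values (an assumption made purely for illustration).

# Find the largest time-slide offset magnitude; the per-block injection
# segments above are protracted by this amount (see seg.protract(maxoffset)).
time_slides = {
    "H1L1V1-TISI.xml.gz": [                      # hypothetical cache key
        {"H1": 0.0, "L1": 0.0, "V1": 0.0},       # zero-lag vector
        {"H1": 0.0, "L1": 31.41, "V1": -62.83},  # one time slide
    ],
}

maxoffset = max(
    abs(offset)
    for offsetvectorlist in time_slides.values()
    for offsetvector in offsetvectorlist
    for offset in offsetvector.values()
)

print(maxoffset)  # 62.83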