# module-level imports assumed by these excerpts; the "rate" module
# providing NDBins/BinnedRatios/BinnedArray (e.g. pylal.rate) is assumed
# imported at module level as in the original packages
import math
import sys
import numpy
from math import pi


def compute_search_efficiency_in_bins(found, total, ndbins, sim_to_bins_function = lambda sim: (sim.distance,)):
    """
    Compute the search efficiency in the provided ndbins.  The first
    dimension of ndbins must be the distance.  You must also provide a
    function that maps a sim inspiral row to the correct tuple with
    which to index the ndbins.
    """
    input = rate.BinnedRatios(ndbins)

    # increment the numerator with the found injections
    for sim in found:
        input.incnumerator(sim_to_bins_function(sim))

    # increment the denominator with the total injections
    for sim in total:
        input.incdenominator(sim_to_bins_function(sim))

    # regularize by setting zero denominators to 1 to avoid NaNs
    input.regularize()

    # pull out the efficiency array, it is the ratio
    eff = rate.BinnedArray(rate.NDBins(ndbins), array = input.ratio())

    # compute binomial uncertainties in each bin
    err_arr = numpy.sqrt(eff.array * (1 - eff.array) / input.denominator.array)
    err = rate.BinnedArray(rate.NDBins(ndbins), array = err_arr)

    return eff, err
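# A minimal usage sketch for the function above.  "found_injections" and
# "all_injections" are hypothetical lists of sim_inspiral-like rows
# carrying a .distance attribute; only the rate-module calls already used
# above are relied on.
ndbins = rate.NDBins((rate.LogarithmicBins(1.0, 1000.0, 20),))
eff, err = compute_search_efficiency_in_bins(found_injections, all_injections, ndbins)
# BinnedArray supports coordinate lookup, so the efficiency and its
# binomial uncertainty at, say, distance 100 can be read back directly
print eff[100.0,], err[100.0,]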
def _bin_events(self, binning = None):
    # called internally by finish()
    if binning is None:
        minx, maxx = min(self.injected_x), max(self.injected_x)
        miny, maxy = min(self.injected_y), max(self.injected_y)
        binning = rate.NDBins((rate.LogarithmicBins(minx, maxx, 256), rate.LogarithmicBins(miny, maxy, 256)))

    self.efficiency = rate.BinnedRatios(binning)

    for xy in zip(self.injected_x, self.injected_y):
        self.efficiency.incdenominator(xy)
    for xy in zip(self.found_x, self.found_y):
        self.efficiency.incnumerator(xy)

    # 1 / error^2 is the number of injections that need to be within
    # the window in order for the fractional uncertainty in that
    # number to be = error.  multiplying by bins_per_inj tells us how
    # many bins the window needs to cover, and taking the square root
    # translates that into the window's length on a side in bins.
    # because the contours tend to run parallel to the x axis, the
    # window is dilated in that direction to improve resolution.
    bins_per_inj = self.efficiency.used() / float(len(self.injected_x))
    self.window_size_x = self.window_size_y = math.sqrt(bins_per_inj / self.error**2)
    self.window_size_x *= math.sqrt(2)
    self.window_size_y /= math.sqrt(2)
    if self.window_size_x > 100 or self.window_size_y > 100:
        # program will take too long to run
        raise ValueError("smoothing filter too large (not enough injections)")

    print >>sys.stderr, "The smoothing window for %s is %g x %g bins" % ("+".join(self.instruments), self.window_size_x, self.window_size_y),
    print >>sys.stderr, "which is %g%% x %g%% of the binning" % (100.0 * self.window_size_x / binning[0].n, 100.0 * self.window_size_y / binning[1].n)
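# Worked example of the window-size arithmetic above, with illustrative
# numbers (not from the original source): with error = 0.1, roughly
# 1 / 0.1**2 = 100 injections must land inside the smoothing window.
error, used_bins, n_injections = 0.1, 5000, 1000
bins_per_inj = used_bins / float(n_injections)   # 5.0 bins per injection
side = math.sqrt(bins_per_inj / error**2)        # sqrt(500) ~ 22.4 bins
print "window is %.1f x %.1f bins after the sqrt(2) dilation in x" % (side * math.sqrt(2), side / math.sqrt(2))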
def add_contents(self, contents):
    if self.tisi_rows is None:
        # get a list of time slide dictionaries
        self.tisi_rows = contents.time_slide_table.as_dict().values()

        # find the largest and smallest offsets
        min_offset = min(offset for vector in self.tisi_rows for offset in vector.values())
        max_offset = max(offset for vector in self.tisi_rows for offset in vector.values())

        # a guess at the time slide spacing:  works if the time
        # slides are distributed as a square grid over the plot
        # area.  (max - min)^2 gives the area of the time slide
        # square in square seconds;  dividing by the length of the
        # time slide list gives the average area per time slide;
        # taking the square root of that gives the average distance
        # between adjacent time slides in seconds
        time_slide_spacing = ((max_offset - min_offset)**2 / len(self.tisi_rows))**0.5

        # use an average of 3 bins per time slide in each direction,
        # rounded up to an integer
        nbins = int(math.ceil((max_offset - min_offset) / time_slide_spacing * 3))

        # construct the binning
        self.bins = rate.BinnedRatios(rate.NDBins((rate.LinearBins(min_offset, max_offset, nbins), rate.LinearBins(min_offset, max_offset, nbins))))

    self.seglists |= contents.seglists

    for offsets in contents.connection.cursor().execute("""
SELECT
    tx.offset,
    ty.offset
FROM
    coinc_event
    JOIN time_slide AS tx ON (
        tx.time_slide_id == coinc_event.time_slide_id
    )
    JOIN time_slide AS ty ON (
        ty.time_slide_id == coinc_event.time_slide_id
    )
WHERE
    coinc_event.coinc_def_id == ?
    AND tx.instrument == ?
    AND ty.instrument == ?
    """, (contents.bb_definer_id, self.x_instrument, self.y_instrument)):
        try:
            self.bins.incnumerator(offsets)
        except IndexError:
            # beyond plot boundaries
            pass
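# Quick numerical check of the spacing heuristic above (illustrative
# values, not from the original source): 100 time slides spread over
# offsets in [-50, 50] s occupy (100 s)**2 of offset area, i.e.
# 100 s**2 per slide, so adjacent slides are ~10 s apart.
min_offset, max_offset, n_slides = -50.0, 50.0, 100
spacing = ((max_offset - min_offset)**2 / n_slides)**0.5         # 10.0 s
nbins = int(math.ceil((max_offset - min_offset) / spacing * 3))  # 30 bins per axis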
def compute_search_efficiency_in_bins(found, total, ndbins, sim_to_bins_function = lambda sim: (sim.distance,)):
    """
    Compute the search efficiency in the provided ndbins.  The first
    dimension of ndbins must be the distance.  You must also provide a
    function that maps a sim inspiral row to the correct tuple with
    which to index the ndbins.
    """
    input = rate.BinnedRatios(ndbins)

    # increment the numerator with the found injections
    for sim in found:
        input.incnumerator(sim_to_bins_function(sim))

    # increment the denominator with the total injections
    for sim in total:
        input.incdenominator(sim_to_bins_function(sim))

    # regularize:  give bins with no found injections an enormous
    # denominator so their efficiency comes out effectively zero
    input.denominator.array[input.numerator.array < 1] = 1e35

    # pull out the efficiency array, it is the ratio
    eff = rate.BinnedArray(rate.NDBins(ndbins), array = input.ratio())

    # compute binomial uncertainties in each bin
    k = input.numerator.array
    N = input.denominator.array
    eff_lo_arr = (N * (2 * k + 1) - numpy.sqrt(4 * N * k * (N - k) + N**2)) / (2 * N * (N + 1))
    eff_hi_arr = (N * (2 * k + 1) + numpy.sqrt(4 * N * k * (N - k) + N**2)) / (2 * N * (N + 1))
    eff_lo = rate.BinnedArray(rate.NDBins(ndbins), array = eff_lo_arr)
    eff_hi = rate.BinnedArray(rate.NDBins(ndbins), array = eff_hi_arr)

    return eff_lo, eff, eff_hi
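# The closed forms above match the z = 1 (approximately 68%) Wilson score
# interval, (k + z**2/2 +/- z*sqrt(k*(N - k)/N + z**2/4)) / (N + z**2),
# with numerator and denominator scaled by 2N.  This identification is an
# editorial observation rather than a claim from the original source; a
# quick numerical check:
k, N, z = 7.0, 20.0, 1.0
wilson_hi = (k + z**2 / 2 + z * numpy.sqrt(k * (N - k) / N + z**2 / 4)) / (N + z**2)
closed_hi = (N * (2 * k + 1) + numpy.sqrt(4 * N * k * (N - k) + N**2)) / (2 * N * (N + 1))
assert abs(wilson_hi - closed_hi) < 1e-12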
def twoD_SearchVolume(self, instruments, dbin = None, FAR = None, bootnum = None, derr = 0.197, dsys = 0.074):
    """
    Compute the search volume in the mass/mass plane, bootstrap and
    measure the first and second moment (assumes the underlying
    distribution can be characterized by those two parameters).
    This is going to be brutally slow.

    derr = (0.134**2 + .103**2 + .102**2)**.5 = 0.197 is the
    three-detector calibration uncertainty added in quadrature.  This
    is conservative, since some injections will be H1L1 and have a
    lower error of 0.17.

    dsys is the DC offset, whose maximum value is 0.074.
    """
    if not FAR:
        FAR = self.far[instruments]
    found, missed = self.get_injections(instruments, FAR)
    twodbin = self.twoDMassBins
    wnfunc = self.gw
    livetime = self.livetime[instruments]
    if not bootnum:
        bootnum = self.bootnum

    # compare against None:  truth-testing a multi-element numpy
    # array raises ValueError
    if wnfunc is not None:
        wnfunc /= wnfunc[(wnfunc.shape[0] - 1) / 2, (wnfunc.shape[1] - 1) / 2]

    x = twodbin.shape[0]
    y = twodbin.shape[1]
    z = int(self.opts.dist_bins)

    rArrays = []
    volArray = rate.BinnedArray(twodbin)
    volArray2 = rate.BinnedArray(twodbin)

    # set up ratio arrays for each distance bin
    for k in range(z):
        rArrays.append(rate.BinnedRatios(twodbin))

    # bootstrap to account for errors
    for n in range(bootnum):
        # initialize by setting these to zero
        for k in range(z):
            rArrays[k].numerator.array = numpy.zeros(rArrays[k].numerator.bins.shape)
            rArrays[k].denominator.array = numpy.zeros(rArrays[k].numerator.bins.shape)

        # scramble the injection population and distances
        if bootnum > 1:
            sm, sf = self._scramble_pop(missed, found)
            # a separate array of distances speeds up this calculation
            f_dist = self._scramble_dist(sf, derr, dsys)
        else:
            sm, sf = missed, found
            f_dist = numpy.array([l.distance for l in found])

        # compute the distance bins
        if not dbin:
            dbin = rate.LogarithmicBins(min(f_dist), max(f_dist), z)

        # get rid of all missed injections outside the distance bins
        # to prevent binning errors
        sm, m_dist = self.cut_distance(sm, dbin)
        sf, f_dist = self.cut_distance(sf, dbin)

        for i, l in enumerate(sf):  # found
            tbin = rArrays[dbin[f_dist[i]]]
            tbin.incnumerator((l.mass1, l.mass2))
        for i, l in enumerate(sm):  # missed
            tbin = rArrays[dbin[m_dist[i]]]
            tbin.incdenominator((l.mass1, l.mass2))

        # start with a zero array to compute the mean square
        tmpArray2 = rate.BinnedArray(twodbin)
        for k in range(z):
            tbins = rArrays[k]
            tbins.denominator.array += tbins.numerator.array
            if wnfunc is not None:
                rate.filter_array(tbins.denominator.array, wnfunc)
                rate.filter_array(tbins.numerator.array, wnfunc)
            tbins.regularize()

            # logarithmic(d)
            integrand = 4.0 * pi * tbins.ratio() * dbin.centres()[k]**3 * dbin.delta
            volArray.array += integrand
            tmpArray2.array += integrand

            print >>sys.stderr, "bootstrapping:\t%.1f%% and calculating smoothed volume:\t%.1f%%\r" % (100.0 * n / bootnum, 100.0 * k / z),
        tmpArray2.array *= tmpArray2.array
        volArray2.array += tmpArray2.array
    print >>sys.stderr, ""

    # mean and variance
    volArray.array /= bootnum
    volArray2.array /= bootnum
    volArray2.array -= volArray.array**2  # variance
    volArray.array *= livetime
    volArray2.array *= livetime * livetime  # this gets two powers of live time
    return volArray, volArray2
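# The bootstrap moments above follow the usual accumulate-then-normalize
# pattern: sum the integrand and its square over draws, divide by the
# number of draws, and subtract the squared mean.  A standalone sketch of
# that pattern with illustrative arrays (not the pipeline's data):
draws = numpy.random.rand(100, 4)   # 100 bootstrap draws of a 4-bin "volume"
mean = draws.sum(axis = 0) / len(draws)
variance = (draws**2).sum(axis = 0) / len(draws) - mean**2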
def twoD_SearchVolume(found, missed, twodbin, dbin, wnfunc, livetime, bootnum = 1, derr = 0.197, dsys = 0.074):
    """
    Compute the search volume in the mass/mass plane, bootstrap and
    measure the first and second moment (assumes the underlying
    distribution can be characterized by those two parameters).
    This is going to be brutally slow.

    derr = (0.134**2 + .103**2 + .102**2)**.5 = 0.197 is the
    three-detector calibration uncertainty added in quadrature.  This
    is conservative, since some injections will be H1L1 and have a
    lower error of 0.17.

    dsys is the DC offset, whose maximum value is 0.074.
    """
    # compare against None:  truth-testing a multi-element numpy
    # array raises ValueError
    if wnfunc is not None:
        wnfunc /= wnfunc[(wnfunc.shape[0] - 1) / 2, (wnfunc.shape[1] - 1) / 2]

    x = twodbin.shape[0]
    y = twodbin.shape[1]
    z = dbin.n

    rArrays = []
    volArray = rate.BinnedArray(twodbin)
    volArray2 = rate.BinnedArray(twodbin)

    # set up ratio arrays for each distance bin
    for k in range(z):
        rArrays.append(rate.BinnedRatios(twodbin))

    # bootstrap to account for errors
    for n in range(bootnum):
        # initialize by setting these to zero
        for k in range(z):
            rArrays[k].numerator.array = numpy.zeros(rArrays[k].numerator.bins.shape)
            rArrays[k].denominator.array = numpy.zeros(rArrays[k].numerator.bins.shape)

        # scramble the injection population
        if bootnum > 1:
            sm, sf = scramble_pop(missed, found)
        else:
            sm, sf = missed, found

        for l in sf:  # found
            tbin = rArrays[dbin[scramble_dist(l.distance, derr, dsys)]]
            tbin.incnumerator((l.mass1, l.mass2))
        for l in sm:  # missed
            tbin = rArrays[dbin[scramble_dist(l.distance, derr, dsys)]]
            tbin.incdenominator((l.mass1, l.mass2))

        # start with a zero array to compute the mean square
        tmpArray2 = rate.BinnedArray(twodbin)
        for k in range(z):
            tbins = rArrays[k]
            tbins.denominator.array += tbins.numerator.array
            if wnfunc is not None:
                rate.filter_array(tbins.denominator.array, wnfunc)
                rate.filter_array(tbins.numerator.array, wnfunc)
            tbins.regularize()

            # logarithmic(d)
            integrand = 4.0 * pi * tbins.ratio() * dbin.centres()[k]**3 * dbin.delta
            volArray.array += integrand
            tmpArray2.array += integrand

            print >>sys.stderr, "bootstrapping:\t%.1f%% and calculating smoothed volume:\t%.1f%%\r" % (100.0 * n / bootnum, 100.0 * k / z),
        tmpArray2.array *= tmpArray2.array
        volArray2.array += tmpArray2.array
    print >>sys.stderr, ""

    # mean and variance
    volArray.array /= bootnum
    volArray2.array /= bootnum
    volArray2.array -= volArray.array**2  # variance
    volArray.array *= livetime
    volArray2.array *= livetime * livetime  # this gets two powers of live time
    return volArray, volArray2
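# A minimal driver sketch for the standalone version above.  "found" and
# "missed" are hypothetical lists of rows carrying .distance, .mass1 and
# .mass2; the window function is left as None so only the rate-module
# calls used above are assumed:
twodbin = rate.NDBins((rate.LinearBins(1.0, 25.0, 10), rate.LinearBins(1.0, 25.0, 10)))
all_dist = [l.distance for l in found + missed]
dbin = rate.LogarithmicBins(min(all_dist), max(all_dist), 50)
vol, vol2 = twoD_SearchVolume(found, missed, twodbin, dbin, None, livetime = 0.5, bootnum = 100)
# vol.array holds the mean volume x time (in whatever units distance**3
# and livetime carry); vol2.array holds its bootstrap variance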
def finish(self):
    fig, axes = SnglBurstUtils.make_burst_plot(r"Injection Amplitude (\(\mathrm{s}^{-\frac{1}{3}}\))", "Detection Efficiency", width = 108.0)
    axes.set_title(r"Detection Efficiency vs.\ Amplitude")
    axes.semilogx()
    axes.set_position([0.10, 0.150, 0.86, 0.77])

    # set desired yticks
    axes.set_yticks((0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0))
    axes.set_yticklabels((r"\(0\)", r"\(0.1\)", r"\(0.2\)", r"\(0.3\)", r"\(0.4\)", r"\(0.5\)", r"\(0.6\)", r"\(0.7\)", r"\(0.8\)", r"\(0.9\)", r"\(1.0\)"))
    axes.xaxis.grid(True, which = "major,minor")
    axes.yaxis.grid(True, which = "major,minor")

    # put made and found injections in the denominators and
    # numerators of the efficiency bins
    bins = rate.NDBins((rate.LogarithmicBins(min(sim.amplitude for sim in self.all), max(sim.amplitude for sim in self.all), 400),))
    efficiency = rate.BinnedRatios(bins)
    for sim in self.found:
        efficiency.incnumerator((sim.amplitude,))
    for sim in self.all:
        efficiency.incdenominator((sim.amplitude,))

    # generate and plot trend curves.  adjust window function
    # normalization so that the denominator array correctly
    # represents the number of injections contributing to each bin:
    # make w(0) = 1.0.  note that this factor has no effect on the
    # efficiency because it is common to the numerator and
    # denominator arrays.  we do this for the purpose of computing
    # the Poisson error bars, which requires us to know the counts
    # for the bins
    windowfunc = rate.gaussian_window(self.filter_width)
    # normalize by the central sample of the odd-length window
    windowfunc /= windowfunc[len(windowfunc) // 2]
    rate.filter_binned_ratios(efficiency, windowfunc)

    # regularize:  adjust unused bins so that the efficiency is 0,
    # not NaN
    efficiency.regularize()

    line1, A50, A50_err = render_data_from_bins(file("string_efficiency.dat", "w"), axes, efficiency, self.cal_uncertainty, self.filter_width, colour = "k", linestyle = "-", erroralpha = 0.2)
    print >>sys.stderr, "Pipeline's 50%% efficiency point for all detections = %g +/- %g%%\n" % (A50, A50_err * 100)

    # add a legend to the axes
    axes.legend((line1,), (r"\noindent Injections recovered with $\Lambda > %s$" % SnglBurstUtils.latexnumber("%.2e" % self.detection_threshold),), loc = "lower right")

    # adjust limits
    axes.set_xlim([1e-21, 2e-18])
    axes.set_ylim([0.0, 1.0])

    #
    # dump some information about the highest-amplitude missed and
    # quietest-amplitude found injections
    #

    self.loudest_missed.sort(reverse = True)
    self.quietest_found.sort(reverse = True)

    f = file("string_loud_missed_injections.txt", "w")
    print >>f, "Highest Amplitude Missed Injections"
    print >>f, "==================================="
    for amplitude, sim, offsetvector, filename, likelihood_ratio in self.loudest_missed:
        print >>f
        print >>f, "%s in %s:" % (str(sim.simulation_id), filename)
        if likelihood_ratio is None:
            print >>f, "Not recovered"
        else:
            print >>f, "Recovered with \\Lambda = %.16g, detection threshold was %.16g" % (likelihood_ratio, self.detection_threshold)
        for instrument in self.seglists:
            print >>f, "In %s:" % instrument
            print >>f, "\tInjected amplitude:\t%.16g" % SimBurstUtils.string_amplitude_in_instrument(sim, instrument, offsetvector)
            print >>f, "\tTime of injection:\t%s s" % sim.time_at_instrument(instrument, offsetvector)
        print >>f, "Amplitude in waveframe:\t%.16g" % sim.amplitude
        t = sim.get_time_geocent()
        print >>f, "Time at geocentre:\t%s s" % t
        print >>f, "Segments within 60 seconds:\t%s" % segmentsUtils.segmentlistdict_to_short_string(self.seglists & segments.segmentlistdict((instrument, segments.segmentlist([segments.segment(t - offsetvector[instrument] - 60, t - offsetvector[instrument] + 60)])) for instrument in self.seglists))
        print >>f, "Vetoes within 60 seconds:\t%s" % segmentsUtils.segmentlistdict_to_short_string(self.vetoseglists & segments.segmentlistdict((instrument, segments.segmentlist([segments.segment(t - offsetvector[instrument] - 60, t - offsetvector[instrument] + 60)])) for instrument in self.vetoseglists))

    f = file("string_quiet_found_injections.txt", "w")
    print >>f, "Lowest Amplitude Found Injections"
    print >>f, "================================="
    for inv_amplitude, sim, offsetvector, filename, likelihood_ratio in self.quietest_found:
        print >>f
        print >>f, "%s in %s:" % (str(sim.simulation_id), filename)
        if likelihood_ratio is None:
            print >>f, "Not recovered"
        else:
            print >>f, "Recovered with \\Lambda = %.16g, detection threshold was %.16g" % (likelihood_ratio, self.detection_threshold)
        for instrument in self.seglists:
            print >>f, "In %s:" % instrument
            print >>f, "\tInjected amplitude:\t%.16g" % SimBurstUtils.string_amplitude_in_instrument(sim, instrument, offsetvector)
            print >>f, "\tTime of injection:\t%s s" % sim.time_at_instrument(instrument, offsetvector)
        print >>f, "Amplitude in waveframe:\t%.16g" % sim.amplitude
        t = sim.get_time_geocent()
        print >>f, "Time at geocentre:\t%s s" % t
        print >>f, "Segments within 60 seconds:\t%s" % segmentsUtils.segmentlistdict_to_short_string(self.seglists & segments.segmentlistdict((instrument, segments.segmentlist([segments.segment(t - offsetvector[instrument] - 60, t - offsetvector[instrument] + 60)])) for instrument in self.seglists))
        print >>f, "Vetoes within 60 seconds:\t%s" % segmentsUtils.segmentlistdict_to_short_string(self.vetoseglists & segments.segmentlistdict((instrument, segments.segmentlist([segments.segment(t - offsetvector[instrument] - 60, t - offsetvector[instrument] + 60)])) for instrument in self.vetoseglists))

    #
    # done
    #

    return fig,
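# Sketch of the kernel normalization used in finish() above: scaling the
# Gaussian window so its central sample is exactly 1 leaves the smoothed
# denominator in units of "number of injections", which the Poisson error
# bars need.  Assumes rate.gaussian_window returns an odd-length window
# peaked at its centre, as the code above relies on.
windowfunc = rate.gaussian_window(21.0)
windowfunc /= windowfunc[len(windowfunc) // 2]   # centre sample -> 1.0
assert windowfunc.max() == 1.0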