def do_summary_table(xmldoc, sim_tree, liv_tree):
    try:
        search_summary = lsctables.SearchSummaryTable.get_table(xmldoc)
    except ValueError:
        search_summary = lsctables.New(lsctables.SearchSummaryTable, [
            "process_id", "nevents", "ifos", "comment", "in_start_time",
            "in_start_time_ns", "out_start_time", "out_start_time_ns",
            "in_end_time", "in_end_time_ns", "out_end_time", "out_end_time_ns"
        ])
        xmldoc.childNodes[0].appendChild(search_summary)

    process_id_type = lsctables.ProcessID

    runids = set()
    for i in range(0, sim_tree.GetEntries()):
        sim_tree.GetEntry(i)

        # Id for the run processed by WaveBurst -> process ID
        if sim_tree.run in runids:
            continue

        row = search_summary.RowType()
        row.process_id = process_id_type(sim_tree.run)
        runids.add(sim_tree.run)

        # Search Summary Table
        # events found in the run -> nevents
        row.nevents = sim_tree.GetEntries()

        # Instruments involved in the search
        row.ifos = lsctables.ifos_from_instrument_set(
            get_ifos_from_index(
                branch_array_to_list(sim_tree.ifo, sim_tree.ndim)))
        row.comment = "waveburst"

        # Begin and end time of the segment
        # TODO: This is a typical offset on either side of the job for artifacts.
        # It can, and probably will, change in the future, and should not be hardcoded.
        # TODO: Make this work properly. We need a GPS end from the livetime.
        waveoffset = 8
        livetime = 600
        #live_entries = liv_tree.GetEntries()
        # This is WAAAAAAAAAAAAAY too slow
        #for l in range(0, live_entries):
            #liv_tree.GetEntry(l)
            #livetime = max(livetime, liv_tree.live)
        #if livetime < 0:
            #sys.exit("Could not find livetime, cannot fill all of summary table.")

        # in -- with waveoffset
        row.set_in(
            segments.segment(LIGOTimeGPS(sim_tree.gps - waveoffset),
                             LIGOTimeGPS(sim_tree.gps + livetime + waveoffset)))
        # out -- without waveoffset
        row.set_out(
            segments.segment(LIGOTimeGPS(sim_tree.gps),
                             LIGOTimeGPS(sim_tree.gps + livetime)))

        search_summary.append(row)
def get_output_cache(self):
    """
    Returns a LAL cache of the output file name. Calling this method also
    induces the output name to get set, so it must be called at least once.
    """
    if not self.output_cache:
        self.output_cache = [
            CacheEntry(
                self.get_ifo(), self.__usertag,
                segments.segment(LIGOTimeGPS(self.get_start()),
                                 LIGOTimeGPS(self.get_end())),
                "file://localhost" + os.path.abspath(self.get_output()))
        ]
    return self.output_cache
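# Usage sketch (hedged): write a node's output cache in the standard LAL
# cache format so downstream tools can locate the file. `node` stands for
# any job node exposing get_output_cache(); the path is illustrative only.
def write_output_cache(node, path="search.cache"):
    with open(path, "w") as cachefile:
        for entry in node.get_output_cache():
            # CacheEntry's string form is the standard LAL cache line
            print >> cachefile, str(entry)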
def get_sngl_burst_row(self, sngl_burst_table, sim_tree, d):
    """
    Fill in a sngl_burst row for a cWB event.
    """
    row = sngl_burst_table.RowType()
    row.search = u"waveburst"
    # Interferometer name -> ifo
    row.ifo = get_ifos_from_index(sim_tree.ifo[d])
    # Timing
    peak = LIGOTimeGPS(sim_tree.time[d])
    seg = segments.segment(LIGOTimeGPS(sim_tree.start[d]),
                           LIGOTimeGPS(sim_tree.stop[d]))
    # Central time in the detector -> peak_time
    row.set_peak(peak)
    # Start time in the detector -> start_time
    row.set_start(seg[0])
    # Stop time in the detector -> stop_time
    row.set_stop(seg[1])
    # Event duration
    row.duration = abs(seg)
    # TODO: Make sure this is right -- time lag used to shift detector -> time_lag
    row.time_lag = sim_tree.lag[d]
    # Frequency
    # Central frequency in the detector -> peak_frequency
    row.peak_frequency = sim_tree.frequency[d]
    try:
        # Low frequency of the event in the detector -> flow
        row.flow = sim_tree.low[d]
        # High frequency of the event in the detector -> fhigh
        row.fhigh = sim_tree.high[d]
    except TypeError:
        row.flow = sim_tree.low
        row.fhigh = sim_tree.high
    # Bandwidth
    row.bandwidth = sim_tree.bandwidth[d]
    # Shape
    # number of pixels on the TF plane -> tfvolume
    row.tfvolume = sim_tree.size[d]
    # Energy
    # sqrt(energy / noise variance) -> snr
    row.snr = sim_tree.snr[d]**(1. / 2.)
    # TODO: What to do with this? GW strain
    #row.strain = sim_tree.strain[d]
    # root-sum-square strain amplitude -> hrss
    row.hrss = sim_tree.hrss[d]
    return row
def get_multi_burst_row(self, multi_burst_table, sim_tree):
    """
    Fill in a multi_burst row for a cWB event -- these should exactly
    correlate to the sngl_burst events within a single coherent event.
    """
    row = multi_burst_table.RowType()
    # TODO: Script chokes on the line below; we need a better way to handle this anyway
    #row.set_peak(sum(LIGOTimeGPS(t) for t in sim_tree.time[0:3]) / int(sim_tree.ndim))
    row.set_peak(LIGOTimeGPS(sim_tree.time[0]))
    return row
def get_output(self):
    """
    Returns the file name of output from the string search code. This must
    be kept synchronized with the name of the output file the search code
    writes.
    """
    if self._AnalysisNode__output is None:
        if None in (self.get_start(), self.get_end(), self.get_ifo(),
                    self.__usertag):
            raise ValueError, "start time, end time, ifo, or user tag has not been set"
        seg = segments.segment(LIGOTimeGPS(self.get_start()),
                               LIGOTimeGPS(self.get_end()))
        self.set_output(
            os.path.join(
                self.output_dir, "%s-STRINGSEARCH_%s-%d-%d.xml.gz" %
                (self.get_ifo(), self.__usertag, int(self.get_start()),
                 int(self.get_end()) - int(self.get_start()))))
    return self._AnalysisNode__output
def create_xml(ts_data, psd_segment_length, window_fraction, event_list,
               station, setname="MagneticFields"):
    __program__ = 'pyburst_excesspower'
    start_time = LIGOTimeGPS(int(ts_data.start_time))
    end_time = LIGOTimeGPS(int(ts_data.end_time))
    inseg = segment(start_time, end_time)
    xmldoc = ligolw.Document()
    xmldoc.appendChild(ligolw.LIGO_LW())
    ifo = 'H1'  # channel_name.split(":")[0]
    straindict = psd.insert_psd_option_group.__dict__
    proc_row = register_to_xmldoc(xmldoc,
                                  __program__,
                                  straindict,
                                  ifos=[ifo],
                                  version=git_version.id,
                                  cvs_repository=git_version.branch,
                                  cvs_entry_time=git_version.date)
    outseg = determine_output_segment(inseg, psd_segment_length,
                                      ts_data.sample_rate, window_fraction)
    ss = append_search_summary(xmldoc,
                               proc_row,
                               ifos=(station,),
                               inseg=inseg,
                               outseg=outseg)
    for sb in event_list:
        sb.process_id = proc_row.process_id
        sb.search = proc_row.program
        sb.ifo, sb.channel = station, setname
    xmldoc.childNodes[0].appendChild(event_list)
    fname = make_filename(station, inseg)
    utils.write_filename(xmldoc, fname, gz=fname.endswith("gz"))
def get_sngl_burst_row(sngl_burst_table, sim_tree, d):
    row = sngl_burst_table.RowType()
    row.search = "waveburst"
    # Interferometer name -> ifo
    row.ifo = get_ifos_from_index(sim_tree.ifo[d])
    # Timing
    peak = LIGOTimeGPS(sim_tree.time[d])
    seg = segments.segment(LIGOTimeGPS(sim_tree.start[d]),
                           LIGOTimeGPS(sim_tree.stop[d]))
    # Central time in the detector -> peak_time
    row.set_peak(peak)
    # Start time in the detector -> start_time
    row.set_start(seg[0])
    # Stop time in the detector -> stop_time
    row.set_stop(seg[1])
    # Event duration
    row.duration = abs(seg)
    # TODO: Make sure this is right -- time lag used to shift detector -> time_lag
    row.time_lag = sim_tree.lag[d]
    # Frequency
    # Central frequency in the detector -> peak_frequency
    row.peak_frequency = sim_tree.frequency[d]
    # Low frequency of the event in the detector -> flow
    row.flow = sim_tree.low[d]
    # High frequency of the event in the detector -> fhigh
    row.fhigh = sim_tree.high[d]
    # Bandwidth
    row.bandwidth = sim_tree.bandwidth[d]
    # Shape
    # number of pixels on the TF plane -> tfvolume
    row.tfvolume = sim_tree.size[d]
    # Energy
    # energy / noise variance -> snr
    row.snr = sim_tree.snr[d]
    # TODO: What to do with this? GW strain
    #row.strain = sim_tree.strain[d]
    # root-sum-square strain amplitude -> hrss
    row.hrss = sim_tree.hrss[d]
    return row
def segmentlist_from_hdf5(f, name=None, gpstype=LIGOTimeGPS):
    """Read a `SegmentList` object from an HDF5 file or group.
    """
    h5file = open_hdf5(f)
    try:
        # find dataset
        if isinstance(h5file, h5py.Dataset):
            dataset = h5file
        else:
            dataset = h5file[name]
        try:
            data = dataset[()]
        except ValueError:
            data = []
        out = SegmentList()
        for row in data:
            row = map(int, row)
            # extract as LIGOTimeGPS
            start = LIGOTimeGPS(*row[:2])
            end = LIGOTimeGPS(*row[2:])
            # convert to user type
            try:
                start = gpstype(start)
            except TypeError:
                start = gpstype(float(start))
            try:
                end = gpstype(end)
            except TypeError:
                end = gpstype(float(end))
            out.append(Segment(start, end))
    finally:
        if not isinstance(f, (h5py.Dataset, h5py.Group)):
            h5file.close()
    return out
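# Usage sketch (hedged): read back a segment list written by the matching
# writer. The file name and dataset name below are illustrative only; the
# dataset is expected to hold (start_sec, start_ns, end_sec, end_ns) rows
# as parsed above.
def example_read_segments():
    segs = segmentlist_from_hdf5('segments.h5', name='L1:DMT-ANALYSIS_READY')
    # coalesce() merges overlapping segments so abs() gives total livetime
    return float(abs(segs.coalesce()))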
def trigger_list_from_map(tfmap, event_list, threshold, start_time,
                          start_freq, duration, band, df, dt, psd=None):

    # FIXME: If we don't convert this the calculation takes forever ---
    # but we should convert it once and handle deltaF better later
    if psd is not None:
        npy_psd = psd.numpy()

    start_time = LIGOTimeGPS(float(start_time))
    ndof = 2 * duration * band

    for i, j in zip(*numpy.where(tfmap > threshold)):
        event = event_list.RowType()

        # The points are summed forward in time and thus a 'summed point' is
        # the sum of the previous N points. If this point is above threshold,
        # it corresponds to a tile which spans the previous N points. However,
        # the 0th point (due to the convolution specifier 'valid') is actually
        # already a duration from the start time. All of this means the
        # +duration and the -duration cancel, and the tile 'start' is, by
        # definition, the start of the time-frequency map if j == 0.
        # FIXME: I think this needs a + dt/2 to center the tile properly
        event.set_start(start_time + float(j * dt))
        event.set_stop(start_time + float(j * dt) + duration)
        event.set_peak(event.get_start() + duration / 2)
        event.central_freq = start_freq + i * df + 0.5 * band

        event.duration = duration
        event.bandwidth = band
        event.chisq_dof = ndof

        event.snr = math.sqrt(tfmap[i, j] / event.chisq_dof - 1)
        # FIXME: Magic number 0.62 should be determined empirically
        event.confidence = -lal.LogChisqCCDF(event.snr * 0.62,
                                             event.chisq_dof * 0.62)
        if psd is not None:
            # NOTE: I think the pycbc PSDs always start at 0 Hz --- check
            psd_idx_min = int((event.central_freq - event.bandwidth / 2) /
                              psd.delta_f)
            psd_idx_max = int((event.central_freq + event.bandwidth / 2) /
                              psd.delta_f)
            # FIXME: Heuristically this works better with E - D -- it's all
            # going away with the better h_rss calculation soon anyway
            event.amplitude = measure_hrss_poorly(
                tfmap[i, j] - event.chisq_dof,
                npy_psd[psd_idx_min:psd_idx_max])
        else:
            event.amplitude = None

        event.process_id = None
        event.event_id = event_list.get_next_id()
        event_list.append(event)
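# Usage sketch (hedged): scan a toy chi-squared distributed map for tiles
# above threshold. A bare SnglBurstTable stands in for the event list, and
# psd=None skips the hrss estimate; all tuning numbers are illustrative.
def example_trigger_scan():
    toy_map = numpy.random.chisquare(4, size=(8, 128))
    events = lsctables.New(lsctables.SnglBurstTable)
    trigger_list_from_map(toy_map, events, threshold=30., start_time=1e9,
                          start_freq=0., duration=0.5, band=1., df=1.,
                          dt=0.25)
    return len(events)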
def read_hveto_triggers(f, columns=HVETO_COLUMNS, filt=None, nproc=1):
    """Read a `SnglBurstTable` of triggers from an Hveto txt file.
    """
    # allow multiprocessing
    if nproc != 1:
        from gwpy.table.io.cache import read_cache
        return read_cache(f, lsctables.SnglBurstTable.tableName,
                          columns=columns, nproc=nproc, format='hveto')

    # format list of files
    if isinstance(f, CacheEntry):
        files = [f.path]
    elif isinstance(f, (str, unicode)) and f.endswith(('.cache', '.lcf')):
        files = open_cache(f).pfnlist()
    elif isinstance(f, (str, unicode)):
        files = f.split(',')
    elif isinstance(f, Cache):
        files = f.pfnlist()
    else:
        files = list(f)

    # generate output
    out = lsctables.New(lsctables.SnglBurstTable, columns=columns)
    append = out.append

    # iterate over files
    for fp in files:
        trigs = loadtxt(fp, dtype=float)
        for t, freq, snr in trigs:
            b = lsctables.SnglBurst()
            b.set_peak(LIGOTimeGPS(float(t)))
            b.peak_frequency = freq
            b.snr = snr
            if filt is None or filt(b):
                append(b)
    return out
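# Usage sketch (hedged): keep only triggers above SNR 8 from a single hveto
# text file; the path is illustrative, columns are (time, frequency, snr).
def example_loud_hveto_triggers():
    return read_hveto_triggers('hveto-triggers.txt',
                               filt=lambda b: b.snr > 8)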
def excess_power2(
        ts_data,                 # Time series from magnetic field data
        psd_segment_length,      # Length of each segment in seconds
        psd_segment_stride,      # Separation between 2 consecutive segments in seconds
        psd_estimation,          # Average method
        window_fraction,         # Whitening window fraction
        tile_fap,                # Tile false alarm probability threshold in Gaussian noise
        station,                 # Station
        nchans=None,             # Total number of channels
        band=None,               # Channel bandwidth
        fmin=0,                  # Lowest frequency of the filter bank
        fmax=None,               # Highest frequency of the filter bank
        max_duration=None,       # Maximum duration of the tile
        wtype='tukey'):          # Whitening type, can be 'tukey' or 'hann'
    """
    Perform excess-power search analysis on magnetic field data. This
    method will produce a bunch of time-frequency plots for every tile
    duration and bandwidth analysed, as well as an XML file identifying
    all the triggers found in the selected data within the user-defined
    time range.

    Parameters
    ----------
    ts_data : TimeSeries
        Time series from magnetic field data
    psd_segment_length : float
        Length of each segment in seconds
    psd_segment_stride : float
        Separation between 2 consecutive segments in seconds
    psd_estimation : string
        Average method
    window_fraction : float
        Whitening window fraction
    tile_fap : float
        Tile false alarm probability threshold in Gaussian noise
    nchans : int
        Total number of channels
    band : float
        Channel bandwidth
    fmin : float
        Lowest frequency of the filter bank
    fmax : float
        Highest frequency of the filter bank
    """
    # Determine sampling rate based on extracted time series
    sample_rate = ts_data.sample_rate
    # Check if tile maximum frequency is not defined
    if fmax is None or fmax > sample_rate / 2.:
        # Set the tile maximum frequency equal to the Nyquist frequency
        # (i.e. half the sampling rate)
        fmax = sample_rate / 2.0
    # Check whether or not tile bandwidth and channel are defined
    if band is None and nchans is None:
        # Exit program with error message
        exit("Either bandwidth or number of channels must be specified...")
    else:
        # Check that the tile maximum frequency is larger than its minimum frequency
        assert fmax >= fmin
        # Define spectral band of data
        data_band = fmax - fmin
        # Check whether tile bandwidth or channel is defined
        if band is not None:
            # Define number of possible filter bands
            nchans = int(data_band / band) - 1
        elif nchans is not None:
            # Define filter bandwidth
            band = data_band / nchans
            nchans = nchans - 1
        # Check that the number of channels is greater than unity
        assert nchans > 1
    # Print segment information
    print '|- Estimating PSD from segments of time',
    print '%.2f s in length, with %.2f s stride...' % (psd_segment_length,
                                                       psd_segment_stride)
    # Convert time series to array of float
    data = ts_data.astype(numpy.float64)
    # Define segment length for PSD estimation in sample units
    seg_len = int(psd_segment_length * sample_rate)
    # Define separation between consecutive segments in sample units
    seg_stride = int(psd_segment_stride * sample_rate)
    # Calculate the overall PSD from individual PSD segments
    fd_psd = psd.welch(data,
                       avg_method=psd_estimation,
                       seg_len=seg_len,
                       seg_stride=seg_stride)
    # We need this for the SWIG functions...
    lal_psd = fd_psd.lal()
    # Plot the power spectral density
    plot_spectrum(fd_psd)
    # Create whitening window
    print "|- Whitening window and spectral correlation..."
    if wtype == 'hann':
        window = lal.CreateHannREAL8Window(seg_len)
    elif wtype == 'tukey':
        window = lal.CreateTukeyREAL8Window(seg_len, window_fraction)
    else:
        raise ValueError("Can't handle window type %s" % wtype)
    # Create FFT plan
    fft_plan = lal.CreateForwardREAL8FFTPlan(len(window.data.data), 1)
    # Perform two-point spectral correlation
    spec_corr = lal.REAL8WindowTwoPointSpectralCorrelation(window, fft_plan)
    # Initialise filter bank
    print "|- Create filter..."
    filter_bank, fdb = [], []
    # Loop over the channels
    for i in range(nchans):
        channel_flow = fmin + band / 2 + i * band
        channel_width = band
        # Create excess power filter
        lal_filter = lalburst.CreateExcessPowerFilter(channel_flow,
                                                      channel_width,
                                                      lal_psd, spec_corr)
        filter_bank.append(lal_filter)
        fdb.append(Spectrum.from_lal(lal_filter))
    # Calculate the minimum bandwidth
    min_band = (len(filter_bank[0].data.data) - 1) * filter_bank[0].deltaF / 2
    # Plot filter bank
    plot_bank(fdb)
    # Convert filter bank from frequency to time domain
    print "|- Convert all the frequency domain to the time domain..."
    tdb = []
    # Loop over each filter's spectrum
    for fdt in fdb:
        zero_padded = numpy.zeros(int((fdt.f0 / fdt.df).value) + len(fdt))
        st = int((fdt.f0 / fdt.df).value)
        zero_padded[st:st + len(fdt)] = numpy.real_if_close(fdt.value)
        n_freq = int(sample_rate / 2 / fdt.df.value) * 2
        tdt = numpy.fft.irfft(zero_padded, n_freq) * math.sqrt(sample_rate)
        tdt = numpy.roll(tdt, len(tdt) / 2)
        tdt = TimeSeries(tdt, name="", epoch=fdt.epoch, sample_rate=sample_rate)
        tdb.append(tdt)
    # Plot time series filter
    plot_filters(tdb, fmin, band)
    # Compute the renormalization for the base filters up to a given bandwidth
    mu_sq_dict = {}
    # Loop through powers of 2 up to number of channels
    for nc_sum in range(0, int(math.log(nchans, 2))):
        nc_sum = 2**nc_sum - 1
        print "|- Calculating renormalization for resolution level containing %d %fHz channels" % (
            nc_sum + 1, min_band)
        mu_sq = (nc_sum + 1) * numpy.array([
            lalburst.ExcessPowerFilterInnerProduct(f, f, spec_corr, None)
            for f in filter_bank
        ])
        # Uncomment to get all possible frequency renormalizations
        #for n in xrange(nc_sum, nchans):  # channel position index
        for n in xrange(nc_sum, nchans, nc_sum + 1):  # channel position index
            for k in xrange(0, nc_sum):  # channel sum index
                # FIXME: We've precomputed this, so use it instead
                mu_sq[n] += 2 * lalburst.ExcessPowerFilterInnerProduct(
                    filter_bank[n - k], filter_bank[n - 1 - k], spec_corr,
                    None)
        #print mu_sq[nc_sum::nc_sum+1]
        mu_sq_dict[nc_sum] = mu_sq
    # Create an event list where all the triggers will be stored
    event_list = lsctables.New(lsctables.SnglBurstTable, [
        'start_time', 'start_time_ns', 'peak_time', 'peak_time_ns',
        'duration', 'bandwidth', 'central_freq', 'chisq_dof', 'confidence',
        'snr', 'amplitude', 'channel', 'ifo', 'process_id', 'event_id',
        'search', 'stop_time', 'stop_time_ns'
    ])
    # Create repositories to save TF and time series plots
    os.system('mkdir -p segments/time-frequency')
    os.system('mkdir -p segments/time-series')
    # Define time edges
    t_idx_min, t_idx_max = 0, seg_len
    while t_idx_max <= len(ts_data):
        # Define starting and ending time of the segment in seconds
        start_time = ts_data.start_time + t_idx_min / float(ts_data.sample_rate)
        end_time = ts_data.start_time + t_idx_max / float(ts_data.sample_rate)
        print "\n|-- Analyzing block %i to %i (%.2f percent)" % (
            start_time, end_time, 100 * float(t_idx_max) / len(ts_data))
        # Model a whitened time series for the block
        tmp_ts_data = types.TimeSeries(ts_data[t_idx_min:t_idx_max] *
                                       window.data.data,
                                       delta_t=1. / ts_data.sample_rate,
                                       epoch=start_time)
        # Save time series in relevant repository
        segfolder = 'segments/%i-%i' % (start_time, end_time)
        os.system('mkdir -p ' + segfolder)
        plot_ts(tmp_ts_data,
                fname='segments/time-series/%i-%i.png' % (start_time, end_time))
        # Convert time series to frequency series
        fs_data = tmp_ts_data.to_frequencyseries()
        print "|-- Frequency series data has variance: %s" % fs_data.data.std()**2
        # Whitening (FIXME: Whiten the filters, not the data)
        fs_data.data /= numpy.sqrt(fd_psd) / numpy.sqrt(2 * fd_psd.delta_f)
        print "|-- Whitened frequency series data has variance: %s" % fs_data.data.std()**2
        print "|-- Create time-frequency plane for current block"
        # Return the complex snr, along with its associated normalization of
        # the template, matched filtered against the data
        #filter.matched_filter_core(types.FrequencySeries(tmp_filter_bank,delta_f=fd_psd.delta_f),
        #                           fs_data,h_norm=1,psd=fd_psd,low_frequency_cutoff=filter_bank[0].f0,
        #                           high_frequency_cutoff=filter_bank[0].f0+2*band)
        print "|-- Filtering all %d channels..." % nchans
        # Initialise 2D zero array
        tmp_filter_bank = numpy.zeros(len(fd_psd), dtype=numpy.complex128)
        # Initialise 2D zero array for time-frequency map
        tf_map = numpy.zeros((nchans, seg_len), dtype=numpy.complex128)
        # Loop over all the channels
        for i in range(nchans):
            # Reset filter bank series
            tmp_filter_bank *= 0.0
            # Index of starting frequency
            f1 = int(filter_bank[i].f0 / fd_psd.delta_f)
            # Index of ending frequency
            f2 = int((filter_bank[i].f0 + 2 * band) / fd_psd.delta_f) + 1
            # (FIXME: Why is there a factor of 2 here?)
            tmp_filter_bank[f1:f2] = filter_bank[i].data.data * 2
            # Define the template to filter the frequency series with
            template = types.FrequencySeries(tmp_filter_bank,
                                             delta_f=fd_psd.delta_f,
                                             copy=False)
            # Create filtered series
            filtered_series = filter.matched_filter_core(
                template,
                fs_data,
                h_norm=None,
                psd=None,
                low_frequency_cutoff=filter_bank[i].f0,
                high_frequency_cutoff=filter_bank[i].f0 + 2 * band)
            # Include filtered series in the map
            tf_map[i, :] = filtered_series[0].numpy()
        # Plot spectrogram
        plot_spectrogram(numpy.abs(tf_map).T,
                         tmp_ts_data.delta_t,
                         band,
                         ts_data.sample_rate,
                         start_time,
                         end_time,
                         fname='segments/time-frequency/%i-%i.png' %
                         (start_time, end_time))
        # Loop through all summed channels
        for nc_sum in range(0, int(math.log(nchans, 2)))[::-1]:
            nc_sum = 2**nc_sum - 1
            mu_sq = mu_sq_dict[nc_sum]
            # Clip the boundaries to remove window corruption
            clip_samples = int(psd_segment_length * window_fraction *
                               ts_data.sample_rate / 2)
            # Construct tiles and calculate their energy
            print "\n|--- Constructing tile with %d summed channels..." % (nc_sum + 1)
            # Current bandwidth of the time-frequency map tiles
            df = band * (nc_sum + 1)
            dt = 1.0 / (2 * df)
            # How much each "step" is in the time domain -- undersampling rate
            us_rate = int(round(dt / ts_data.delta_t))
            print "|--- Undersampling rate for this level: %f" % (
                ts_data.sample_rate / us_rate)
            print "|--- Calculating tiles..."
            # Make independent tiles; the clipping condition is needed
            # because [0:-0] does not give the full array
            tf_map_temp = tf_map[:, clip_samples:-clip_samples:us_rate] \
                if clip_samples > 0 else tf_map[:, ::us_rate]
            tiles = tf_map_temp.copy()
            # Here's the deal: we're going to keep only the valid output and
            # it's *always* going to exist in the lowest available indices
            stride = nc_sum + 1
            for i in xrange(tiles.shape[0] / stride):
                numpy.absolute(tiles[stride * i:stride * (i + 1)].sum(axis=0),
                               tiles[stride * (i + 1) - 1])
            tiles = tiles[nc_sum::nc_sum + 1].real**2 / \
                mu_sq[nc_sum::nc_sum + 1].reshape(-1, 1)
            print "|--- TF-plane is %dx%s samples" % tiles.shape
            print "|--- Tile energy mean %f, var %f" % (numpy.mean(tiles),
                                                        numpy.var(tiles))
            # Define maximum number of degrees of freedom and check that it is
            # larger than or equal to 2
            max_dof = 32 if max_duration is None else 2 * max_duration * df
            assert max_dof >= 2
            # Loop through multiple degrees of freedom
            for j in [2**l for l in xrange(0, int(math.log(max_dof, 2)))]:
                # Duration is fixed by the NDOF and bandwidth
                duration = j * dt
                print "\n|----- Explore signal duration of %f s..." % duration
                print "|----- Summing DOF = %d ..." % (2 * j)
                tlen = tiles.shape[1] - 2 * j + 1 + 1
                dof_tiles = numpy.zeros((tiles.shape[0], tlen))
                sum_filter = numpy.array([1, 0] * (j - 1) + [1])
                for f in range(tiles.shape[0]):
                    # Sum and drop correlated tiles
                    dof_tiles[f] = fftconvolve(tiles[f], sum_filter, 'valid')
                print "|----- Summed tile energy mean: %f, var %f" % (
                    numpy.mean(dof_tiles), numpy.var(dof_tiles))
                plot_spectrogram(dof_tiles.T,
                                 dt,
                                 df,
                                 ts_data.sample_rate,
                                 start_time,
                                 end_time,
                                 fname='segments/%i-%i/tf_%02ichans_%02idof.png' %
                                 (start_time, end_time, nc_sum + 1, 2 * j))
                threshold = scipy.stats.chi2.isf(tile_fap, j)
                print "|------ Threshold for this level: %f" % threshold
                spant, spanf = dof_tiles.shape[1] * dt, dof_tiles.shape[0] * df
                print "|------ Processing %.2fx%.2f time-frequency map." % (
                    spant, spanf)
                # Since we clip the data, the start time needs to be adjusted accordingly
                window_offset_epoch = fs_data.epoch + psd_segment_length * window_fraction / 2
                window_offset_epoch = LIGOTimeGPS(float(window_offset_epoch))
                for i, j in zip(*numpy.where(dof_tiles > threshold)):
                    event = event_list.RowType()
                    # The points are summed forward in time and thus a 'summed
                    # point' is the sum of the previous N points. If this point
                    # is above threshold, it corresponds to a tile which spans
                    # the previous N points. However, the 0th point (due to the
                    # convolution specifier 'valid') is actually already a
                    # duration from the start time. All of this means the
                    # +duration and the -duration cancel, and the tile 'start'
                    # is, by definition, the start of the time-frequency map if
                    # j == 0.
                    # FIXME: I think this needs a + dt/2 to center the tile properly
                    event.set_start(window_offset_epoch + float(j * dt))
                    event.set_stop(window_offset_epoch + float(j * dt) + duration)
                    event.set_peak(event.get_start() + duration / 2)
                    event.central_freq = filter_bank[0].f0 + band / 2 + i * df + 0.5 * df
                    event.duration = duration
                    event.bandwidth = df
                    event.chisq_dof = 2 * duration * df
                    event.snr = math.sqrt(dof_tiles[i, j] / event.chisq_dof - 1)
                    # FIXME: Magic number 0.62 should be determined empirically
                    event.confidence = -lal.LogChisqCCDF(event.snr * 0.62,
                                                         event.chisq_dof * 0.62)
                    event.amplitude = None
                    event.process_id = None
                    event.event_id = event_list.get_next_id()
                    event_list.append(event)
                for event in event_list[::-1]:
                    if event.amplitude is not None:
                        continue
                    etime_min_idx = float(event.get_start()) - float(fs_data.epoch)
                    etime_min_idx = int(etime_min_idx / tmp_ts_data.delta_t)
                    etime_max_idx = float(event.get_start()) - \
                        float(fs_data.epoch) + event.duration
                    etime_max_idx = int(etime_max_idx / tmp_ts_data.delta_t)
                    # (band / 2) to account for sin^2 wings from finest filters
                    flow_idx = int((event.central_freq - event.bandwidth / 2 -
                                    (df / 2) - fmin) / df)
                    fhigh_idx = int((event.central_freq + event.bandwidth / 2 +
                                     (df / 2) - fmin) / df)
                    # TODO: Check that the undersampling rate is always
                    # commensurate with the indexing: that is to say that
                    # mod(etime_min_idx, us_rate) == 0 always
                    z_j_b = tf_map[flow_idx:fhigh_idx,
                                   etime_min_idx:etime_max_idx:us_rate]
                    event.amplitude = 0
                print "|------ Total number of events: %d" % len(event_list)
        t_idx_min += int(seg_len * (1 - window_fraction))
        t_idx_max += int(seg_len * (1 - window_fraction))
    setname = "MagneticFields"
    __program__ = 'pyburst_excesspower'
    start_time = LIGOTimeGPS(int(ts_data.start_time))
    end_time = LIGOTimeGPS(int(ts_data.end_time))
    inseg = segment(start_time, end_time)
    xmldoc = ligolw.Document()
    xmldoc.appendChild(ligolw.LIGO_LW())
    ifo = 'H1'  # channel_name.split(":")[0]
    straindict = psd.insert_psd_option_group.__dict__
    proc_row = register_to_xmldoc(xmldoc,
                                  __program__,
                                  straindict,
                                  ifos=[ifo],
                                  version=git_version.id,
                                  cvs_repository=git_version.branch,
                                  cvs_entry_time=git_version.date)
    dt_stride = psd_segment_length
    sample_rate = ts_data.sample_rate
    # Amount to overlap successive blocks so as not to lose data
    window_overlap_samples = window_fraction * sample_rate
    outseg = inseg.contract(window_fraction * dt_stride / 2)
    # With a given dt_stride, we cannot process the remainder of this data
    remainder = math.fmod(abs(outseg), dt_stride * (1 - window_fraction))
    # ...so make an accounting of it
    outseg = segment(outseg[0], outseg[1] - remainder)
    ss = append_search_summary(xmldoc,
                               proc_row,
                               ifos=(station,),
                               inseg=inseg,
                               outseg=outseg)
    for sb in event_list:
        sb.process_id = proc_row.process_id
        sb.search = proc_row.program
        sb.ifo, sb.channel = station, setname
    xmldoc.childNodes[0].appendChild(event_list)
    fname = 'excesspower.xml.gz'
    utils.write_filename(xmldoc, fname, gz=fname.endswith("gz"))
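# Usage sketch (hedged): drive excess_power2 with fake Gaussian noise.
# `types.TimeSeries` is the pycbc-style class already used above; the
# station label and tuning values are illustrative only.
def example_excess_power2_run():
    fake = types.TimeSeries(numpy.random.normal(size=4096 * 512),
                            delta_t=1. / 512, epoch=1000000000)
    excess_power2(fake, psd_segment_length=60, psd_segment_stride=30,
                  psd_estimation='median-mean', window_fraction=0.25,
                  tile_fap=1e-7, station='fake01', nchans=16)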
    else:
        seglist.extend(
            dqsegs.threshold_data_to_seglist(data,
                                             start,
                                             dt,
                                             min_threshold=min_threshold,
                                             max_threshold=max_threshold,
                                             invert=invert))

    seglist.coalesce()
    all_segs["%s: %s" % (channel, str(opthresholds))] = seglist

    if opts.verbose:
        print "Summary of segments for %s %s %s %s" % (inst, channel_name,
                                                       op, str(threshold))
        int_time = reduce(float.__add__,
                          [float(abs(seg & s)) for s in seglist], 0.0)
        print "Total time covered from cache: %f" % int_time

    for i, seg in enumerate(seglist):
        #
        # Segments are expected in LIGOTimeGPS format
        #
        seglist[i] = segment(LIGOTimeGPS(seg[0]), LIGOTimeGPS(seg[1]))
        if opts.verbose:
            print "\t" + str(seg)

    #
    # Fill in some metadata about the flags
    #
    name = "detchar %s threshold flags" % channel
    comment = " ".join(["%s %s %s" % (channel, stringify(op), str(v))
                        for op, v in opthresholds])
    lwsegs.insert_from_segmentlistdict(segmentlistdict({channel[:2]: seglist}),
                                       name=name,
                                       comment=comment)

#
# After recording segments, one can take the intersection (all must be on) or
# union (any can be on)
#
# Possible enhancement: instead of giving all keys, give a user selection
def fake_trigger_generator(instrument='H1'):
    """
    Generate fake trigger maps.

    Parameters
    ----------
    instrument : str
        Instrument name
    """
    xmldoc = ligolw.Document()
    xmldoc.appendChild(ligolw.LIGO_LW())

    # Process information
    proc = process.append_process(xmldoc, "fake_search")
    process.append_process_params(xmldoc, proc, {})

    t0 = 1e9
    ntrig = 1000
    ifo = instrument
    inseg = segment(LIGOTimeGPS(t0), LIGOTimeGPS(t0 + ntrig / 10))
    outseg = segment(LIGOTimeGPS(t0), LIGOTimeGPS(t0 + ntrig / 10))

    # Search summary
    search_summary.append_search_summary(xmldoc,
                                         proc,
                                         comment="Fake triggers",
                                         ifos=(ifo,),
                                         inseg=inseg,
                                         outseg=outseg)

    columns = [
        'chisq_dof', 'bandwidth', 'central_freq', 'confidence',
        'peak_time_ns', 'start_time', 'process_id', 'fhigh', 'stop_time_ns',
        'channel', 'ifo', 'duration', 'event_id', 'hrss', 'stop_time',
        'peak_time', 'snr', 'search', 'start_time_ns', 'flow', 'amplitude'
    ]
    table = lsctables.New(lsctables.SnglBurstTable, columns)

    # Generate uniformly distributed trigger times over the analysis segment
    times = t0 + uniform.rvs(0, ntrig / 10., ntrig)
    for t in times:
        row = table.RowType()

        # time-frequency position and extent
        row.chisq_dof = int(2 + expon.rvs(2))
        row.duration = 1. / 2**int(uniform.rvs(0, 7))
        row.bandwidth = row.chisq_dof / row.duration / 2
        row.central_freq = uniform.rvs(16, 2048)
        row.flow = max(row.central_freq - row.bandwidth, 0)
        row.fhigh = min(row.central_freq + row.bandwidth, 2048)

        ns, sec = math.modf(t)
        ns = int("%09d" % (ns * 1e9))
        row.peak_time, row.peak_time_ns = int(sec), ns
        ns, sec = math.modf(t - row.duration / 2)
        ns = int("%09d" % (ns * 1e9))
        row.start_time, row.start_time_ns = int(sec), ns
        ns, sec = math.modf(t + row.duration / 2)
        ns = int("%09d" % (ns * 1e9))
        row.stop_time, row.stop_time_ns = int(sec), ns

        # TODO: Correlate some triggers; an upward fluctuation often triggers
        # a few tiles on top of each other

        # SNR and confidence
        row.snr = 5.
        while row.snr < 2 * row.chisq_dof:
            row.snr = chi2.rvs(row.chisq_dof)
        row.confidence = chi2.sf(row.snr, row.chisq_dof)
        row.snr = math.sqrt(row.snr / row.chisq_dof - 1)
        row.hrss = row.amplitude = 1e-21

        # metadata
        row.search = "fake_search"
        row.channel = "FAKE"
        row.ifo = ifo
        row.event_id = table.get_next_id()
        row.process_id = proc.process_id

        table.append(row)

    xmldoc.childNodes[0].appendChild(table)
    utils.write_filename(xmldoc,
                         "%s-FAKE_SEARCH-%d-%d.xml.gz" % (ifo, int(t0), 10000),
                         gz=True)
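# Usage sketch (hedged): produce one fake-trigger document per detector;
# each call writes an <IFO>-FAKE_SEARCH-*.xml.gz file as above.
def example_fake_searches():
    for ifo in ('H1', 'L1', 'V1'):
        fake_trigger_generator(ifo)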
__author__ = "Chris Pankow <*****@*****.**>" def assign_id(row, i): row.simulation_id = ilwd.ilwdchar("sim_inspiral_table:sim_inspiral:%d" % i) CMAP = { "right_ascension": "longitude", "longitude": "longitude", "latitude": "latitude", "declination": "latitude", "inclination": "inclination", "polarization": "polarization", "t_ref": lambda r, t: r.set_time_geocent(LIGOTimeGPS(float(t))), "coa_phase": "coa_phase", "distance": "distance", "mass1": "mass1", "mass2": "mass2", "lam_tilde": "psi0", "dlam_tilde": "psi3", "psi0": "psi0", "psi3": "psi3", # SHOEHORN ALERT "sample_n": assign_id, "alpha1": "alpha1", "alpha2": "alpha2", "alpha3": "alpha3", "loglikelihood": "alpha1", "joint_prior": "alpha2",
def do_summary_table(self, xmldoc, sim_tree):
    """
    Create the search_summary table for the cWB job(s). This function
    exists as a backup in case no job list exists. It will try to
    reconstruct the job segments from the event data, but this list will
    be incomplete in the case where no events were found during a job.
    """
    try:
        search_summary = lsctables.SearchSummaryTable.get_table(xmldoc)
    except ValueError:
        search_summary = lsctables.New(lsctables.SearchSummaryTable, [
            "process_id", "nevents", "ifos", "comment", "in_start_time",
            "in_start_time_ns", "out_start_time", "out_start_time_ns",
            "in_end_time", "in_end_time_ns", "out_end_time", "out_end_time_ns"
        ])
        xmldoc.childNodes[0].appendChild(search_summary)

    process_id_type = type(lsctables.ProcessTable.get_table(xmldoc).next_id)

    runids = set()
    entries = sim_tree.GetEntries()
    for i in range(0, entries):
        sim_tree.GetEntry(i)

        if self.start is not None:
            if float(self.start) > sim_tree.start[0]:
                continue
        if self.end is not None:
            if float(self.end) < sim_tree.stop[0]:
                continue

        # Id for the run processed by WaveBurst -> process ID
        run = sim_tree.run
        if run in runids:
            continue

        row = search_summary.RowType()
        row.process_id = process_id_type(run)
        runids.add(run)

        # Search Summary Table
        # events found in the run -> nevents
        # TODO: Destroy ROOT, because it hates me
        #row.nevents = sim_tree.nevent
        row.nevents = 0

        # Instruments involved in the search
        ifos = lsctables.ifos_from_instrument_set(
            get_ifos_from_index(
                branch_array_to_list(sim_tree.ifo, sim_tree.ndim)))
        if ifos is None or len(ifos) == 0:
            if self.instruments:
                ifos = self.instruments
            else:
                # Not enough information to completely fill out the table
                sys.exit("Found a job with no IFOs on, or not enough to "
                         "determine IFOs. Try specifying instruments directly.")
        row.ifos = ifos
        row.comment = "waveburst"

        # Begin and end time of the segment
        if self.waveoffset is not None:
            waveoffset = self.waveoffset
        else:
            waveoffset = 0

        # WARNING: This will fail miserably if there are no events in the
        # analyzed run; see do_search_summary_from_joblist for a better solution
        livetime = sim_tree.left[0] + sim_tree.duration[0] + sim_tree.right[0]
        if livetime < 0:
            print >> sys.stderr, "WARNING: Run %d will have zero livetime because no events are recorded in this run, and therefore livetime cannot be calculated." % run
            # in -- with waveoffset
            row.set_in(segments.segment(LIGOTimeGPS(0), LIGOTimeGPS(1)))
            # out -- without waveoffset
            row.set_out(segments.segment(LIGOTimeGPS(0), LIGOTimeGPS(1)))
        else:
            seg_start_with_offset = sim_tree.gps - waveoffset
            seg_start_without_offset = sim_tree.gps
            seg_end_with_offset = sim_tree.gps + waveoffset + livetime
            seg_end_without_offset = sim_tree.gps + livetime
            # in -- with waveoffset
            row.set_in(
                segments.segment(LIGOTimeGPS(seg_start_with_offset),
                                 LIGOTimeGPS(seg_end_with_offset)))
            # out -- without waveoffset
            row.set_out(
                segments.segment(LIGOTimeGPS(seg_start_without_offset),
                                 LIGOTimeGPS(seg_end_without_offset)))

        search_summary.append(row)
def do_summary_table_from_joblist(self, xmldoc, sim_tree):
    """
    Create the search_summary table for the cWB job(s) from a provided cWB
    job list. The function will try to determine the proper job intervals
    from the waveoffset, if specified.
    """
    try:
        search_summary = lsctables.SearchSummaryTable.get_table(xmldoc)
    except ValueError:
        search_summary = lsctables.New(lsctables.SearchSummaryTable, [
            "process_id", "nevents", "ifos", "comment", "in_start_time",
            "in_start_time_ns", "out_start_time", "out_start_time_ns",
            "in_end_time", "in_end_time_ns", "out_end_time", "out_end_time_ns"
        ])
        xmldoc.childNodes[0].appendChild(search_summary)

    process_id_type = type(lsctables.ProcessTable.get_table(xmldoc).next_id)

    runid = dict()
    itr = 0
    for line in file(self.job_list):
        if line[0] == '#':
            continue  # skip comments
        # Determine the file type
        # WARNING: Mixing the two types could cause overwrite behavior
        line = line.split()
        if len(line) == 2:
            # start and stop
            seg = segments.segment(map(LIGOTimeGPS, map(float, line[0:2])))
            runid[itr] = seg
            itr += 1
        elif len(line) == 3:
            # index, start and stop
            seg = segments.segment(map(LIGOTimeGPS, map(float, line[1:3])))
            runid[int(line[0])] = seg
        elif len(line) == 4:
            # index, start, stop and length
            seg = segments.segment(map(LIGOTimeGPS, map(float, line[1:3])))
            runid[int(line[0])] = seg
        else:
            # dunno!
            sys.exit("Unable to understand job list segment format.")

    for run, seg in runid.iteritems():
        if self.start is not None:
            if float(self.start) > seg[0]:
                continue
        if self.end is not None:
            if float(self.end) < seg[1]:
                continue

        row = search_summary.RowType()
        row.process_id = process_id_type(run)

        # Search Summary Table
        # events found in the run -> nevents
        row.nevents = 0
        #entries = sim_tree.GetEntries()

        # Instruments involved in the search
        sim_tree.GetEntry(0)
        ifos = lsctables.ifos_from_instrument_set(
            get_ifos_from_index(
                branch_array_to_list(sim_tree.ifo, sim_tree.ndim)))
        if ifos is None or len(ifos) == 0:
            if self.instruments:
                ifos = self.instruments
            else:
                # Not enough information to completely fill out the table
                sys.exit("Found a job with no IFOs on, or not enough to "
                         "determine IFOs. Try specifying instruments directly.")
        row.ifos = ifos
        row.comment = "waveburst"

        # Begin and end time of the segment
        # TODO: This is a typical offset on either side of the job for artifacts.
        # It can, and probably will, change in the future, and shouldn't be hardcoded.
        #waveoffset, livetime = 8, -1
        waveoffset, livetime = self.waveoffset, -1
        if waveoffset is None:
            waveoffset = 0

        livetime = abs(seg)
        if livetime < 0:
            print >> sys.stderr, "WARNING: Run %d will have zero livetime because no events are recorded in this run, and therefore livetime cannot be calculated." % run
            # in -- with waveoffset
            row.set_in(segments.segment(LIGOTimeGPS(0), LIGOTimeGPS(1)))
            # out -- without waveoffset
            row.set_out(segments.segment(LIGOTimeGPS(0), LIGOTimeGPS(1)))
        else:
            # in -- with waveoffset
            row.set_in(seg)
            # out -- without waveoffset
            row.set_out(
                segments.segment(seg[0] + waveoffset, seg[1] - waveoffset))

        search_summary.append(row)
def excess_power(
        ts_data,                          # Time series from magnetic field data
        band=None,                        # Channel bandwidth
        channel_name='channel-name',      # Channel name
        fmin=0,                           # Lowest frequency of the filter bank
        fmax=None,                        # Highest frequency of the filter bank
        impulse=False,                    # Impulse response
        make_plot=True,                   # Condition to produce plots
        max_duration=None,                # Maximum duration of the tile
        nchans=256,                       # Total number of channels
        psd_estimation='median-mean',     # Average method
        psd_segment_length=60,            # Length of each segment in seconds
        psd_segment_stride=30,            # Separation between 2 consecutive segments in seconds
        station='station-name',           # Station name
        tile_fap=1e-7,                    # Tile false alarm probability threshold in Gaussian noise
        verbose=True,                     # Print details
        window_fraction=0,                # Whitening window fraction
        wtype='tukey'):                   # Whitening type, can be 'tukey' or 'hann'
    '''
    Perform excess-power search analysis on magnetic field data. This
    method will produce a bunch of time-frequency plots for every tile
    duration and bandwidth analysed, as well as an XML file identifying
    all the triggers found in the selected data within the user-defined
    time range.

    Parameters
    ----------
    ts_data : TimeSeries
        Time series from magnetic field data
    psd_segment_length : float
        Length of each segment in seconds
    psd_segment_stride : float
        Separation between 2 consecutive segments in seconds
    psd_estimation : string
        Average method
    window_fraction : float
        Whitening window fraction
    tile_fap : float
        Tile false alarm probability threshold in Gaussian noise
    nchans : int
        Total number of channels
    band : float
        Channel bandwidth
    fmin : float
        Lowest frequency of the filter bank
    fmax : float
        Highest frequency of the filter bank

    Examples
    --------
    The program can be run as an executable by using the ``excesspower``
    command line as follows::

      excesspower --station "mainz01" \\
                  --start-time "2017-04-15-17-1" \\
                  --end-time "2017-04-15-18" \\
                  --rep "/Users/vincent/ASTRO/data/GNOME/GNOMEDrive/gnome/serverdata/" \\
                  --resample 512 \\
                  --verbose
    '''
    # Determine sampling rate based on extracted time series
    sample_rate = ts_data.sample_rate
    # Check if tile maximum frequency is not defined
    if fmax is None or fmax > sample_rate / 2.:
        # Set the tile maximum frequency equal to the Nyquist frequency
        # (i.e. half the sampling rate)
        fmax = sample_rate / 2.0
    # Check whether or not tile bandwidth and channel are defined
    if band is None and nchans is None:
        # Exit program with error message
        exit("Either bandwidth or number of channels must be specified...")
    else:
        # Check that the tile maximum frequency is larger than its minimum frequency
        assert fmax >= fmin
        # Define spectral band of data
        data_band = fmax - fmin
        # Check whether tile bandwidth or channel is defined
        if band is not None:
            # Define number of possible filter bands
            nchans = int(data_band / band)
        elif nchans is not None:
            # Define filter bandwidth
            band = data_band / nchans
            nchans -= 1
        # Check that the number of channels is greater than unity
        assert nchans > 1
    # Print segment information
    if verbose:
        print '|- Estimating PSD from segments of',
        print '%.2f s, with %.2f s stride...' % (psd_segment_length,
                                                 psd_segment_stride)
    # Convert time series to array of float
    data = ts_data.astype(numpy.float64)
    # Define segment length for PSD estimation in sample units
    seg_len = int(psd_segment_length * sample_rate)
    # Define separation between consecutive segments in sample units
    seg_stride = int(psd_segment_stride * sample_rate)
    # Minimum frequency of detectable signal in a segment
    delta_f = 1. / psd_segment_length
    # Calculate PSD length counting the zero frequency element
    fd_len = fmax / delta_f + 1
    # Calculate the overall PSD from individual PSD segments
    if impulse:
        # Produce flat data
        flat_data = numpy.ones(int(fd_len)) * 2. / fd_len
        # Create PSD frequency series
        fd_psd = types.FrequencySeries(flat_data, 1. / psd_segment_length,
                                       ts_data.start_time)
    else:
        # Create overall PSD using Welch's method
        fd_psd = psd.welch(data,
                           avg_method=psd_estimation,
                           seg_len=seg_len,
                           seg_stride=seg_stride)
    if make_plot:
        # Plot the power spectral density
        plot_spectrum(fd_psd)
    # We need this for the SWIG functions
    lal_psd = fd_psd.lal()
    # Create whitening window
    if verbose:
        print "|- Whitening window and spectral correlation..."
    if wtype == 'hann':
        window = lal.CreateHannREAL8Window(seg_len)
    elif wtype == 'tukey':
        window = lal.CreateTukeyREAL8Window(seg_len, window_fraction)
    else:
        raise ValueError("Can't handle window type %s" % wtype)
    # Create FFT plan
    fft_plan = lal.CreateForwardREAL8FFTPlan(len(window.data.data), 1)
    # Perform two-point spectral correlation
    spec_corr = lal.REAL8WindowTwoPointSpectralCorrelation(window, fft_plan)
    # Determine length of individual filters
    filter_length = int(2 * band / fd_psd.delta_f) + 1
    # Initialise filter bank
    if verbose:
        print "|- Create bank of %i filters (%i samples each)..." % (
            nchans, filter_length)
    # Initialise array to store filter's frequency series and metadata
    lal_filters = []
    # Initialise array to store filter's time series
    fdb = []
    # Loop over the channels
    for i in range(nchans):
        # Define central position of the filter
        freq = fmin + band / 2 + i * band
        # Create excess power filter
        lal_filter = lalburst.CreateExcessPowerFilter(freq, band, lal_psd,
                                                      spec_corr)
        # Testing spectral correlation on filter
        #print lalburst.ExcessPowerFilterInnerProduct(lal_filter, lal_filter, spec_corr, None)
        # Append entire filter structure
        lal_filters.append(lal_filter)
        # Append filter's spectrum
        fdb.append(FrequencySeries.from_lal(lal_filter))
        #print fdb[0].frequencies
        #print fdb[0]
    if make_plot:
        # Plot filter bank
        plot_bank(fdb)
    # Convert filter bank from frequency to time domain
    if verbose:
        print "|- Convert all the frequency domain to the time domain..."
    tdb = []
    # Loop over each filter's spectrum
    for fdt in fdb:
        zero_padded = numpy.zeros(int((fdt.f0 / fdt.df).value) + len(fdt))
        st = int((fdt.f0 / fdt.df).value)
        zero_padded[st:st + len(fdt)] = numpy.real_if_close(fdt.value)
        n_freq = int(sample_rate / 2 / fdt.df.value) * 2
        tdt = numpy.fft.irfft(zero_padded, n_freq) * math.sqrt(sample_rate)
        tdt = numpy.roll(tdt, len(tdt) / 2)
        tdt = TimeSeries(tdt, name="", epoch=fdt.epoch, sample_rate=sample_rate)
        tdb.append(tdt)
    # Plot time series filter
    plot_filters(tdb, fmin, band)
    # Compute the unwhitened inner products of the input filters with
    # themselves, and the whitened and unwhitened inner products between
    # adjacent input filters. NOTE: these were previously commented out,
    # but they are required by the measure_hrss call below.
    #white_filter_ip = numpy.array([lalburst.ExcessPowerFilterInnerProduct(f, f, spec_corr, None) for f in lal_filters])
    unwhite_filter_ip = numpy.array([
        lalburst.ExcessPowerFilterInnerProduct(f, f, spec_corr, lal_psd)
        for f in lal_filters
    ])
    white_ss_ip = numpy.array([
        lalburst.ExcessPowerFilterInnerProduct(f1, f2, spec_corr, None)
        for f1, f2 in zip(lal_filters[:-1], lal_filters[1:])
    ])
    unwhite_ss_ip = numpy.array([
        lalburst.ExcessPowerFilterInnerProduct(f1, f2, spec_corr, lal_psd)
        for f1, f2 in zip(lal_filters[:-1], lal_filters[1:])
    ])
    # Check that the filter bandwidth is equal to the user-defined channel bandwidth
    min_band = (len(lal_filters[0].data.data) - 1) * lal_filters[0].deltaF / 2
    assert min_band == band
    # Create an event list where all the triggers will be stored
    event_list = lsctables.New(lsctables.SnglBurstTable, [
        'start_time', 'start_time_ns', 'peak_time', 'peak_time_ns',
        'duration', 'bandwidth', 'central_freq', 'chisq_dof', 'confidence',
        'snr', 'amplitude', 'channel', 'ifo', 'process_id', 'event_id',
        'search', 'stop_time', 'stop_time_ns'
    ])
    # Create repositories to save TF and time series plots
    os.system('mkdir -p segments/time-frequency')
    os.system('mkdir -p segments/time-series')
    # Define time edges
    t_idx_min, t_idx_max = 0, seg_len
    # Loop over each segment
    while t_idx_max <= len(ts_data):
        # Define first and last timestamps of the block
        start_time = ts_data.start_time + t_idx_min / float(ts_data.sample_rate)
        end_time = ts_data.start_time + t_idx_max / float(ts_data.sample_rate)
        if verbose:
            print "\n|- Analyzing block %i to %i (%.2f percent)" % (
                start_time, end_time, 100 * float(t_idx_max) / len(ts_data))
        # Debug for impulse response
        if impulse:
            for i in range(t_idx_min, t_idx_max):
                ts_data[i] = 1000. if i == (t_idx_max + t_idx_min) / 2 else 0.
        # Model a whitened time series for the block
        tmp_ts_data = types.TimeSeries(ts_data[t_idx_min:t_idx_max] *
                                       window.data.data,
                                       delta_t=1. / ts_data.sample_rate,
                                       epoch=start_time)
        # Save time series in relevant repository
        os.system('mkdir -p segments/%i-%i' % (start_time, end_time))
        if make_plot:
            # Plot time series
            plot_ts(tmp_ts_data,
                    fname='segments/time-series/%i-%i.png' % (start_time,
                                                              end_time))
        # Convert time series to frequency series
        fs_data = tmp_ts_data.to_frequencyseries()
        if verbose:
            print "|- Frequency series data has variance: %s" % fs_data.data.std()**2
        # Whitening (FIXME: Whiten the filters, not the data)
        fs_data.data /= numpy.sqrt(fd_psd) / numpy.sqrt(2 * fd_psd.delta_f)
        if verbose:
            print "|- Whitened frequency series data has variance: %s" % fs_data.data.std()**2
            print "|- Create time-frequency plane for current block"
        # Return the complex snr, along with its associated normalization of
        # the template, matched filtered against the data
        #filter.matched_filter_core(types.FrequencySeries(tmp_filter_bank,delta_f=fd_psd.delta_f),
        #                           fs_data,h_norm=1,psd=fd_psd,low_frequency_cutoff=lal_filters[0].f0,
        #                           high_frequency_cutoff=lal_filters[0].f0+2*band)
        if verbose:
            print "|- Filtering all %d channels...\n" % nchans,
        # Initialise 2D zero array
        tmp_filter_bank = numpy.zeros(len(fd_psd), dtype=numpy.complex128)
        # Initialise 2D zero array for time-frequency map
        tf_map = numpy.zeros((nchans, seg_len), dtype=numpy.complex128)
        # Loop over all the channels
        for i in range(nchans):
            # Reset filter bank series
            tmp_filter_bank *= 0.0
            # Index of starting frequency
            f1 = int(lal_filters[i].f0 / fd_psd.delta_f)
            # Index of last frequency bin
            f2 = int((lal_filters[i].f0 + 2 * band) / fd_psd.delta_f) + 1
            # (FIXME: Why is there a factor of 2 here?)
            tmp_filter_bank[f1:f2] = lal_filters[i].data.data * 2
            # Define the template to filter the frequency series with
            template = types.FrequencySeries(tmp_filter_bank,
                                             delta_f=fd_psd.delta_f,
                                             copy=False)
            # Create filtered series
            filtered_series = filter.matched_filter_core(
                template,
                fs_data,
                h_norm=None,
                psd=None,
                low_frequency_cutoff=lal_filters[i].f0,
                high_frequency_cutoff=lal_filters[i].f0 + 2 * band)
            # Include filtered series in the map
            tf_map[i, :] = filtered_series[0].numpy()
        if make_plot:
            # Plot spectrogram
            plot_spectrogram(numpy.abs(tf_map).T,
                             dt=tmp_ts_data.delta_t,
                             df=band,
                             ymax=ts_data.sample_rate / 2.,
                             t0=start_time,
                             t1=end_time,
                             fname='segments/time-frequency/%i-%i.png' %
                             (start_time, end_time))
            plot_tiles_ts(numpy.abs(tf_map),
                          2,
                          1,
                          sample_rate=ts_data.sample_rate,
                          t0=start_time,
                          t1=end_time,
                          fname='segments/%i-%i/ts.png' % (start_time, end_time))
            #plot_tiles_tf(numpy.abs(tf_map),2,1,ymax=ts_data.sample_rate/2,
            #              sample_rate=ts_data.sample_rate,t0=start_time,t1=end_time,
            #              fname='segments/%i-%i/tf.png'%(start_time,end_time))
        # Loop through powers of 2 up to number of channels
        for nc_sum in range(0, int(math.log(nchans, 2)))[::-1]:
            # Calculate total number of summed channels
            nc_sum = 2**nc_sum
            if verbose:
                print "\n\t|- Constructing tiles containing %d narrow band channels" % nc_sum
            # Compute full bandwidth of virtual channel
            df = band * nc_sum
            # Compute minimal signal duration in virtual channel
            dt = 1.0 / (2 * df)
            # Compute undersampling rate
            us_rate = int(round(dt / ts_data.delta_t))
            if verbose:
                print "\t|- Undersampling rate for this level: %f" % (
                    ts_data.sample_rate / us_rate)
                print "\t|- Calculating tiles..."
            # Clip the boundaries to remove window corruption
            clip_samples = int(psd_segment_length * window_fraction *
                               ts_data.sample_rate / 2)
            # Undersample the narrow band channel's time series
            # Apply clipping condition because [0:-0] does not give the full array
            tf_map_temp = tf_map[:, clip_samples:-clip_samples:us_rate] \
                if clip_samples > 0 else tf_map[:, ::us_rate]
            # Initialise final tile time-frequency map
            tiles = numpy.zeros(((nchans + 1) / nc_sum, tf_map_temp.shape[1]))
            # Loop over tile index
            for i in xrange(len(tiles)):
                # Sum all inner narrow band channels
                ts_tile = numpy.absolute(tf_map_temp[nc_sum * i:nc_sum *
                                                     (i + 1)].sum(axis=0))
                # Define index of last narrow band channel for given tile
                n = (i + 1) * nc_sum - 1
                n = n - 1 if n == len(lal_filters) else n
                # Compute the whitened inner product of each input filter with itself
                mu_sq = nc_sum * lalburst.ExcessPowerFilterInnerProduct(
                    lal_filters[n], lal_filters[n], spec_corr, None)
                #kmax = nc_sum-1 if n==len(lal_filters) else nc_sum-2
                # Loop over the inner narrow band channels
                for k in xrange(0, nc_sum - 1):
                    # Compute whitened filter inner products between adjacent
                    # input filters
                    mu_sq += 2 * lalburst.ExcessPowerFilterInnerProduct(
                        lal_filters[n - k], lal_filters[n - 1 - k], spec_corr,
                        None)
                # Normalise the tile's time series
                tiles[i] = ts_tile.real**2 / mu_sq
            if verbose:
                print "\t|- TF-plane is %dx%s samples" % tiles.shape
                print "\t|- Tile energy mean %f, var %f" % (numpy.mean(tiles),
                                                            numpy.var(tiles))
            # Define maximum number of degrees of freedom and check that it is
            # larger than or equal to 2
            max_dof = 32 if max_duration is None else int(max_duration / dt)
            assert max_dof >= 2
            # Loop through multiple degrees of freedom
            for j in [2**l for l in xrange(0, int(math.log(max_dof, 2)))]:
                # Duration is fixed by the NDOF and bandwidth
                duration = j * dt
                if verbose:
                    print "\n\t\t|- Summing DOF = %d ..." % (2 * j)
                    print "\t\t|- Explore signal duration of %f s..." % duration
                # Construct filter
                sum_filter = numpy.array([1, 0] * (j - 1) + [1])
                # Calculate length of filtered time series
                tlen = tiles.shape[1] - sum_filter.shape[0] + 1
                # Initialise filtered time series array
                dof_tiles = numpy.zeros((tiles.shape[0], tlen))
                # Loop over tiles
                for f in range(tiles.shape[0]):
                    # Sum and drop correlated tiles
                    dof_tiles[f] = fftconvolve(tiles[f], sum_filter, 'valid')
                if verbose:
                    print "\t\t|- Summed tile energy mean: %f" % (
                        numpy.mean(dof_tiles))
                    print "\t\t|- Variance tile energy: %f" % (
                        numpy.var(dof_tiles))
                if make_plot:
                    plot_spectrogram(dof_tiles.T,
                                     dt,
                                     df,
                                     ymax=ts_data.sample_rate / 2,
                                     t0=start_time,
                                     t1=end_time,
                                     fname='segments/%i-%i/%02ichans_%02idof.png' %
                                     (start_time, end_time, nc_sum, 2 * j))
                    plot_tiles_ts(dof_tiles,
                                  2 * j,
                                  df,
                                  sample_rate=ts_data.sample_rate / us_rate,
                                  t0=start_time,
                                  t1=end_time,
                                  fname='segments/%i-%i/%02ichans_%02idof_ts.png' %
                                  (start_time, end_time, nc_sum, 2 * j))
                    plot_tiles_tf(dof_tiles,
                                  2 * j,
                                  df,
                                  ymax=ts_data.sample_rate / 2,
                                  sample_rate=ts_data.sample_rate / us_rate,
                                  t0=start_time,
                                  t1=end_time,
                                  fname='segments/%i-%i/%02ichans_%02idof_tf.png' %
                                  (start_time, end_time, nc_sum, 2 * j))
                threshold = scipy.stats.chi2.isf(tile_fap, j)
                if verbose:
                    print "\t\t|- Threshold for this level: %f" % threshold
                spant, spanf = dof_tiles.shape[1] * dt, dof_tiles.shape[0] * df
                if verbose:
                    print "\t\t|- Processing %.2fx%.2f time-frequency map." % (
                        spant, spanf)
                # Since we clip the data, the start time needs to be adjusted accordingly
                window_offset_epoch = fs_data.epoch + psd_segment_length * window_fraction / 2
                window_offset_epoch = LIGOTimeGPS(float(window_offset_epoch))
                for i, j in zip(*numpy.where(dof_tiles > threshold)):
                    event = event_list.RowType()
                    # The points are summed forward in time and thus a 'summed
                    # point' is the sum of the previous N points. If this point
                    # is above threshold, it corresponds to a tile which spans
                    # the previous N points. However, the 0th point (due to the
                    # convolution specifier 'valid') is actually already a
                    # duration from the start time. All of this means the
                    # +duration and the -duration cancel, and the tile 'start'
                    # is, by definition, the start of the time-frequency map if
                    # j == 0.
                    # FIXME: I think this needs a + dt/2 to center the tile properly
                    event.set_start(window_offset_epoch + float(j * dt))
                    event.set_stop(window_offset_epoch + float(j * dt) + duration)
                    event.set_peak(event.get_start() + duration / 2)
                    event.central_freq = lal_filters[0].f0 + band / 2 + i * df + 0.5 * df
                    event.duration = duration
                    event.bandwidth = df
                    event.chisq_dof = 2 * duration * df
                    event.snr = math.sqrt(dof_tiles[i, j] / event.chisq_dof - 1)
                    # FIXME: Magic number 0.62 should be determined empirically
                    event.confidence = -lal.LogChisqCCDF(event.snr * 0.62,
                                                         event.chisq_dof * 0.62)
                    event.amplitude = None
                    event.process_id = None
                    event.event_id = event_list.get_next_id()
                    event_list.append(event)
                for event in event_list[::-1]:
                    if event.amplitude is not None:
                        continue
                    etime_min_idx = float(event.get_start()) - float(fs_data.epoch)
                    etime_min_idx = int(etime_min_idx / tmp_ts_data.delta_t)
                    etime_max_idx = float(event.get_start()) - \
                        float(fs_data.epoch) + event.duration
                    etime_max_idx = int(etime_max_idx / tmp_ts_data.delta_t)
                    # (band / 2) to account for sin^2 wings from finest filters
                    flow_idx = int((event.central_freq - event.bandwidth / 2 -
                                    (df / 2) - fmin) / df)
                    fhigh_idx = int((event.central_freq + event.bandwidth / 2 +
                                     (df / 2) - fmin) / df)
                    # TODO: Check that the undersampling rate is always
                    # commensurate with the indexing: that is to say that
                    # mod(etime_min_idx, us_rate) == 0 always
                    z_j_b = tf_map[flow_idx:fhigh_idx,
                                   etime_min_idx:etime_max_idx:us_rate]
                    # FIXME: Deal with negative hrss^2 -- e.g. remove the event
                    try:
                        event.amplitude = measure_hrss(
                            z_j_b, unwhite_filter_ip[flow_idx:fhigh_idx],
                            unwhite_ss_ip[flow_idx:fhigh_idx - 1],
                            white_ss_ip[flow_idx:fhigh_idx - 1],
                            fd_psd.delta_f, tmp_ts_data.delta_t,
                            len(lal_filters[0].data.data), event.chisq_dof)
                    except ValueError:
                        event.amplitude = 0
                if verbose:
                    print "\t\t|- Total number of events: %d" % len(event_list)
        t_idx_min += int(seg_len * (1 - window_fraction))
        t_idx_max += int(seg_len * (1 - window_fraction))
    setname = "MagneticFields"
    __program__ = 'pyburst_excesspower_gnome'
    start_time = LIGOTimeGPS(int(ts_data.start_time))
    end_time = LIGOTimeGPS(int(ts_data.end_time))
    inseg = segment(start_time, end_time)
    xmldoc = ligolw.Document()
    xmldoc.appendChild(ligolw.LIGO_LW())
    ifo = channel_name.split(":")[0]
    straindict = psd.insert_psd_option_group.__dict__
    proc_row = register_to_xmldoc(xmldoc,
                                  __program__,
                                  straindict,
                                  ifos=[ifo],
                                  version=git_version.id,
                                  cvs_repository=git_version.branch,
                                  cvs_entry_time=git_version.date)
    dt_stride = psd_segment_length
    sample_rate = ts_data.sample_rate
    # Amount to overlap successive blocks so as not to lose data
    window_overlap_samples = window_fraction * sample_rate
    outseg = inseg.contract(window_fraction * dt_stride / 2)
    # With a given dt_stride, we cannot process the remainder of this data
    remainder = math.fmod(abs(outseg), dt_stride * (1 - window_fraction))
    # ...so make an accounting of it
    outseg = segment(outseg[0], outseg[1] - remainder)
    ss = append_search_summary(xmldoc,
                               proc_row,
                               ifos=(station,),
                               inseg=inseg,
                               outseg=outseg)
    for sb in event_list:
        sb.process_id = proc_row.process_id
        sb.search = proc_row.program
        sb.ifo, sb.channel = station, setname
    xmldoc.childNodes[0].appendChild(event_list)
    ifostr = ifo if isinstance(ifo, str) else "".join(ifo)
    st_rnd, end_rnd = int(math.floor(inseg[0])), int(math.ceil(inseg[1]))
    dur = end_rnd - st_rnd
    fname = "%s-excesspower-%d-%d.xml.gz" % (ifostr, st_rnd, dur)
    utils.write_filename(xmldoc, fname, gz=fname.endswith("gz"))
    plot_triggers(fname)
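# Usage sketch (hedged): a minimal end-to-end call on simulated data. The
# channel and station names are placeholders; real GNOME channels follow
# the "station:MagneticFields" convention assumed by the ifo split above.
def example_excess_power_run():
    fake = types.TimeSeries(numpy.random.normal(size=4096 * 512),
                            delta_t=1. / 512, epoch=1000000000)
    excess_power(fake, channel_name='fake01:MagneticFields',
                 station='fake01', nchans=16, window_fraction=0.25,
                 make_plot=False)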
def get_multi_burst_row(multi_burst_table, sim_tree):
    row = multi_burst_table.RowType()
    # Set the peak time to the average of the individual detector peak times
    row.set_peak(sum(LIGOTimeGPS(t) for t in sim_tree.time) / sim_tree.ndim)
    return row
def create_tables(self, xmldoc, rootfiles):
    """
    Sets up table structures and calls populating methods.
    """
    if os.path.splitext(rootfiles[0])[1] == ".root":
        sim_tree = TChain("waveburst")
    else:
        # If the file is (for example) text, use a proxy class
        sim_tree = CWBTextConverter()

    for rootfile in rootfiles:
        sim_tree.Add(rootfile)

    # Define tables
    sngl_burst_table = lsctables.New(lsctables.SnglBurstTable, [
        "peak_time_ns", "start_time_ns", "stop_time_ns", "process_id", "ifo",
        "peak_time", "start_time", "stop_time", "duration", "time_lag",
        "peak_frequency", "search", "flow", "fhigh", "bandwidth", "tfvolume",
        "hrss", "event_id", "snr"
    ])
    xmldoc.childNodes[0].appendChild(sngl_burst_table)
    sngl_burst_table.sync_next_id()

    coinc_event_table = lsctables.New(lsctables.CoincTable, [
        "process_id", "coinc_event_id", "nevents", "instruments",
        "time_slide_id", "coinc_def_id", "likelihood"
    ])
    xmldoc.childNodes[0].appendChild(coinc_event_table)
    coinc_event_table.sync_next_id()

    # TODO: Reimplement this when the cwb_table module is included
    #if self.cwbtable:
    #    cohwb_table = lsctables.New(cwb_table.CoherentWaveburstTable,
    #        ["ellipticity", "correlated_energy", "eff_correlated_energy",
    #         "coherent_network_amp", "penalty", "network_correlation",
    #         "energy_disbalance", "ellip_energy_disbalance", "process_id",
    #         "coinc_event_id", "cwb_id"])
    #    xmldoc.childNodes[0].appendChild(cohwb_table)
    #    cohwb_table.sync_next_id()

    multi_burst_table = lsctables.New(
        lsctables.MultiBurstTable,
        [
            "process_id", "peak_time", "peak_time_ns", "coinc_event_id",
            "snr", "ifos",  # NOTE: Added to the table definition
            "false_alarm_rate", "ligo_axis_ra", "ligo_axis_dec"
        ])
    xmldoc.childNodes[0].appendChild(multi_burst_table)

    coinc_event_map_table = lsctables.New(lsctables.CoincMapTable)
    xmldoc.childNodes[0].appendChild(coinc_event_map_table)

    jobsegment = None
    if self.start and self.end:
        jobsegment = segments.segment(LIGOTimeGPS(self.start),
                                      LIGOTimeGPS(self.end))

    if self.verbose:
        print "Creating Process Table...",

    if self.job_list:
        self.do_process_table(xmldoc, sim_tree)
    else:
        self.do_process_table_from_segment(xmldoc, sim_tree, jobsegment)

    process_index = dict((int(row.process_id), row)
                         for row in lsctables.ProcessTable.get_table(xmldoc))

    if self.verbose:
        print " done."

    if self.verbose:
        print "Creating Summary Table...",
    # If we are provided a job list, use it to generate the list
    if self.job_list:
        self.do_summary_table_from_joblist(xmldoc, sim_tree)
    elif self.job_list == None and self.start and self.end:
        self.do_summary_table_from_segment(xmldoc, jobsegment, sim_tree)
    else:
        self.do_summary_table(xmldoc, sim_tree)
    if self.verbose:
        print " done."

    # create coinc_definer row
    row = self.get_coinc_def_row(sim_tree)
    coinc_def_id = llwapp.get_coinc_def_id(xmldoc,
                                           row.search,
                                           row.search_coinc_type,
                                           description=row.description)

    entries = sim_tree.GetEntries()
    for i in range(0, entries):
        sim_tree.GetEntry(i)
        if self.start is not None:
            if float(self.start) > sim_tree.start[0]:
                continue
        if self.end is not None:
            if float(self.end) < sim_tree.stop[0]:
                continue

        offset_vector = dict(
            (get_ifos_from_index(instrument_index), offset)
            for instrument_index, offset in zip(sim_tree.ifo, sim_tree.lag))

        coinc_event = coinc_event_table.RowType()
        coinc_event.process_id = process_index[sim_tree.run].process_id
        coinc_event.coinc_event_id = coinc_event_table.get_next_id()
        coinc_event.coinc_def_id = coinc_def_id
        coinc_event.nevents = sim_tree.ndim
        coinc_event.instruments = lsctables.ifos_from_instrument_set(
            get_ifos_from_index(
                branch_array_to_list(sim_tree.ifo, sim_tree.ndim)))
        coinc_event.time_slide_id = time_slide.get_time_slide_id(
            xmldoc, offset_vector, process_index[sim_tree.run])
        coinc_event.likelihood = sim_tree.likelihood
        coinc_event_table.append(coinc_event)

        for d in range(0, sim_tree.ndim):
            sngl_burst = self.get_sngl_burst_row(sngl_burst_table, sim_tree, d)
            sngl_burst.process_id = coinc_event.process_id
            sngl_burst.event_id = sngl_burst_table.get_next_id()
            sngl_burst_table.append(sngl_burst)

            coinc_event_map = coinc_event_map_table.RowType()
            coinc_event_map.event_id = sngl_burst.event_id
            coinc_event_map.table_name = sngl_burst.event_id.table_name
            coinc_event_map.coinc_event_id = coinc_event.coinc_event_id
            coinc_event_map_table.append(coinc_event_map)

        # TODO: Reimplement when cwb_table module is included
        #if self.cwbtable:
        #    cwb = get_cwb_row(cohwb_table, sim_tree)
        #    cwb.process_id = coinc_event.process_id
        #    cwb.coinc_event_id = coinc_event.coinc_event_id
        #    cwb.cwb_id = cohwb_table.get_next_id()
        #    cohwb_table.append(cwb)

        multi_burst = self.get_multi_burst_row(multi_burst_table, sim_tree)
        multi_burst.process_id = coinc_event.process_id
        multi_burst.coinc_event_id = coinc_event.coinc_event_id
        # NOTE: Until we have an embedded cwb table definition, this will be
        # copied here so official tools have a ranking statistic to use
        try:
            multi_burst.snr = sim_tree.rho[1]
        except TypeError:
            # difference in definition between ROOT and text
            multi_burst.snr = sim_tree.rho
        # NOTE: To be filled in later by farburst
        multi_burst.false_alarm_rate = -1.0
        # Reconstructed right ascension and declination
        multi_burst.ligo_axis_ra = sim_tree.phi[2]
        multi_burst.ligo_axis_dec = sim_tree.theta[2]
        multi_burst.ifos = lsctables.ifos_from_instrument_set(
            get_ifos_from_index(
                branch_array_to_list(sim_tree.ifo, sim_tree.ndim)))

        multi_burst_table.append(multi_burst)
def do_summary_table_from_segment(self, xmldoc, segment, sim_tree, jobid=-1):
    """
    Create the search_summary table for a cWB run from a segment specified
    on the command line. The function will try to determine the proper job
    intervals from the waveoffset, if specified.
    """
    try:
        search_summary = lsctables.SearchSummaryTable.get_table(xmldoc)
    except ValueError:
        search_summary = lsctables.New(lsctables.SearchSummaryTable, [
            "process_id", "nevents", "ifos", "comment", "in_start_time",
            "in_start_time_ns", "out_start_time", "out_start_time_ns",
            "in_end_time", "in_end_time_ns", "out_end_time", "out_end_time_ns"
        ])
        xmldoc.childNodes[0].appendChild(search_summary)

    process_id_type = type(lsctables.ProcessTable.get_table(xmldoc).next_id)

    sim_tree.GetEntry(0)

    if jobid < 0:
        run = sim_tree.run
    else:
        run = jobid
    seg = segment

    # Search Summary Table
    # events found in the run -> nevents
    row = search_summary.RowType()
    row.process_id = process_id_type(run)
    row.nevents = sim_tree.GetEntries()

    # Instruments involved in the search
    ifos = lsctables.ifos_from_instrument_set(
        get_ifos_from_index(branch_array_to_list(sim_tree.ifo,
                                                 sim_tree.ndim)))
    if ifos is None or len(ifos) == 0:
        if self.instruments:
            ifos = self.instruments
        else:
            # Not enough information to completely fill out the table
            sys.exit("Found a job with no IFOs on, or not enough to "
                     "determine IFOs. Try specifying instruments directly.")
    row.ifos = ifos
    row.comment = "waveburst"

    # Begin and end time of the segment
    waveoffset = self.waveoffset
    if waveoffset is None:
        waveoffset = 0

    # in -- with waveoffset
    row.set_in(seg)
    # out -- without waveoffset
    waveoffset = LIGOTimeGPS(waveoffset)
    row.set_out(segments.segment(seg[0] + waveoffset, seg[1] - waveoffset))
    search_summary.append(row)
def get_segment(self):
    """
    Return the segment described by this row.
    """
    return segments.segment(LIGOTimeGPS(self.start_time, 0),
                            LIGOTimeGPS(self.end_time, 0))
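# Usage sketch (hedged): total livetime covered by a collection of rows
# exposing get_segment(); coalescing first avoids double-counting overlaps.
def total_livetime(rows):
    segs = segments.segmentlist(row.get_segment() for row in rows)
    return float(abs(segs.coalesce()))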