def create_spectrograms(filename, out_filename, start_sample, num_samples):
    """
    Save one spectrogram image per (polarisation, frequency) pair of an LBA file.

    The images are written as PNG files into the directory
    "<out_filename>_<start_sample>", which is created if it is missing.

    :param filename: path of the LBA file to read.
    :param out_filename: prefix for the output directory and for each plot name.
    :param start_sample: offset passed to ``LBAFile.read``; also part of the
        output directory name.
    :param num_samples: number of samples passed to ``LBAFile.read``.
    """
    out_directory = "{0}_{1}".format(out_filename, start_sample)

    with open(filename, 'r') as lba_handle:
        lba = LBAFile(lba_handle)
        samples = lba.read(start_sample, num_samples)
        LOG.info("Read {0} samples".format(num_samples))

        os.makedirs(out_directory, exist_ok=True)

        # samples is indexed as [sample, frequency, polarisation].
        for pindex in range(samples.shape[2]):
            for findex in range(samples.shape[1]):
                freqs, times, sxx = signal.spectrogram(
                    samples[:, findex, pindex],
                    fs=SAMPLE_RATE,
                    window=('tukey', 0.5))

                fig = plt.figure(figsize=(16, 9), dpi=80)
                try:
                    plt.xlabel("Time [sec]")
                    plt.ylabel("Frequency [MHz]")
                    name = "{0}_p{1}_f{2}".format(out_filename, pindex, findex)
                    plt.title(name)
                    plt.pcolormesh(times, freqs, sxx)
                    plt.colorbar()
                    LOG.info("Saving plot {0}".format(name))
                    plt.savefig("{0}/{1}.png".format(out_directory, name))
                finally:
                    # Always release the figure, even when plotting or saving
                    # fails, so figures do not pile up across iterations.
                    fig.clear()
                    plt.close(fig)
def main():
    """
    Downsample a range of an LBA file and store the result as a compressed npz.

    Reads ``args['samples']`` samples starting at ``args['offset']`` from
    ``args['lba_file']``, decimates every (frequency, polarisation) channel by
    ``args['factor']`` and writes the resulting array to ``args['output_file']``.
    """
    args = parse_args()
    factor = args['factor']

    with open(args['lba_file'], 'r') as lba_handle:
        lba_file = LBAFile(lba_handle)
        samples = lba_file.read(args['offset'], args['samples'])

        num_samples, num_frequencies, num_polarisations = samples.shape

        # signal.decimate returns ceil(n / factor) values, so size the output
        # with a ceiling division; a floor division only matches when the
        # sample count is an exact multiple of the factor.
        decimated_length = -(-num_samples // factor)
        downsamples = np.zeros(
            (decimated_length, num_frequencies, num_polarisations))

        # Decimate channel by channel, taking the channel counts from the data
        # rather than assuming 4 frequencies and 2 polarisations.
        for pindex in range(num_polarisations):
            for findex in range(num_frequencies):
                downsamples[:, findex, pindex] = signal.decimate(
                    samples[:, findex, pindex], factor)

    np.savez_compressed(args['output_file'], downsamples)
def save_all(args):
    """
    Copy every sample of an LBA file into a single HDF5 file.

    The output is named "<outfile without extension>_all.hdf5" and laid out as
    ``polarisation_<p>/channel_<c>/samples``. The input is read in chunks and
    each chunk is appended to the matching resizable dataset.

    :param args: mapping providing at least ``'outfile'`` and ``'lba_file'``.
    """
    basename, _ = os.path.splitext(args['outfile'])
    outname = "{0}_all.hdf5".format(basename)

    # Best-effort removal of a previous output; a missing file is not an error.
    try:
        os.remove(outname)
    except OSError:
        pass

    with open(args['lba_file'], 'r') as lba_handle:
        lba = LBAFile(lba_handle)
        max_samples = lba.max_samples
        chunk_size = 1024 * 1024 * 10
        samples_read = 0

        with h5py.File(outname, 'w') as outfile:
            while samples_read < max_samples:
                to_read = min(max_samples - samples_read, chunk_size)
                samples = lba.read(samples_read, to_read)
                samples_read += to_read
                LOG.info("{0} / {1}. {2}%".format(
                    samples_read, max_samples,
                    (samples_read / max_samples) * 100))

                for polarisation in range(samples.shape[2]):
                    polarisation_group = outfile.require_group(
                        'polarisation_{0}'.format(polarisation))

                    for channel in range(samples.shape[1]):
                        channel_group = polarisation_group.require_group(
                            'channel_{0}'.format(channel))
                        channel_samples = samples[:, channel, polarisation]

                        if 'samples' in channel_group:
                            # Grow the existing dataset and write the chunk
                            # into the new tail. Real resize/write errors
                            # propagate instead of being swallowed.
                            dataset = channel_group['samples']
                            tail_start = dataset.shape[0]
                            dataset.resize(
                                tail_start + channel_samples.shape[0], axis=0)
                            dataset[tail_start:] = channel_samples
                        else:
                            # First chunk: create a dataset that can grow
                            # along its only axis.
                            channel_group.create_dataset(
                                'samples',
                                data=channel_samples,
                                maxshape=(None, ))
def load_real_noise(filename, num_batches, batch_size, frequency=None, polarisation=None):
    """
    Load batches of noise data from random positions in the specified LBA file.

    :param filename: path of the LBA file to read.
    :param num_batches: number of batches (rows) to load.
    :param batch_size: number of consecutive samples in each batch.
    :param frequency: frequency index used for every batch, or None to pick a
        random frequency for each batch.
    :param polarisation: polarisation index used for every batch, or None to
        pick a random polarisation for each batch.
    :return: the result of ``normalise`` applied to the
        (num_batches, batch_size) float32 array of loaded samples.
    """
    num_frequencies = 4
    num_polarisations = 2

    data = np.zeros((num_batches, batch_size), dtype=np.float32)

    with open(filename, 'r') as lba_handle:
        lba = LBAFile(lba_handle)

        # Random start indexes that cannot overflow the file when batch_size
        # samples are read from them onward.
        indexes = (np.random.rand(num_batches) * (lba.max_samples - batch_size)).astype(int)

        # For each batch, either use the provided frequency and polarisation,
        # or pick randomly from the 4 frequencies and 2 polarisations.
        # rand() is in [0, 1), so scaling by the count and truncating yields
        # every index 0..count-1 (scaling by 3 never selected frequency 3).
        if frequency is None:
            frequency_indexes = (np.random.rand(num_batches) * num_frequencies).astype(int)
        else:
            frequency_indexes = np.repeat(frequency, num_batches)

        if polarisation is None:
            polarisation_indexes = (np.random.rand(num_batches) * num_polarisations).astype(int)
        else:
            polarisation_indexes = np.repeat(polarisation, num_batches)

        # Get data for each batch
        for batch in range(num_batches):
            if batch % 100 == 0 or batch == num_batches - 1:
                LOG.info("Loading real data batch {0} / {1}".format(
                    batch + 1, num_batches))

            lba_data = lba.read(indexes[batch], batch_size)
            data[batch] = lba_data[:, frequency_indexes[batch], polarisation_indexes[batch]]

    data = normalise(data)
    return data
def __call__(self):
    """
    Run the preprocessor.

    Reads the input LBA file in batches of up to 128 FFT windows, hands each
    batch to ``output_fft_batch`` together with the open HDF5 output file, and
    records the run parameters as attributes on that file.
    """
    window = self.fft_window

    with open(self.file, 'r') as infile:
        lba = LBAFile(infile)

        # Trailing samples that cannot fill a whole FFT window are dropped.
        available = lba.max_samples
        total_samples = available - (available % window)
        if self.max_ffts > 0:
            total_samples = min(window * self.max_ffts, total_samples)

        total_ffts = total_samples // window
        if self.max_ffts == 0:
            # The user gave no limit, so adopt the count derived from the file.
            self.max_ffts = total_ffts

        offset = 0
        with h5py.File(self.outfile, 'w') as outfile:
            run_attributes = (
                ('fft_window', window),
                ('samples', total_samples),
                ('fft_count', total_ffts),
                ('input_size', self.input_size),
                ('cutoff', self.cutoff),
            )
            for attribute_name, attribute_value in run_attributes:
                outfile.attrs[attribute_name] = attribute_value

            while offset < total_samples:
                ffts_left = (total_samples - offset) // window
                LOG.info("Processed {0} out of {1} fft windows".format(total_ffts - ffts_left, total_ffts))

                batch_ffts = min(ffts_left, 128)
                batch_samples = window * batch_ffts
                batch = lba.read(offset, batch_samples)
                self.output_fft_batch(batch, batch_ffts, outfile)
                offset += batch_samples

            LOG.info("Processed {0} out of {0} fft windows".format(total_ffts, total_ffts))
def save_fft_data(filename, outfilename, sample_size, chunks_per_file):
    """
    Write FFTs of randomly positioned LBA chunks, plus two fake-noise sets, to HDF5.

    For each of ``chunks_per_file`` iterations a random start position is drawn,
    ``sample_size`` samples are read, and every (polarisation, frequency)
    channel is transformed with ``np.fft.fft``. Each stored entry is the real
    parts followed by the imaginary parts. Results are grouped as
    ``real["p<i>"]["f<j>"]`` and saved with "fake1" and "fake2" via ``save_hdf5``.

    :param filename: path of the LBA file to read.
    :param outfilename: path of the HDF5 file to create.
    :param sample_size: number of samples read for each chunk.
    :param chunks_per_file: number of random chunks to read.
    """
    with open(filename, 'r') as lba_handle:
        lba = LBAFile(lba_handle)

        with h5py.File(outfilename, 'w') as outfile:
            real = {}
            fake1 = generate_fake_noise(chunks_per_file, sample_size)
            fake2 = generate_fake_noise(chunks_per_file, sample_size)

            for _ in range(chunks_per_file):
                # Random start such that a full chunk still fits in the file.
                start = np.random.randint(0, lba.max_samples - sample_size)
                chunk = lba.read(start, sample_size)

                for pindex in range(chunk.shape[2]):
                    pol_key = "p{0}".format(pindex)
                    if pol_key not in real:
                        real[pol_key] = {}
                    per_frequency = real[pol_key]

                    for findex in range(chunk.shape[1]):
                        freq_key = "f{0}".format(findex)
                        if freq_key not in per_frequency:
                            per_frequency[freq_key] = []

                        spectrum = np.fft.fft(chunk[:, findex, pindex])
                        flattened = np.concatenate((spectrum.real, spectrum.imag))
                        per_frequency[freq_key].append(flattened)

            payload = {"fake1": fake1, "fake2": fake2, "real": real}
            save_hdf5(outfile, payload)
def main():
    """
    Extract one channel/polarisation of an LBA file into an HDF5 file.

    The output name is built from the base name of ``args['outfile']`` plus the
    channel, polarisation, sample count and, in FFT mode, the FFT size. Samples
    are read in chunks and written with ``write_fft`` in FFT mode or
    ``write_raw`` otherwise. When ``args['all']`` is set, ``save_all`` runs first.
    """
    args = parse_args()
    LOG.info("Starting...")

    if args['all']:
        save_all(args)
        # NOTE(review): execution continues with the single-channel export
        # below after save_all; confirm whether an early return was intended.

    # Form name for outfile using base name, samples, channel, polarisation
    basename, ext = os.path.splitext(args['outfile'])
    if args['fft']:
        outname = "{0}_c{1}_p{2}_s{3}_fft{4}{5}".format(
            basename, args['channel'], args['polarisation'],
            args['samples'], args['fft_size'], ext)
    else:
        outname = "{0}_c{1}_p{2}_s{3}{4}".format(
            basename, args['channel'], args['polarisation'],
            args['samples'], ext)

    # Best-effort removal of a previous output; a missing file is not an error.
    try:
        os.remove(outname)
    except OSError:
        pass

    with open(args['lba_file'], 'r') as lba_handle:
        lba = LBAFile(lba_handle)
        max_samples = args['samples']

        if args['fft']:
            # If using FFT, set the chunk size to the fft size.
            # Either read out the next largest multiple of this chunk size
            # above the specified max_samples, if there's enough samples in
            # the lba file to do this. If not, read the next smallest.
            chunk_size = args['fft_size']

            # Next smallest multiple of chunk_size
            max_possible_samples = lba.max_samples - (lba.max_samples % chunk_size)

            # Next largest multiple of chunk_size
            # NOTE(review): a request that is already an exact multiple still
            # gains one extra chunk here; confirm this is intended.
            max_samples = max_samples - (max_samples % chunk_size) + chunk_size
            max_samples = min(max_possible_samples, max_samples)
        else:
            chunk_size = 1024 * 1024 * 10

        samples_read = 0
        channel = args['channel']
        polarisation = args['polarisation']

        with h5py.File(outname, 'w') as outfile:
            while samples_read < max_samples:
                to_read = min(max_samples - samples_read, chunk_size)
                samples = lba.read(samples_read, to_read)[:, channel, polarisation]
                samples_read += to_read
                LOG.info("{0} / {1}. {2}%".format(
                    samples_read, max_samples,
                    (samples_read / max_samples) * 100))

                if args['fft']:
                    # Read lba samples, fft them and write out
                    write_fft(outfile, 'data', samples, args['fft_angles_abs'])
                else:
                    # Write samples out raw
                    write_raw(outfile, 'data', samples)
def preprocess(self, name, input_file, observation):
    """
    Copy an LBA file into ``observation``, channel by channel.

    Observation-level metadata is filled from the LBA header. When
    ``self.obs_filename`` is set, the VEX file is parsed to fill the source
    array and, if it has exactly one mode and the configured antenna is
    present, to attach per-channel metadata. VEX failures are logged and the
    samples are still copied.

    :param name: stored as both the observation name and original file name.
    :param input_file: passed to ``LBAFile`` together with ``self.sample_rate``.
    :param observation: output object that receives metadata and channels.
    """
    lba = LBAFile(input_file, self.sample_rate)

    # A positive self.max_samples is a user-supplied cap on the sample count.
    if self.max_samples > 0:
        total_samples = min(self.max_samples, lba.max_samples)
    else:
        total_samples = lba.max_samples

    # Length of the observation for the number of samples being read.
    duration = lba.obs_length(total_samples)
    obs_start = lba.obs_start
    obs_end = obs_start + datetime.timedelta(seconds=duration)
    LOG.info("LBA obs time: start {0} end {1} duration {2} sec".format(obs_start, obs_end, duration))

    observation.observation_name = name
    observation.original_file_name = name
    observation.original_file_type = 'lba'
    observation.additional_metadata = json.dumps(lba.header)
    if self.antenna_name is not None:
        observation.antenna_name = self.antenna_name
    else:
        observation.antenna_name = lba.header.get('ANTENNANAME', '')
    observation.sample_rate = self.sample_rate
    observation.length_seconds = duration
    observation.start_time = obs_start.timestamp()
    observation.num_channels = lba.num_channels

    channel_map = None
    if self.obs_filename is not None:
        try:
            vex = pyvex.Vex(self.obs_filename)
            # self._fill_on_source_array1(vex, start, end, max_samples)
            self._fill_source_array(observation, vex, obs_start, obs_end, total_samples)

            # Channel info comes from the VEX mode. Only a single mode is
            # supported for now; anything else is reported and skipped.
            mode_count = len(vex.modes)
            if mode_count == 0:
                LOG.warning("No modes in vex file to get channel info from")
            elif mode_count > 1:
                LOG.error("Cannot get channel information from vex file because multiple modes are present. This is currently unsupported")
            else:
                # Get antenna info
                antenna = None
                for candidate in vex.antennas:
                    if candidate.def_name == self.antenna_name:
                        antenna = candidate
                        break

                if antenna is None:
                    LOG.error("Specified antenna def name {0} is not present in the vex file".format(self.antenna_name))
                else:
                    LOG.info("Found antenna def name {0}. Name {1}".format(self.antenna_name, antenna.name))
                    mode = vex.modes[0]
                    setup = mode.setups[antenna.name]

                    entries = []
                    for channel in setup.channels:
                        subband = mode.subbands[channel.subband_id]
                        if_entry = setup.ifs["IF_{0}".format(channel.if_name)]
                        entries.append([channel, subband, if_entry])
                    channel_map = entries

                    # TODO: I'm unsure if this is correct or what order we should use here
                    #  - Ordering by subband_id will match the CH0(0-8) ordering present in the vex file.
                    #  - Ordering by record_chan seems to make some logic sense as it orders them from low to
                    #    high frequency across pol R then pol L. It also orders the BBC values ascending.
                    #  - Neither of these seem to match the observation_details.md table.
                    # TODO: For now, I'll assume sorting by record chan
                    channel_map.sort(key=lambda entry: entry[0].record_chan)
        except Exception as e:
            LOG.error("Failed to parse vex file {0}".format(e))

    offset = 0
    while offset < total_samples:
        batch_size = min(total_samples - offset, self.chunk_size)
        samples = lba.read(offset, batch_size)

        for channel_index in range(samples.shape[1]):
            # Write each channel out, attaching VEX metadata when a channel
            # map is available.
            channel_name = "channel_{0}".format(channel_index)
            out_channel = observation[channel_name]
            if out_channel is None:
                out_channel = observation.create_channel(channel_name, shape=(total_samples,), dtype=np.int8)

            if channel_map is not None:
                vex_channel, vex_subband, vex_if = channel_map[channel_index]
                # Add info from the vex file to the channel
                out_channel.freq_start = vex_channel.bbc_freq
                out_channel.freq_end = vex_channel.bbc_freq + vex_channel.bbc_bandwidth
                channel_metadata = {
                    'channel_name': vex_channel.name,
                    'polarisation': vex_subband.pol.decode('utf-8'),
                    'bbc_name': vex_channel.bbc_name,
                    'record_chan': vex_channel.record_chan,
                    'subband_id': vex_channel.subband_id,
                    'vlba_band_name': vex_if.vlba_band_name,
                    'if_sslo': vex_if.if_sslo,
                    'lower_edge_freq': vex_if.lower_edge_freq
                }
                out_channel.additional_metadata = json.dumps(channel_metadata, indent=4)

            out_channel.write_data(offset, samples[:, channel_index])

        offset += batch_size
        if (offset // self.chunk_size) % 10 == 0:
            LOG.info("{0:.2%} {1}/{2}".format(offset / total_samples, offset, total_samples))

    LOG.info("LBA preprocessor complete")
def __call__(self):
    """
    Run every analysis over the configured sample range of the LBA file.

    Sample statistics are produced for the whole data set, for each of the two
    polarisations and for each frequency. For every frequency a spectrogram,
    periodogram, Welch estimate, Lomb-Scargle periodogram and FFT are created
    and saved. Finally the spectrograms of each polarisation are merged and
    saved, with and without local normalisation.
    """
    # The output directory has to exist before anything is written.
    os.makedirs(self.out_directory, exist_ok=True)

    with open(self.filename, "r") as lba_handle:
        lba = LBAFile(lba_handle)
        samples = lba.read(self.sample_offset, self.num_samples)

        # Statistics across all samples.
        self.output_sample_statistics(samples)

        # One view per polarisation (p0, p1).
        per_polarisation = (samples[:, :, 0], samples[:, :, 1])
        for pol_index, pol_samples in enumerate(per_polarisation):
            self.polarisation = pol_index
            os.makedirs(self.get_output_filename(), exist_ok=True)
            self.output_sample_statistics(pol_samples)

            spectrograms = []
            for freq_index in range(pol_samples.shape[1]):
                channel = pol_samples[:, freq_index]
                self.frequency = freq_index
                os.makedirs(self.get_output_filename(), exist_ok=True)
                self.output_sample_statistics(channel)

                # Spectrogram, with its frequency axis passed through fix_freq.
                freqs, times, sxx = self.create_spectrogram(channel)
                self.fix_freq(freqs, freq_index)
                spectrogram = (freqs, times, sxx)
                spectrograms.append(spectrogram)
                self.save_spectrogram(spectrogram)

                # The remaining analyses share one shape: create, fix the
                # frequency axis, save. Order: periodogram, Welch,
                # Lomb-Scargle, FFT.
                analyses = (
                    (self.create_periodogram, self.save_periodogram),
                    (self.create_welch, self.save_welch),
                    (self.create_lombscargle, self.save_lombscargle),
                    (self.create_fft, self.save_fft),
                )
                for create, save in analyses:
                    freqs, values = create(channel)
                    self.fix_freq(freqs, freq_index)
                    save((freqs, values))

            self.frequency = None

            # Merged spectrograms for this polarisation.
            merged = self.merge_spectrograms(spectrograms)
            merged_normalised = self.merge_spectrograms(
                spectrograms, normalise_local=True)
            self.save_spectrogram(merged, "merged", "spectrogram_merged.png")
            self.save_spectrogram(merged_normalised,
                                  "merged local normalisation",
                                  "spectrogram_merged_normalised.png")
def __call__(self):
    """
    Load samples from an ``.lba`` or ``.npz`` file and save all plots for them.

    Sample statistics are written for the whole data set, per polarisation and
    per frequency. For each frequency a spectrogram, periodogram, Welch
    estimate, Lomb-Scargle periodogram, RFFT, IFFT, power spectral density and
    amplitude spectral density are produced and saved. Spectrograms are then
    merged in two groups (frequencies 0-1 and 2-3) per polarisation.

    :raises ValueError: if ``self.filename`` ends with neither ".lba" nor ".npz".
    """
    logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(name)s:%(message)s')
    self.LOG = logging.getLogger(__name__)
    log = self.LOG.info

    def run_analysis(tag, label, create, save, data, freq_index):
        # Create one analysis, fix its frequency axis and save it, logging
        # before the create and save steps.
        log("{0} {1}".format(tag, label))
        freqs, values = create(data)
        self.fix_freq(freqs, freq_index)
        log("{0} {1} Saving".format(tag, label))
        save((freqs, values))

    log("Plotter for {0} started".format(self.filename))
    matplotlib.rc('font', weight='normal', size=18)

    # Firstly, create the output directory
    os.makedirs(self.out_directory, exist_ok=True)
    log("Output directory {0} created".format(self.out_directory))

    if self.filename.endswith(".lba"):
        with open(self.filename, "r") as lba_handle:
            log("Opening LBA file {0} and reading samples...".format(self.filename))
            lba = LBAFile(lba_handle)
            samples = lba.read(self.sample_offset, self.num_samples)
            del lba
            gc.collect()  # Ensure the loaded lba file is unloaded
    elif self.filename.endswith(".npz"):
        samples = np.load(self.filename)["arr_0"]
    else:
        # Fail here with a clear message rather than later with a NameError
        # on the unbound samples variable.
        raise ValueError(
            "Unsupported input file {0}: expected a .lba or .npz file".format(self.filename))

    log("Read {0} samples".format(self.num_samples))

    # Do global things across all samples
    log("Calculating sample statistics for entire dataset...")
    self.output_sample_statistics(samples)

    # Iterate over each of the two polarisations
    for pindex in range(samples.shape[2]):
        pol_tag = "{0}, P{1}".format(self.filename, pindex)
        log("{0} Polarisation {1}".format(self.filename, pindex))
        p = samples[:, :, pindex]
        self.polarisation = pindex
        os.makedirs(self.get_output_filename(), exist_ok=True)

        log("{0} Sample statistics...".format(pol_tag))
        self.output_sample_statistics(p)

        spectrogram_groups = [[], []]  # freq1, freq2 : freq3, freq4

        # Iterate over each of the four frequencies
        for freq in range(p.shape[1]):
            freq_tag = "{0}, F{1}".format(pol_tag, freq)
            log("{0} Frequency {1}".format(pol_tag, freq))
            freq_samples = p[:, freq]
            self.frequency = freq
            os.makedirs(self.get_output_filename(), exist_ok=True)

            log("{0} Sample statistics...".format(freq_tag))
            self.output_sample_statistics(freq_samples)

            # Spectrogram for this frequency
            log("{0} Spectrogram".format(freq_tag))
            f, t, sxx = self.create_spectrogram(freq_samples)
            # Calculate the actual frequencies for the spectrogram
            self.fix_freq(f, freq)
            spectrogram = (f, t, sxx)
            spectrogram_groups[freq // 2].append(spectrogram)
            log("{0} Spectrogram Saving".format(freq_tag))
            self.save_spectrogram(spectrogram)

            # Periodogram for this frequency
            run_analysis(freq_tag, "Periodogram", self.create_periodogram,
                         self.save_periodogram, freq_samples, freq)

            # Welch
            run_analysis(freq_tag, "Welch", self.create_welch,
                         self.save_welch, freq_samples, freq)

            # Lombscargle
            try:
                run_analysis(freq_tag, "Lombscargle", self.create_lombscargle,
                             self.save_lombscargle, freq_samples, freq)
            except ZeroDivisionError:
                # Lomb-Scargle is skipped for this channel; report it through
                # the logger like every other message in this method.
                self.LOG.warning("Zero division in Lombscargle")

            # RFFT
            run_analysis(freq_tag, "RFFT", self.create_rfft,
                         self.save_rfft, freq_samples, freq)

            # IFFT
            run_analysis(freq_tag, "IRFFT", self.create_ifft,
                         self.save_ifft, freq_samples, freq)

            # power spectral density (create_psd returns values first,
            # frequencies second)
            log("{0} PSD".format(freq_tag))
            pxx, f = self.create_psd(freq_samples)
            self.fix_freq(f, freq)
            log("{0} PSD Saving".format(freq_tag))
            self.save_psd((pxx, f), "Power Spectral Density", "Power Spectral Density [V/rtMHz]")

            # amplitude spectral density: square root of the PSD, computed in
            # place to avoid a second array
            log("{0} ASD".format(freq_tag))
            np.sqrt(pxx, out=pxx)
            log("{0} ASD Saving".format(freq_tag))
            self.save_psd((pxx, f), "Amplitude Spectral Density", "Amplitude Spectral Density [sqrt(V/rtMHz)]")

        self.frequency = None

        # Create merged spectrograms for this p
        log("{0} Merged Spectrograms...".format(pol_tag))
        for index, group in enumerate(spectrogram_groups):
            log("{0} Merged Spectrograms Group {1}".format(pol_tag, index))
            merged = self.merge_spectrograms(group)
            merged_normalised = self.merge_spectrograms(group, normalise_local=True)
            log("{0} Merged Spectrograms Group {1} Saving".format(pol_tag, index))
            self.save_spectrogram(merged,
                                  "group {0} merged".format(index),
                                  "spectrogram_group{0}_merged.png".format(index))
            self.save_spectrogram(merged_normalised,
                                  "group {0} merged local normalisation".format(index),
                                  "spectrogram_group{0}_merged_normalised.png".format(index))
def __call__(self): LOGGER.info("Plotter for {0} started".format(self.filename)) matplotlib.rc('font', weight='normal', size=18) # Firstly, create the output directory os.makedirs(self.out_directory, exist_ok=True) LOGGER.info("Output directory {0} created".format(self.out_directory)) with open(self.filename, "r") as f: LOGGER.info("Opening LBA file {0} and reading samples...".format( self.filename)) lba = LBAFile(f) samples = lba.read(self.sample_offset, self.num_samples) del lba gc.collect() # Ensure the loaded lba file is unloaded LOGGER.info("Read {0} samples".format(self.num_samples)) # Do global things across all samples LOGGER.info("Calculating sample statistics for entire dataset...") self.output_sample_statistics(samples) # Iterate over each of the two polarisations for pindex in range(samples.shape[2]): LOGGER.info("{0} Polarisation {1}".format(self.filename, pindex)) p = samples[:, :, pindex] self.polarisation = pindex os.makedirs(self.get_output_filename(), exist_ok=True) LOGGER.info("{0}, P{1} Sample statistics...".format( self.filename, pindex)) self.output_sample_statistics(p) # spectrogram_groups = [[], []] # freq1, freq2 : freq3, freq4 # Iterate over each of the four frequencies for freq_index in range(p.shape[1]): LOGGER.info("{0}, P{1} Frequency {2}".format( self.filename, pindex, freq_index)) freq_samples = p[:, freq_index] self.frequency = freq_index os.makedirs(self.get_output_filename(), exist_ok=True) LOGGER.info("{0}, P{1}, F{2} Sample statistics...".format( self.filename, pindex, freq_index)) self.output_sample_statistics(freq_samples) # Spectrogram for this frequency LOGGER.info("{0}, P{1}, F{2} Spectrogram".format( self.filename, pindex, freq_index)) f, t, sxx = self.create_spectrogram(freq_samples) # Calculate the actual frequencies for the spectrogram self.fix_freq(f, freq_index) spectrogram = (f, t, sxx) # spectrogram_groups[freq_index // 2].append(spectrogram) LOGGER.info("{0}, P{1}, F{2} Spectrogram Saving".format( self.filename, 
pindex, freq_index)) self.save_spectrogram(spectrogram) # Periodogram for this frequency # LOGGER.info("{0}, P{1}, F{2} Periodogram".format(self.filename, pindex, freq_index)) # f, pxx = self.create_periodogram(freq_samples) # self.fix_freq(f, freq_index) # LOGGER.info("{0}, P{1}, F{2} Periodogram Saving".format(self.filename, pindex, freq_index)) # self.save_periodogram((f, pxx)) # Welch LOGGER.info("{0}, P{1}, F{2} Welch".format( self.filename, pindex, freq_index)) f, spec = self.create_welch(freq_samples) self.fix_freq(f, freq_index) LOGGER.info("{0}, P{1}, F{2} Welch Saving".format( self.filename, pindex, freq_index)) self.save_welch((f, spec)) # Lombscargle try: LOGGER.info("{0}, P{1}, F{2} Lombscargle".format( self.filename, pindex, freq_index)) f, pxx = self.create_lombscargle(freq_samples) self.fix_freq(f, freq_index) LOGGER.info("{0}, P{1}, F{2} Lombscargle Saving".format( self.filename, pindex, freq_index)) self.save_lombscargle((f, pxx)) except ZeroDivisionError: print("Zero division in Lombscargle") # RFFT LOGGER.info("{0}, P{1}, F{2} RFFT".format( self.filename, pindex, freq_index)) f, ft = self.create_rfft(freq_samples) self.fix_freq(f, freq_index) LOGGER.info("{0}, P{1}, F{2} RFFT Saving".format( self.filename, pindex, freq_index)) self.save_rfft((f, ft)) # IFFT LOGGER.info("{0}, P{1}, F{2} IRFFT".format( self.filename, pindex, freq_index)) f, ft = self.create_ifft(freq_samples) self.fix_freq(f, freq_index) LOGGER.info("{0}, P{1}, F{2} IRFFT Saving".format( self.filename, pindex, freq_index)) self.save_ifft((f, ft)) # power spectral density LOGGER.info("{0}, P{1}, F{2} PSD".format( self.filename, pindex, freq_index)) pxx, f = self.create_psd(freq_samples) self.fix_freq(f, freq_index) LOGGER.info("{0}, P{1}, F{2} PSD Saving".format( self.filename, pindex, freq_index)) self.save_psd((pxx, f), "Power Spectral Density", "Power Spectral Density [V/rtMHz]") # amplitude spectral density LOGGER.info("{0}, P{1}, F{2} ASD".format( self.filename, pindex, 
freq_index)) np.sqrt(pxx, pxx) LOGGER.info("{0}, P{1}, F{2} ASD Saving".format( self.filename, pindex, freq_index)) self.save_psd((pxx, f), "Amplitude Spectral Density", "Amplitude Spectral Density [sqrt(V/rtMHz)]") self.frequency = None # Create merged spectrograms for this p """LOGGER.info("{0}, P{1} Merged Spectrograms...".format(self.filename, pindex))