def main():
    """Time spectrogram computation over an entire audio file.

    Reads the file at `FILE_PATH` in chunks, computes a spectrogram of
    each chunk, and prints how many times faster than real time the
    processing ran.
    """

    began = time.time()

    # Analysis settings as sample counts (assumes `WINDOW_SIZE` and
    # `HOP_SIZE` are durations in seconds -- TODO confirm).
    window_size = int(round(WINDOW_SIZE * SAMPLE_RATE))
    hop_size = int(round(HOP_SIZE * SAMPLE_RATE))
    dft_size = tfa_utils.get_dft_size(window_size)

    for label, value in (
            ('window size', window_size),
            ('hop size', hop_size),
            ('DFT size', dft_size)):
        print(label, value)

    # Size a full read so it covers a whole number of spectra: one window
    # for the first spectrum plus one hop for each additional spectrum.
    spectra_per_read = int(round(APPROXIMATE_READ_SIZE / hop_size))
    full_read_size = (spectra_per_read - 1) * hop_size + window_size

    window = HannWindow(window_size).samples
    reader = WaveAudioFileReader(str(FILE_PATH), mono_1d=True)
    total_length = reader.length

    position = 0
    remaining = total_length

    # Keep going as long as at least one full window of samples is left.
    while remaining >= window_size:

        # print(index)

        chunk = reader.read(position, min(full_read_size, remaining))
        gram = tfa_utils.compute_spectrogram(
            chunk, window, hop_size, dft_size)

        # inband_powers = compute_inband_powers(gram)

        # Advance by one hop per spectrum computed, so that the next
        # read starts where the next spectrum's window starts.
        position += len(gram) * hop_size
        remaining = total_length - position

    elapsed = time.time() - began
    duration = reader.length / reader.sample_rate
    rate = duration / elapsed

    message = (
        'Processed {:.1f} seconds of audio in {:.1f} seconds, {:.1f} '
        'times faster than real time.')
    print(message.format(duration, elapsed, rate))
def main():
    """Time spectrogram and in-band power computation over an audio file.

    Reads the file at `FILE_PATH` in chunks, computes a spectrogram and
    then in-band powers for each chunk, and prints how many times faster
    than real time the processing ran.
    """

    start = time.time()

    # Analysis settings as sample counts (assumes `WINDOW_SIZE` and
    # `HOP_SIZE` are durations in seconds -- TODO confirm).
    window_size = int(round(WINDOW_SIZE * SAMPLE_RATE))
    hop_size = int(round(HOP_SIZE * SAMPLE_RATE))
    dft_size = tfa_utils.get_dft_size(window_size)

    print('window size', window_size)
    print('hop size', hop_size)
    print('DFT size', dft_size)

    # A full-sized read spans one window plus one hop for each spectrum
    # after the first.
    read_spectrum_count = int(round(APPROXIMATE_READ_SIZE / hop_size))
    max_read_size = (read_spectrum_count - 1) * hop_size + window_size

    window = HannWindow(window_size).samples
    reader = WaveAudioFileReader(str(FILE_PATH), mono_1d=True)
    length = reader.length

    offset = 0

    while True:

        # Stop once less than one full window of samples is left.
        available = length - offset
        if available < window_size:
            break

        # print(offset)

        samples = reader.read(offset, min(max_read_size, available))

        gram = tfa_utils.compute_spectrogram(
            samples, window, hop_size, dft_size)
        inband_powers = compute_inband_powers(gram)

        # Advance by one hop for each item of `inband_powers`.
        offset += len(inband_powers) * hop_size

    elapsed = time.time() - start
    duration = reader.length / reader.sample_rate
    rate = duration / elapsed

    template = (
        'Processed {:.1f} seconds of audio in {:.1f} seconds, {:.1f} '
        'times faster than real time.')
    print(template.format(duration, elapsed, rate))
def _get_gram_channel_samples(waveform_samples, window, hop_size, dft_size):
    """Return the scaled spectrogram of one channel's waveform samples.

    The spectrogram is computed with `tfa_utils.compute_spectrogram` and
    then passed to `tfa_utils.scale_spectrogram`, with the same array
    supplied as both input and `out`.
    """

    spectrogram = tfa_utils.compute_spectrogram(
        waveform_samples, window, hop_size, dft_size)

    # The return value of `scale_spectrogram` is ignored: the result is
    # taken from the array passed as `out`.
    tfa_utils.scale_spectrogram(spectrogram, out=spectrogram)

    return spectrogram
def plot_spectrogram(samples, sample_rate, title, pdf_file):
    """Plot a log-scaled spectrogram of `samples` and save it to `pdf_file`.

    The spectrogram uses a 5 ms periodic Hann window, a hop of 20 percent
    of the window duration, and a DFT twice the size that
    `tfa_utils.get_dft_size` returns for the window size.

    Args:
        samples: the waveform samples to analyze.
        sample_rate: the sample rate of `samples`, in hertz.
        title: the title of the plot.
        pdf_file: object whose `savefig` method saves the current
            figure (presumably a matplotlib `PdfPages` -- TODO confirm).
    """

    window_size_sec = .005
    hop_size_percent = 20

    window_size = int(round(window_size_sec * sample_rate))

    # The `hanning` alias is deprecated and absent from newer SciPy
    # releases (assuming `signal` is `scipy.signal`). `get_window` with
    # `fftbins=True` returns the same periodic Hann window that
    # `hanning(window_size, sym=False)` did.
    window = signal.get_window('hann', window_size, fftbins=True)

    hop_size_sec = window_size_sec * hop_size_percent / 100
    hop_size = int(round(hop_size_sec * sample_rate))

    dft_size = 2 * tfa_utils.get_dft_size(window_size)

    gram = tfa_utils.compute_spectrogram(samples, window, hop_size, dft_size)
    gram = tfa_utils.linear_to_log(gram)

    # plot_histogram(gram)

    # Spectrum times are at window centers.
    # NOTE(review): these use the nominal hop and window durations rather
    # than the rounded sample counts `hop_size` and `window_size`, so the
    # time axis is approximate when the products with `sample_rate` are
    # not whole numbers.
    times = np.arange(len(gram)) * hop_size_sec + window_size_sec / 2

    # Use integer division so that the bin count is an `int` rather than
    # a `float` under Python 3.
    num_bins = dft_size // 2 + 1
    bin_size = sample_rate / dft_size
    freqs = np.arange(num_bins) * bin_size

    # Transpose so that frequency runs along the vertical image axis.
    x = gram.transpose()

    plt.figure(figsize=(12, 6))

    # Extend the time extent by half a hop on each side of the first and
    # last spectrum times.
    start_time = times[0] - hop_size_sec / 2
    end_time = times[-1] + hop_size_sec / 2
    start_freq = freqs[0]
    end_freq = freqs[-1]
    extent = (start_time, end_time, start_freq, end_freq)

    # `vmin` and `vmax` were chosen by looking at histogram of spectrogram
    # values plotted by `plot_histogram` function.
    plt.imshow(
        x, cmap='gray_r', vmin=-25, vmax=125, origin='lower', extent=extent,
        aspect='auto')

    plt.title(title)
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    # plt.ylim(0, 11000)

    pdf_file.savefig()

    plt.close()
def _analyze(self):
    """Compute the spectrogram of `self.audio.samples`.

    The spectrogram is scaled with `tfa_utils.scale_spectrogram` and, if
    `self.reference_power` is not `None`, also passed to
    `tfa_utils.linear_to_log` with that reference power. Both steps are
    given the spectrogram array itself as `out`.
    """

    gram = tfa_utils.compute_spectrogram(
        self.audio.samples, self.window.samples, self.hop_size,
        self.dft_size)

    tfa_utils.scale_spectrogram(gram, out=gram)

    # With no reference power the spectrogram is left on a linear scale.
    if self.reference_power is None:
        return gram

    tfa_utils.linear_to_log(gram, self.reference_power, out=gram)
    return gram
def _analyze(self):
    """Return the scaled spectrogram of this object's audio samples.

    If `self.reference_power` is not `None`, the scaled spectrogram is
    additionally passed to `tfa_utils.linear_to_log` with that reference
    power before it is returned.
    """

    samples = self.audio.samples
    window = self.window.samples

    result = tfa_utils.compute_spectrogram(
        samples, window, self.hop_size, self.dft_size)

    # Each step below is handed `result` as its own `out` array, and the
    # steps' return values are ignored.
    tfa_utils.scale_spectrogram(result, out=result)

    convert_to_log = self.reference_power is not None
    if convert_to_log:
        tfa_utils.linear_to_log(result, self.reference_power, out=result)

    return result
def compute_vesper_spectrogram(waveform, window_size, hop_size):
    """Compute a spectrogram of `waveform` and report the time it took.

    A 'Hann' window of `window_size` samples is created with
    `data_windows.create_window`. Only the call to
    `tfa_utils.compute_spectrogram` is timed; the result and the two
    timestamps are passed to `report_performance`.

    Returns the computed spectrogram.
    """

    hann = data_windows.create_window('Hann', window_size)
    window = hann.samples

    print('Computing Vesper spectrogram...')

    # Window creation above is deliberately outside the timed region.
    began = time.time()
    spectrogram = tfa_utils.compute_spectrogram(waveform, window, hop_size)
    finished = time.time()

    print('Done.')

    report_performance(spectrogram, began, finished)

    return spectrogram
def plot_spectrogram(samples, sample_rate, title, pdf_file):
    """Render a log-scaled spectrogram of `samples` as a page of `pdf_file`.

    Analysis settings are fixed: a 5 ms periodic Hann window, a hop of
    20 percent of the window duration, and a DFT twice the size that
    `tfa_utils.get_dft_size` returns for the window size.

    Args:
        samples: the waveform samples to analyze.
        sample_rate: the sample rate of `samples`, in hertz.
        title: the plot title.
        pdf_file: object whose `savefig` method saves the current
            figure (presumably a matplotlib `PdfPages` -- TODO confirm).
    """

    window_size_sec = .005
    hop_size_percent = 20
    hop_size_sec = window_size_sec * hop_size_percent / 100

    # Convert the durations to whole numbers of samples.
    window_size = int(round(window_size_sec * sample_rate))
    hop_size = int(round(hop_size_sec * sample_rate))

    # `signal.hanning` is a deprecated alias that newer SciPy releases
    # no longer provide (assuming `signal` is `scipy.signal`).
    # `get_window('hann', n, fftbins=True)` produces the same periodic
    # window as `hanning(n, sym=False)`.
    window = signal.get_window('hann', window_size, fftbins=True)

    dft_size = 2 * tfa_utils.get_dft_size(window_size)

    gram = tfa_utils.linear_to_log(
        tfa_utils.compute_spectrogram(samples, window, hop_size, dft_size))

    # plot_histogram(gram)

    # Each spectrum is placed at the center of its window.
    # NOTE(review): the axis uses the nominal durations, not the rounded
    # `hop_size` and `window_size`, so it is approximate when those were
    # changed by rounding.
    times = np.arange(len(gram)) * hop_size_sec + window_size_sec / 2

    # Integer division keeps the bin count an `int` under Python 3.
    num_bins = dft_size // 2 + 1
    bin_size = sample_rate / dft_size
    freqs = np.arange(num_bins) * bin_size

    # The image extent reaches half a hop beyond the first and last
    # spectrum times and covers the full range of bin frequencies.
    half_hop = hop_size_sec / 2
    extent = (times[0] - half_hop, times[-1] + half_hop, freqs[0], freqs[-1])

    plt.figure(figsize=(12, 6))

    # `vmin` and `vmax` were chosen by looking at histogram of spectrogram
    # values plotted by `plot_histogram` function.
    #
    # The spectrogram is transposed so that frequency runs along the
    # vertical image axis.
    plt.imshow(
        gram.transpose(), cmap='gray_r', vmin=-25, vmax=125, origin='lower',
        extent=extent, aspect='auto')

    plt.title(title)
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    # plt.ylim(0, 11000)

    pdf_file.savefig()

    plt.close()
def _test_compute_spectrogram(
        self, num_channels, dft_size, hop_size, bin_num):
    """Check `tfa_utils.compute_spectrogram` against expected spectra.

    A test signal twice as long as the DFT is created with
    `self._create_test_signal`, analyzed with an all-ones window of
    length `dft_size`, and compared to the result of
    `self._get_expected_spectra` using `np.allclose`.
    """

    signal_length = dft_size * 2

    test_signal = self._create_test_signal(
        num_channels, signal_length, dft_size, bin_num)

    # All-ones window, of the same length as the DFT.
    rectangular_window = np.ones(dft_size)

    actual = tfa_utils.compute_spectrogram(
        test_signal, rectangular_window, hop_size, dft_size)

    expected = self._get_expected_spectra(
        num_channels, signal_length, hop_size, dft_size, bin_num)

    spectra_match = np.allclose(actual, expected)
    self.assertTrue(spectra_match)
def _compute_channel_gram(self, channel_num, start_frame, end_frame):
    """Compute scaled spectrogram frames for one waveform channel.

    Args:
        channel_num: index into `self._waveform.channels`.
        start_frame: index of the first spectrogram frame to compute.
        end_frame: end spectrogram frame index; `end_frame - start_frame`
            frames are requested.

    Returns:
        The spectrogram of the relevant waveform slice, after
        `tfa_utils.scale_spectrogram` has been applied to it.
    """

    settings = self._settings
    window = settings.window
    hop_size = settings.hop_size

    # Spectrogram frame `i` starts at waveform index `i * hop_size`.
    first_sample = start_frame * hop_size

    # Number of waveform frames needed for the requested number of
    # spectrogram frames.
    sample_count = _get_waveform_frame_count(
        end_frame - start_frame, len(window), hop_size)

    channel = self._waveform.channels[channel_num]
    segment = channel[first_sample:first_sample + sample_count]

    spectrogram = tfa_utils.compute_spectrogram(
        segment, window, hop_size, settings.dft_size)

    # The result is taken from the array passed as `out`.
    tfa_utils.scale_spectrogram(spectrogram, out=spectrogram)

    return spectrogram
def process(self, x):
    """Return the spectrogram of `x`.

    The spectrogram is computed by `tfa_utils.compute_spectrogram` with
    this object's `window`, `hop_size`, and `dft_size`.
    """

    analysis_settings = (self.window, self.hop_size, self.dft_size)
    return tfa_utils.compute_spectrogram(x, *analysis_settings)
def _compute_spectrogram(self, waveform):
    """Return the log-scaled spectrogram of `waveform`.

    The window samples, hop size, DFT size, and reference power all come
    from `self._spectrogram_settings`.
    """

    settings = self._spectrogram_settings

    linear_gram = tfa_utils.compute_spectrogram(
        waveform, settings.window.samples, settings.hop_size,
        settings.dft_size)

    log_gram = tfa_utils.linear_to_log(linear_gram, settings.reference_power)
    return log_gram
def process(self, x):
    """Compute and return the spectrogram of the samples `x`.

    Uses the window, hop size, and DFT size stored on this object.
    """

    spectrogram = tfa_utils.compute_spectrogram(
        x, self.window, self.hop_size, self.dft_size)

    return spectrogram
def _compute_spectrogram(self, waveform):
    """Compute a spectrogram of `waveform` and convert it to a log scale.

    All analysis parameters are read from `self._spectrogram_settings`.
    """

    s = self._spectrogram_settings

    # The conversion is relative to the reference power in the settings.
    return tfa_utils.linear_to_log(
        tfa_utils.compute_spectrogram(
            waveform, s.window.samples, s.hop_size, s.dft_size),
        s.reference_power)