Example #1
 def test_framewise_rms_energy_vad_decisions(self):
     for path in audiofiles:
         s, r = audio.read_wav(path)
         vad = audio.framewise_rms_energy_vad_decisions(s, r, 25)
         assert (vad.numpy() == 1).all()
     vad = audio.framewise_rms_energy_vad_decisions(np.zeros(3*16000), 16000, 25)
     assert (vad.numpy() == 0).all()
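
For context: frame-wise RMS energy VAD amounts to computing the root-mean-square energy of each fixed-length frame and thresholding it, which is why the test files yield all ones and digital silence yields all zeros. A minimal sketch of the idea, where the threshold value and the exact decision rule are assumptions rather than lidbox's implementation:

 import tensorflow as tf

 def rms_energy_vad_sketch(signal, sample_rate, frame_ms=25, threshold=1e-3):
     # Split the signal into non-overlapping frames of frame_ms milliseconds
     frame_len = (sample_rate * frame_ms) // 1000
     frames = tf.signal.frame(signal, frame_len, frame_len)
     # RMS energy per frame, thresholded into a binary speech/non-speech decision
     rms = tf.sqrt(tf.reduce_mean(tf.square(frames), axis=1))
     return tf.cast(rms > threshold, tf.int32)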
Example #2
 def test_wav_to_pcm_data(self):
     for path in audiofiles:
         s, r = audio.read_wav(path)
         h, b = audio.wav_to_pcm_data(s, r)
         assert len(h.numpy()) == 44, "unexpected wav header length"
         assert h.numpy()[:4].decode("ascii") == "RIFF", "wav header did not begin with 'RIFF'"
         assert len(b.numpy()) == 2 * s.shape[0], "unexpected wav data length, expected sample width of 2"
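
The 44 bytes in the first assertion are the size of a canonical PCM WAV header (12-byte RIFF descriptor, 24-byte fmt chunk, 8-byte data chunk header), and the factor of 2 reflects 16-bit samples. The standard-library wave module reproduces the same layout, which makes a handy independent check:

 import io
 import wave

 buf = io.BytesIO()
 with wave.open(buf, "wb") as w:
     w.setnchannels(1)        # mono
     w.setsampwidth(2)        # 16-bit PCM, i.e. 2 bytes per sample
     w.setframerate(16000)
     w.writeframes(b"\x00\x00" * 16000)  # one second of silence
 data = buf.getvalue()
 assert data[:4] == b"RIFF"
 assert len(data) == 44 + 2 * 16000  # 44-byte header plus 2 bytes per sample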
Example #3
 def test_peak_normalize(self):
     for path in audiofiles:
         s, r = audio.read_wav(path)
         s1 = s + np.random.normal(0, 10, s.shape)
         for level in range(0, -10, -1):
             s2 = audio.peak_normalize(s1, dBFS=level)
             assert not np.isnan(s2.numpy()).any()
             assert np.max(np.abs(s2)) <= audio.dBFS_to_linear(level), "maximum amplitude cannot exceed given dBFS level after peak normalization"
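
dBFS_to_linear converts a decibel level relative to full scale into a linear amplitude; for amplitudes the standard conversion is 10**(dB/20), and peak normalization then scales the signal so its largest absolute sample lands exactly on that amplitude. A minimal sketch of the standard formulas, not necessarily lidbox's exact implementation:

 import numpy as np

 def dbfs_to_linear_sketch(level_dbfs):
     # Amplitude ratio for a dBFS level, e.g. -6 dBFS -> roughly 0.501
     return 10.0 ** (level_dbfs / 20.0)

 def peak_normalize_sketch(signal, dBFS=0.0):
     # Scale so the largest absolute sample equals the target amplitude;
     # assumes a non-silent signal (peak > 0)
     peak = np.max(np.abs(signal))
     return signal * (dbfs_to_linear_sketch(dBFS) / peak)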
Example #4
 def test_resample(self):
     for path in audiofiles:
         s1, r1 = audio.read_wav(path)
         s2, r2 = audio.pyfunc_resample(s1, r1, 2*r1)
         assert r2 == 2*r1
         assert not np.isnan(s2.numpy()).any(), "NaNs after resampling"
         assert len(s2.shape) == len(s1.shape), "signal shape changed after resampling"
         assert s2.shape[0] == 2*s1.shape[0], "unexpected signal length after resampling"
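
The pyfunc_ prefix suggests the resampler runs as plain Python wrapped into the TensorFlow graph; whether lidbox does exactly this is an assumption, but the general pattern looks like the following sketch:

 import numpy as np
 import scipy.signal
 import tensorflow as tf

 def resample_np(signal, in_rate, out_rate):
     # Fourier-domain resampling to the length implied by the rate ratio
     out_len = int(round(len(signal) * out_rate / in_rate))
     return scipy.signal.resample(signal, out_len).astype(np.float32)

 def pyfunc_resample_sketch(signal, in_rate, out_rate):
     # numpy_function runs resample_np eagerly on the tensor contents
     out = tf.numpy_function(resample_np, [signal, in_rate, out_rate], tf.float32)
     return out, out_rate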
Example #5
 def test_remove_silence(self):
     for path in audiofiles:
         s, r = audio.read_wav(path)
         s1 = audio.remove_silence(s, r)
         assert not np.isnan(s1.numpy()).any()
         assert s1.shape == s.shape
     s1 = audio.remove_silence(np.zeros(3*16000), 16000)
     assert not np.isnan(s1.numpy()).any()
     assert tf.size(s1) == 0
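
remove_silence presumably composes the frame-wise VAD decisions from example #1 with frame masking, which would explain why an all-zeros input comes back empty while fully voiced test files keep their shape. A rough sketch of that composition, assuming one VAD decision per non-overlapping 25 ms frame; this is a guess at the structure, not lidbox's code:

 import tensorflow as tf

 def remove_silence_sketch(signal, sample_rate, frame_ms=25):
     frame_len = (sample_rate * frame_ms) // 1000
     frames = tf.signal.frame(signal, frame_len, frame_len)
     # Keep only frames whose VAD decision is 1, then flatten back to a 1D signal
     vad = audio.framewise_rms_energy_vad_decisions(signal, sample_rate, frame_ms)
     voiced = tf.boolean_mask(frames, tf.cast(vad, tf.bool))
     return tf.reshape(voiced, [-1])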
Example #6
 def test_linear_to_mel(self):
     for path in audiofiles:
         s, r = audio.read_wav(path)
         for num_mel_bins in range(10, 100, 15):
             powspecs = audio.spectrograms(np.expand_dims(s, 0), r)
             melspec = audio.linear_to_mel(powspecs, r, num_mel_bins=num_mel_bins)[0]
             assert not np.isnan(melspec.numpy()).any()
             assert melspec.shape[0] == powspecs[0].shape[0]
             assert melspec.shape[1] == num_mel_bins
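
The shape assertions follow from how a mel filterbank is applied: each spectrogram frame of n_fft//2 + 1 linear bins is projected onto num_mel_bins triangular filters, so the time axis is unchanged and the frequency axis becomes num_mel_bins. TensorFlow ships this projection; lidbox's linear_to_mel plausibly wraps something like this sketch:

 import tensorflow as tf

 def linear_to_mel_sketch(powspec, sample_rate, num_mel_bins=40):
     # powspec: (frames, num_spectrogram_bins) power spectrogram
     mel_weights = tf.signal.linear_to_mel_weight_matrix(
         num_mel_bins=num_mel_bins,
         num_spectrogram_bins=powspec.shape[-1],
         sample_rate=sample_rate)
     # (frames, linear_bins) @ (linear_bins, mel_bins) -> (frames, mel_bins)
     return tf.matmul(powspec, mel_weights)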
Example #7
 def test_power_to_db(self):
     for top_db in range(10, 110, 10):
         for path in audiofiles:
             s, r = audio.read_wav(path)
             _, _, stft = scipy.signal.stft(s)
             powspec = np.abs(stft)**2
             dbspec = audio.power_to_db(np.expand_dims(powspec, 0), top_db=float(top_db))[0].numpy()
             assert not np.isnan(dbspec).any()
             assert dbspec.max() <= 0
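
power_to_db converts power to decibels relative to the spectrogram's maximum and limits the dynamic range to top_db below that peak, which is why the maximum is always at most 0 here. librosa uses the same convention, so it works as a cross-check (reusing powspec and top_db from the test above, and assuming lidbox follows librosa's definition):

 import librosa
 import numpy as np

 # 10*log10(S / S.max()), floored at top_db below the peak; the peak maps to 0 dB
 dbspec_ref = librosa.power_to_db(powspec, ref=np.max, top_db=float(top_db))
 assert dbspec_ref.max() == 0.0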
Example #8
 def test_write_mono_wav(self):
     for inpath in audiofiles:
         s, r = audio.read_wav(inpath)
         with tempfile.TemporaryDirectory() as tmpdir:
             outpath = os.path.join(tmpdir, os.path.basename(inpath))
             wrotepath = audio.write_mono_wav(outpath, s, r)
             assert os.path.exists(outpath)
             assert wrotepath == outpath
             assert librosa.get_duration(filename=outpath, sr=None) == (s.shape[0] / r)
             assert librosa.get_samplerate(outpath) == r
             s1, r1 = librosa.load(outpath, sr=None)
             assert not np.isnan(s1).any()
             assert s1.shape == s.shape
             assert r1 == r
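
write_mono_wav plausibly wraps TensorFlow's own WAV encoder; a minimal sketch of writing a mono float signal as 16-bit PCM with pure TF ops, offered as an assumption about the implementation:

 import tensorflow as tf

 def write_mono_wav_sketch(path, signal, sample_rate):
     # encode_wav expects shape (samples, channels), so add the channel axis
     wav_bytes = tf.audio.encode_wav(tf.expand_dims(signal, -1), sample_rate)
     tf.io.write_file(path, wav_bytes)
     return path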
Example #9
 def test_spectrograms(self):
     for path in audiofiles:
         s, r = audio.read_wav(path)
         for len_ms in range(20, 101, 20):
             for n_fft in (256, 512, 1024, 2048):
                 if n_fft < audio.ms_to_frames(r, len_ms):
                     continue
                 step_ms = len_ms // 2
                 powspec = audio.spectrograms(np.expand_dims(s, 0), r,
                         frame_length_ms=len_ms,
                         frame_step_ms=step_ms,
                         fft_length=n_fft)[0]
                 assert not np.isnan(powspec.numpy()).any()
                 assert powspec.shape[0] == s.shape[0] // audio.ms_to_frames(r, step_ms) - 1
                 assert powspec.shape[1] == n_fft // 2 + 1
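
The expected frame count in the first shape assertion is plain STFT framing arithmetic: without padding, N samples yield 1 + (N - frame_len) // step full frames, and because the test always uses step_ms = len_ms // 2 (so frame_len == 2 * step), that simplifies to N // step - 1:

 def expected_num_frames(num_samples, frame_len, frame_step):
     # Number of full frames that fit without padding
     return 1 + (num_samples - frame_len) // frame_step

 # With frame_len == 2 * frame_step this equals num_samples // frame_step - 1,
 # which is exactly the expression asserted above.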
Example #10
File: steps.py Project: Yaffa16/lidbox
 def add_random_noise_and_flatten(x):
     # Random noise path indexes and random snr levels
     rand_noise = [(noise_type,
                    tf.random.uniform([], 0,
                                      tf.size(type2paths[noise_type]),
                                      tf.int32),
                    tf.random.uniform([], snr_low, snr_high, tf.float32))
                   for noise_type, snr_low, snr_high in snr_list]
     # Load random noise signals with drawn indexes
     rand_noise = [
         (audio_features.read_wav(type2paths[noise_type][rand_index]), snr)
         for noise_type, rand_index, snr in rand_noise
     ]
     # Assert sample rates
     # TODO maybe add inline resampling
     for (noise, sample_rate), snr in rand_noise:
         tf.debugging.assert_equal(
             sample_rate,
             x["sample_rate"],
             message=("Invalid noise signals are being used, all noise signals "
                      "must have same sample rate as speech signals that are "
                      "being augmented"))
     # Fix noise signal length to match x["signal"] by repeating the noise
     # signal if it is too short and then slicing it to the speech signal length
     rand_noise = [
         # How many times `noise` fits into x["signal"]
         (tf.cast(tf.size(x["signal"]) / tf.size(noise), tf.int32), noise, snr)
         for (noise, _), snr in rand_noise
     ]
     rand_noise = [
         # Repeat the noise enough times, then slice to the speech signal length
         (tf.tile(noise, [1 + noise_length_ratio])[:tf.size(x["signal"])], snr)
         for noise_length_ratio, noise, snr in rand_noise
     ]
     mixed_signals = [
         audio_features.snr_mixer(x["signal"], noise, snr)[2]
         for noise, snr in rand_noise
     ]
     new_ids = [
         tf.strings.join(
             (x["id"], noise_type, tf.strings.as_string(snr, precision=2)),
             separator="-")
         # rand_noise holds (signal, snr) pairs at this point, so the noise
         # type must come from the parallel snr_list
         for (noise_type, _, _), (_, snr) in zip(snr_list, rand_noise)
     ]
     # Create new elements from the mixed signals and return them as a dataset
     return (tf.data.Dataset
             .zip((tf.data.Dataset.from_tensor_slices(new_ids),
                   tf.data.Dataset.from_tensor_slices(mixed_signals),
                   tf.data.Dataset.from_tensors(x).repeat(len(mixed_signals))))
             .map(update_element_meta))
Example #11
File: steps.py Project: gaoyiyeah/lidbox
 def _add_random_noise_and_flatten(x):
     """
     Using snr_list, choose len(snr_list) noise signals randomly and create new signal samples by mixing the chosen noise signals with x["signal"] using random SNR dB levels.
     """
     # Random noise path indexes and random snr levels
     rand_noise = [
             (noise_type,
              tf.random.uniform([], 0, tf.size(type2paths[noise_type]), tf.int32),
              tf.random.uniform([], snr_low, snr_high, tf.float32))
             for noise_type, snr_low, snr_high in snr_list]
     # Select random noise signals by drawn indexes and read contents from files
     rand_noise = [
             (audio_features.read_wav(type2paths[noise_type][rand_index]), snr)
             for noise_type, rand_index, snr in rand_noise]
     # Assert sample rates
     # TODO maybe add inline resampling of noise signals so they match the speech sr
     for (noise, sample_rate), snr in rand_noise:
         tf.debugging.assert_equal(sample_rate, x["sample_rate"], message="Invalid noise signals are being used, all noise signals must have same sample rate as speech signals that are being augmented")
     # Fix noise signal length to match x["signal"] by repeating the noise signal if it is too short and then slicing it
     rand_noise = [
             # How many multiples of `noise` fits in x["signal"]
             (tf.cast(tf.size(x["signal"]) / tf.size(noise), tf.int32), noise, snr)
             for (noise, _), snr in rand_noise]
     rand_noise = [
             # Repeat noise and slice
             (tf.tile(noise, [1 + noise_length_ratio])[:tf.size(x["signal"])], snr)
             for noise_length_ratio, noise, snr in rand_noise]
     # Mix x["signal"] and chosen noise signals
     mixed_signals = [audio_features.snr_mixer(x["signal"], noise, snr)[2] for noise, snr in rand_noise]
     # Create new utterance ids that contain the mixed noise type and SNR level
     new_ids = [
             tf.strings.join((
                     "augmented",
                     x["id"],
                     noise_type,
                     tf.strings.join(("snr", tf.strings.as_string(snr, precision=2)))),
                 separator="-")
             for (noise_type, _, _), (_, snr) in zip(snr_list, rand_noise)]
     # Create new elements from the mixed signals and return as dataset
     return (tf.data.Dataset
               .zip((tf.data.Dataset.from_tensor_slices(new_ids),
                     tf.data.Dataset.from_tensor_slices(mixed_signals),
                     tf.data.Dataset.from_tensors(x).repeat(len(mixed_signals))))
               .map(_update_element_meta))
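
Since the function maps one element to a tf.data.Dataset of several augmented elements, it would typically be applied with flat_map, which concatenates the per-element datasets back into one flat stream. A usage sketch, where the dataset variable and its element structure are assumptions:

 # ds: a tf.data.Dataset of dicts with "id", "signal" and "sample_rate" keys,
 # with snr_list and type2paths available in the enclosing scope
 ds = ds.flat_map(_add_random_noise_and_flatten)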
Example #12
 def test_read_wav(self):
     for path in audiofiles:
         s, r = audio.read_wav(path)
         assert not np.isnan(s.numpy()).any(), "NaNs in signal"
         assert s.shape == (3*16000,), "unexpected signal shape"
         assert r == 16000, "unexpected sample rate"
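
read_wav itself plausibly reduces to TensorFlow's WAV decoder; a minimal sketch matching the (signal, sample_rate) return convention used throughout these examples, offered as an assumption rather than lidbox's verbatim code:

 import tensorflow as tf

 def read_wav_sketch(path):
     signal, sample_rate = tf.audio.decode_wav(tf.io.read_file(path))
     # decode_wav returns shape (samples, channels); squeeze mono down to 1D
     return tf.squeeze(signal, -1), sample_rate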
Example #13
File: steps.py Project: Yaffa16/lidbox
 def append_signals(x):
     signal, sample_rate = audio_features.read_wav(x["path"])
     return dict(x, signal=signal, sample_rate=sample_rate)
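
In a pipeline this kind of step would be applied with map over a dataset of metadata dicts. A usage sketch with illustrative paths and keys, assuming read_wav is built from TF ops so it can run inside map:

 import tensorflow as tf

 meta = {"id": ["utt1", "utt2"], "path": ["/data/utt1.wav", "/data/utt2.wav"]}
 ds = tf.data.Dataset.from_tensor_slices(meta).map(append_signals)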