def test_fill_zeros(self):
    """Constant-fill masking with 0.0 zeroes the expected number of rows.

    The spectrogram of ``acoustic_guitar_0.wav`` is passed through
    ``SpecFrequencyMask`` with identical min and max mask fractions, and
    the number of rows (first axis) whose sum is exactly 0.0 is compared
    with ``round(rows * mask_fraction)``.
    """
    audio_path = os.path.join(DEMO_DIR, "acoustic_guitar_0.wav")
    samples, sample_rate = load_sound_file(audio_path, sample_rate=None)
    magnitude_spectrogram = librosa.feature.melspectrogram(
        y=samples, sr=sample_rate
    )

    # min == max, so the mask fraction is pinned to a single value.
    mask_fraction = 0.05
    transform = SpecFrequencyMask(
        fill_mode="constant",
        fill_constant=0.0,
        min_mask_fraction=mask_fraction,
        max_mask_fraction=mask_fraction,
        p=1.0,
    )
    augmented_spectrogram = transform(magnitude_spectrogram)

    if DEBUG:
        plot_matrix(np.log(augmented_spectrogram))

    # The transform must have changed something: array equality has to fail.
    with np.testing.assert_raises(AssertionError):
        np.testing.assert_array_equal(augmented_spectrogram, magnitude_spectrogram)

    zeroed_rows = [
        row_index
        for row_index in range(augmented_spectrogram.shape[0])
        if sum(augmented_spectrogram[row_index]) == 0.0
    ]
    expected_count = int(round(magnitude_spectrogram.shape[0] * mask_fraction))
    self.assertEqual(len(zeroed_rows), expected_count)
def test_fill_mean_multichannel(self):
    """Mean-fill masking on a 3-channel spectrogram masks the expected rows.

    A two-channel file is loaded, one spectrogram is computed per channel,
    and they are stacked into a float32 array with three channels (the
    second channel is used twice). A row counts as masked when all of its
    values are equal and its sum is non-zero.
    """
    samples, sample_rate = load_sound_file(
        os.path.join(DEMO_DIR, "background_noises", "hens.ogg"),
        sample_rate=None,
        mono=False,
    )
    assert samples.shape[0] == 2

    first_channel, second_channel = [
        librosa.feature.melspectrogram(
            y=np.asfortranarray(samples[channel, :]), sr=sample_rate
        )
        for channel in (0, 1)
    ]

    multichannel_magnitude_spectrogram = np.zeros(
        shape=(first_channel.shape[0], first_channel.shape[1], 3),
        dtype=np.float32,
    )
    # Channel layout: [chn0, chn1, chn1]
    for target_index, source in enumerate(
        (first_channel, second_channel, second_channel)
    ):
        multichannel_magnitude_spectrogram[:, :, target_index] = source

    # min == max, so the mask fraction is pinned to a single value.
    mask_fraction = 0.05
    transform = SpecFrequencyMask(
        fill_mode="mean",
        min_mask_fraction=mask_fraction,
        max_mask_fraction=mask_fraction,
        p=1.0,
    )
    augmented_spectrogram = transform(multichannel_magnitude_spectrogram)

    if DEBUG:
        image = (7 + np.log10(augmented_spectrogram + 0.0000001)) / 8
        plot_matrix(image)

    # The transform must have changed something: array equality has to fail.
    with np.testing.assert_raises(AssertionError):
        np.testing.assert_array_equal(
            augmented_spectrogram, multichannel_magnitude_spectrogram
        )

    def looks_mean_filled(frequency_slice):
        # Constant across the whole slice, but not constant zero.
        return (
            np.amin(frequency_slice) == np.amax(frequency_slice)
            and np.sum(frequency_slice) != 0.0
        )

    num_masked_frequencies = sum(
        1
        for row_index in range(augmented_spectrogram.shape[0])
        if looks_mean_filled(augmented_spectrogram[row_index])
    )
    expected_count = int(
        round(multichannel_magnitude_spectrogram.shape[0] * mask_fraction)
    )
    self.assertEqual(num_masked_frequencies, expected_count)
def test_shuffle_channels_mono(self):
    """SpecChannelShuffle must raise MonoAudioNotSupportedException here.

    The spectrogram is computed from ``acoustic_guitar_0.wav`` loaded with
    the loader's default channel handling, then handed to the transform.
    """
    samples, sample_rate = load_sound_file(
        os.path.join(DEMO_DIR, "acoustic_guitar_0.wav"), sample_rate=None
    )
    magnitude_spectrogram = librosa.feature.melspectrogram(y=samples, sr=sample_rate)
    transform = SpecChannelShuffle(p=1.0)

    # Only the raised exception matters, so the call's result is not bound
    # to a name.
    with self.assertRaises(MonoAudioNotSupportedException):
        transform(magnitude_spectrogram)
def test_shuffle_channels(self):
    """Channels of the output match the input according to the shuffle order.

    A 3-channel float32 spectrogram is built from a two-channel file (the
    third channel is the second one scaled by 0.7). The transform's
    parameters are re-randomized until the order differs from
    ``[0, 1, 2]`` (at most 100000 attempts), then frozen. Each output
    channel must equal the input channel named by
    ``shuffled_channel_indexes``.
    """
    samples, sample_rate = load_sound_file(
        os.path.join(DEMO_DIR, "background_noises", "hens.ogg"),
        sample_rate=None,
        mono=False,
    )
    assert samples.shape[0] == 2

    first_channel, second_channel = [
        librosa.feature.melspectrogram(
            y=np.asfortranarray(samples[channel, :]), sr=sample_rate
        )
        for channel in (0, 1)
    ]

    multichannel_magnitude_spectrogram = np.zeros(
        shape=(first_channel.shape[0], first_channel.shape[1], 3),
        dtype=np.float32,
    )
    # Channel layout: [chn0, chn1, chn1 * 0.7]
    for target_index, source in enumerate(
        (first_channel, second_channel, second_channel * 0.7)
    ):
        multichannel_magnitude_spectrogram[:, :, target_index] = source

    if DEBUG:
        image = (7 + np.log10(multichannel_magnitude_spectrogram + 0.0000001)) / 8
        plot_matrix(image, title="before")

    # Make sure the shuffled channels do not equal the original order
    transform = SpecChannelShuffle(p=1.0)
    original_order = [0, 1, 2]
    for _attempt in range(100000):
        transform.randomize_parameters(multichannel_magnitude_spectrogram)
        if transform.parameters["shuffled_channel_indexes"] != original_order:
            break
    transform.freeze_parameters()

    augmented_spectrogram = transform(multichannel_magnitude_spectrogram)

    if DEBUG:
        image = (7 + np.log10(augmented_spectrogram + 0.0000001)) / 8
        plot_matrix(image, title="after")

    # The transform must have changed something: array equality has to fail.
    with np.testing.assert_raises(AssertionError):
        np.testing.assert_array_equal(
            augmented_spectrogram, multichannel_magnitude_spectrogram
        )

    shuffled_order = transform.parameters.get("shuffled_channel_indexes")
    for augmented_index, original_index in enumerate(shuffled_order):
        np.testing.assert_array_equal(
            augmented_spectrogram[:, :, augmented_index],
            multichannel_magnitude_spectrogram[:, :, original_index],
        )
def test_load_stereo_signed_16_bit_wav(self):
    """Load ``stereo_16bit.wav`` and check rate, dtype, shape and peak value."""
    wav_path = os.path.join(DEMO_DIR, "stereo_16bit.wav")
    samples, sample_rate = load_sound_file(wav_path, sample_rate=None)

    self.assertEqual(sample_rate, 16000)
    self.assertEqual(samples.dtype, np.float32)

    # Expect a one-dimensional array of exactly 17833 samples.
    self.assertEqual(len(samples.shape), 1)
    self.assertEqual(samples.shape[0], 17833)

    # The peak must lie strictly between 0.5 and 1.0.
    peak = np.amax(samples)
    self.assertGreater(peak, 0.5)
    self.assertLess(peak, 1.0)
def test_load_mono_ms_adpcm(self):
    """Load ``ms_adpcm.wav`` and check rate, dtype, shape and peak value."""
    wav_path = os.path.join(DEMO_DIR, "ms_adpcm.wav")
    samples, sample_rate = load_sound_file(wav_path, sample_rate=None)

    self.assertEqual(sample_rate, 11024)
    self.assertEqual(samples.dtype, np.float32)

    # Expect a one-dimensional array of exactly 895500 samples.
    self.assertEqual(len(samples.shape), 1)
    self.assertEqual(samples.shape[0], 895500)

    # The peak must lie strictly between 0.3 and 1.0.
    peak = np.amax(samples)
    self.assertGreater(peak, 0.3)
    self.assertLess(peak, 1.0)
def test_load_mono_signed_24_bit_wav(self):
    """Load ``signed_24bit.wav`` and check rate, dtype, shape and peak value."""
    wav_path = os.path.join(DEMO_DIR, "signed_24bit.wav")
    samples, sample_rate = load_sound_file(wav_path, sample_rate=None)

    self.assertEqual(sample_rate, 48000)
    self.assertEqual(samples.dtype, np.float32)

    # Expect a one-dimensional array of exactly 54514 samples.
    self.assertEqual(len(samples.shape), 1)
    self.assertEqual(samples.shape[0], 54514)

    # The peak must lie strictly between 0.09 and 1.0.
    peak = np.amax(samples)
    self.assertGreater(peak, 0.09)
    self.assertLess(peak, 1.0)
def test_load_mono_m4a(self):
    """Load ``testing.m4a`` and check rate, dtype, shape and peak value."""
    m4a_path = os.path.join(DEMO_DIR, "testing.m4a")
    samples, sample_rate = load_sound_file(m4a_path, sample_rate=None)

    self.assertEqual(sample_rate, 44100)
    self.assertEqual(samples.dtype, np.float32)
    self.assertEqual(len(samples.shape), 1)

    # Lower and upper bound coincide, so together they pin the length
    # to exactly 141312 samples.
    num_samples = samples.shape[0]
    self.assertGreaterEqual(num_samples, 141312)
    self.assertLessEqual(num_samples, 141312)

    # The peak must lie strictly between 0.1 and 1.0.
    peak = np.amax(samples)
    self.assertGreater(peak, 0.1)
    self.assertLess(peak, 1.0)
def test_load_mono_opus(self):
    """Load ``bus.opus`` and check dtype, shape, length range and peak value.

    The returned sample rate is not asserted by this test.
    """
    opus_path = os.path.join(DEMO_DIR, "bus.opus")
    samples, sample_rate = load_sound_file(opus_path, sample_rate=None)

    self.assertEqual(samples.dtype, np.float32)
    self.assertEqual(len(samples.shape), 1)

    # Apparently, the exact duration may vary slightly based on which decoder is used
    num_samples = samples.shape[0]
    self.assertGreaterEqual(num_samples, 36682)
    self.assertLessEqual(num_samples, 36994)

    # The peak must lie strictly between 0.3 and 1.0.
    peak = np.amax(samples)
    self.assertGreater(peak, 0.3)
    self.assertLess(peak, 1.0)
def test_load_stereo_ogg_vorbis(self):
    """Load ``hens.ogg`` and check dtype, shape, length range and peak value.

    The file is loaded without a ``mono`` argument and the result is
    expected to be one-dimensional. The returned sample rate is not
    asserted by this test.
    """
    ogg_path = os.path.join(DEMO_DIR, "background_noises", "hens.ogg")
    samples, sample_rate = load_sound_file(ogg_path, sample_rate=None)

    self.assertEqual(samples.dtype, np.float32)
    self.assertEqual(len(samples.shape), 1)

    # Apparently, the exact duration may vary slightly based on which decoder is used
    num_samples = samples.shape[0]
    self.assertGreaterEqual(num_samples, 442575)
    self.assertLessEqual(num_samples, 443328)

    # The peak must lie strictly between 0.02 and 1.0.
    peak = np.amax(samples)
    self.assertGreater(peak, 0.02)
    self.assertLess(peak, 1.0)
def test_fill_mean(self):
    """Mean-fill masking masks a row count within the configured range.

    A row (first axis) counts as masked when its minimum equals its
    maximum and its sum is non-zero. The count must lie between
    ``round(rows * 0.05)`` and ``round(rows * 0.09)``, inclusive.
    """
    audio_path = os.path.join(DEMO_DIR, "acoustic_guitar_0.wav")
    samples, sample_rate = load_sound_file(audio_path, sample_rate=None)
    magnitude_spectrogram = librosa.feature.melspectrogram(
        y=samples, sr=sample_rate
    )

    min_mask_fraction = 0.05
    max_mask_fraction = 0.09
    transform = SpecFrequencyMask(
        fill_mode="mean",
        min_mask_fraction=min_mask_fraction,
        max_mask_fraction=max_mask_fraction,
        p=1.0,
    )
    augmented_spectrogram = transform(magnitude_spectrogram)

    if DEBUG:
        plot_matrix(np.log(augmented_spectrogram))

    def looks_mean_filled(frequency_slice):
        # Constant across the whole row, but not constant zero.
        return (
            np.amin(frequency_slice) == np.amax(frequency_slice)
            and sum(frequency_slice) != 0.0
        )

    num_masked_frequencies = sum(
        1
        for row_index in range(augmented_spectrogram.shape[0])
        if looks_mean_filled(augmented_spectrogram[row_index])
    )

    num_rows = magnitude_spectrogram.shape[0]
    lower_bound = int(round(num_rows * min_mask_fraction))
    self.assertGreaterEqual(num_masked_frequencies, lower_bound)
    upper_bound = int(round(num_rows * max_mask_fraction))
    self.assertLessEqual(num_masked_frequencies, upper_bound)
def test_load_mono_ms_adpcm_and_resample(self):
    """Load ``ms_adpcm.wav`` with a 16000 Hz target rate and verify the result.

    Checks that exactly one warning is recorded during loading and that
    its message mentions the 11024 -> 16000 Hz resampling, then checks
    the returned rate, dtype, shape, sample count and peak value.
    """
    with warnings.catch_warnings(record=True) as w:
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")
        samples, sample_rate = load_sound_file(
            os.path.join(DEMO_DIR, "ms_adpcm.wav"), sample_rate=16000
        )

        # unittest assertions instead of bare ``assert``: they are not
        # stripped under ``python -O`` and report the offending values.
        self.assertEqual(len(w), 1)
        self.assertIn(
            "resampled from 11024 hz to 16000 hz. This hurt execution time",
            str(w[-1].message),
        )

    self.assertEqual(sample_rate, 16000)
    self.assertEqual(samples.dtype, np.float32)
    self.assertEqual(len(samples.shape), 1)

    # Expected length: the original 895500 samples scaled by the rate
    # ratio, rounded up.
    self.assertEqual(samples.shape[0], math.ceil(895500 * 16000 / 11024))

    # The peak must lie strictly between 0.3 and 1.0.
    max_value = np.amax(samples)
    self.assertGreater(max_value, 0.3)
    self.assertLess(max_value, 1.0)
"instance": TimeMask(p=1.0), "num_runs": 5 }, { "instance": TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0), "num_runs": 5 }, { "instance": Trim(p=1.0), "num_runs": 1 }, ] for sound_file_path in sound_file_paths: samples, sample_rate = load_sound_file(sound_file_path, sample_rate=None, mono=False) if len(samples.shape) == 2 and samples.shape[0] > samples.shape[1]: samples = samples.transpose() print("Transforming {} with shape {}".format(sound_file_path.name, str(samples.shape))) execution_times = {} for transform in transforms: augmenter = transform["instance"] run_name = (transform.get("name") if transform.get("name") else transform["instance"].__class__.__name__) execution_times[run_name] = [] for i in range(transform["num_runs"]): output_file_path = os.path.join(
def __load_sound(file_path, sample_rate):
    """Delegate to ``load_sound_file`` with the given path and sample rate.

    Both arguments are forwarded positionally and unchanged; whatever
    ``load_sound_file`` returns is returned as-is.
    """
    loaded = load_sound_file(file_path, sample_rate)
    return loaded