def test_fill_zeros(self):
        """Check that constant-zero frequency masking blanks the expected number of bins.

        A mel spectrogram is computed from a demo recording and passed through
        ``SpecFrequencyMask`` configured with ``fill_mode="constant"``,
        ``fill_constant=0.0`` and identical min/max mask fractions, so the
        width of the mask is fixed. The test then asserts that the output
        differs from the input and that the number of all-zero frequency rows
        equals ``round(num_bins * mask_fraction)``.
        """
        samples, sample_rate = load_sound_file(
            os.path.join(DEMO_DIR, "acoustic_guitar_0.wav"), sample_rate=None
        )
        magnitude_spectrogram = librosa.feature.melspectrogram(
            y=samples, sr=sample_rate
        )

        mask_fraction = 0.05
        transform = SpecFrequencyMask(
            fill_mode="constant",
            fill_constant=0.0,
            min_mask_fraction=mask_fraction,
            max_mask_fraction=mask_fraction,
            p=1.0,
        )
        augmented_spectrogram = transform(magnitude_spectrogram)

        if DEBUG:
            # NOTE: rows filled with 0.0 map to -inf under np.log.
            plot_matrix(np.log(augmented_spectrogram))

        # The transform must have changed something.
        with np.testing.assert_raises(AssertionError):
            np.testing.assert_array_equal(augmented_spectrogram, magnitude_spectrogram)

        # A frequency bin counts as zeroed when every value in its row is
        # exactly zero. This is checked in one vectorised pass instead of
        # summing each row with the (slow) built-in ``sum``.
        num_zeroed_frequencies = int(
            np.count_nonzero(~np.any(augmented_spectrogram, axis=1))
        )

        self.assertEqual(
            num_zeroed_frequencies,
            int(round(magnitude_spectrogram.shape[0] * mask_fraction)),
        )
    def test_fill_mean_multichannel(self):
        """Check mean-fill frequency masking on a three-channel spectrogram.

        A stereo recording is loaded, a mel spectrogram is computed per
        channel, and the two are stacked into a ``(bins, frames, 3)`` float32
        array (the third channel is a copy of the second). After applying
        ``SpecFrequencyMask`` with ``fill_mode="mean"`` and a fixed mask
        fraction, the test asserts that the output differs from the input and
        that the number of frequency rows holding a single non-zero constant
        equals ``round(num_bins * mask_fraction)``.
        """
        samples, sample_rate = load_sound_file(
            os.path.join(DEMO_DIR, "background_noises", "hens.ogg"),
            sample_rate=None,
            mono=False,
        )
        # Precondition: the fixture must load as two channels. A TestCase
        # assertion is used so the check is not stripped under ``python -O``.
        self.assertEqual(samples.shape[0], 2)

        magnitude_spectrogram_chn0 = librosa.feature.melspectrogram(
            y=np.asfortranarray(samples[0, :]), sr=sample_rate
        )
        magnitude_spectrogram_chn1 = librosa.feature.melspectrogram(
            y=np.asfortranarray(samples[1, :]), sr=sample_rate
        )
        multichannel_magnitude_spectrogram = np.zeros(
            shape=(
                magnitude_spectrogram_chn0.shape[0],
                magnitude_spectrogram_chn0.shape[1],
                3,
            ),
            dtype=np.float32,
        )
        multichannel_magnitude_spectrogram[:, :, 0] = magnitude_spectrogram_chn0
        multichannel_magnitude_spectrogram[:, :, 1] = magnitude_spectrogram_chn1
        multichannel_magnitude_spectrogram[:, :, 2] = magnitude_spectrogram_chn1

        mask_fraction = 0.05
        transform = SpecFrequencyMask(
            fill_mode="mean",
            min_mask_fraction=mask_fraction,
            max_mask_fraction=mask_fraction,
            p=1.0,
        )
        augmented_spectrogram = transform(multichannel_magnitude_spectrogram)

        if DEBUG:
            # Small offset keeps log10 finite; the result is rescaled for display.
            image = (7 + np.log10(augmented_spectrogram + 0.0000001)) / 8
            plot_matrix(image)

        # The transform must have changed something.
        with np.testing.assert_raises(AssertionError):
            np.testing.assert_array_equal(
                augmented_spectrogram, multichannel_magnitude_spectrogram
            )

        # A row is considered masked when all of its values (across frames and
        # channels) are one identical, non-zero constant.
        num_masked_frequencies = sum(
            1
            for frequency_slice in augmented_spectrogram
            if np.amin(frequency_slice) == np.amax(frequency_slice)
            and np.sum(frequency_slice) != 0.0
        )

        self.assertEqual(
            num_masked_frequencies,
            int(round(multichannel_magnitude_spectrogram.shape[0] * mask_fraction)),
        )
    def test_shuffle_channels_mono(self):
        """Check that ``SpecChannelShuffle`` rejects a single-channel spectrogram.

        A mel spectrogram is computed from a demo recording loaded with the
        loader's default channel handling, and applying the transform is
        expected to raise ``MonoAudioNotSupportedException``.
        """
        samples, sample_rate = load_sound_file(
            os.path.join(DEMO_DIR, "acoustic_guitar_0.wav"), sample_rate=None
        )
        magnitude_spectrogram = librosa.feature.melspectrogram(
            y=samples, sr=sample_rate
        )

        transform = SpecChannelShuffle(p=1.0)
        with self.assertRaises(MonoAudioNotSupportedException):
            # The return value is irrelevant here; only the exception matters.
            transform(magnitude_spectrogram)
    def test_shuffle_channels(self):
        """Check that ``SpecChannelShuffle`` permutes channels as its parameters say.

        A three-channel ``(bins, frames, 3)`` float32 spectrogram is built from
        a stereo recording; the third channel is the second one scaled by 0.7.
        The transform's parameters are re-randomized until the channel order
        is not the identity, then frozen. The test asserts that the output
        differs from the input and that each output channel equals the input
        channel named by ``shuffled_channel_indexes``.
        """
        samples, sample_rate = load_sound_file(
            os.path.join(DEMO_DIR, "background_noises", "hens.ogg"),
            sample_rate=None,
            mono=False,
        )
        # Precondition: the fixture must load as two channels. A TestCase
        # assertion is used so the check is not stripped under ``python -O``.
        self.assertEqual(samples.shape[0], 2)

        magnitude_spectrogram_chn0 = librosa.feature.melspectrogram(
            y=np.asfortranarray(samples[0, :]), sr=sample_rate)
        magnitude_spectrogram_chn1 = librosa.feature.melspectrogram(
            y=np.asfortranarray(samples[1, :]), sr=sample_rate)
        multichannel_magnitude_spectrogram = np.zeros(
            shape=(
                magnitude_spectrogram_chn0.shape[0],
                magnitude_spectrogram_chn0.shape[1],
                3,
            ),
            dtype=np.float32,
        )
        multichannel_magnitude_spectrogram[:, :, 0] = magnitude_spectrogram_chn0
        multichannel_magnitude_spectrogram[:, :, 1] = magnitude_spectrogram_chn1
        multichannel_magnitude_spectrogram[:, :, 2] = (
            magnitude_spectrogram_chn1 * 0.7
        )

        if DEBUG:
            image = (7 + np.log10(multichannel_magnitude_spectrogram +
                                  0.0000001)) / 8
            plot_matrix(image, title="before")

        # Make sure the shuffled channel order differs from the original order.
        transform = SpecChannelShuffle(p=1.0)
        for _ in range(100000):
            transform.randomize_parameters(multichannel_magnitude_spectrogram)
            if transform.parameters["shuffled_channel_indexes"] != [0, 1, 2]:
                break
        else:
            # Without this, an exhausted loop would surface later as a
            # confusing "arrays are equal" failure.
            self.fail(
                "SpecChannelShuffle never produced a non-identity channel order"
            )
        transform.freeze_parameters()

        augmented_spectrogram = transform(multichannel_magnitude_spectrogram)

        if DEBUG:
            image = (7 + np.log10(augmented_spectrogram + 0.0000001)) / 8
            plot_matrix(image, title="after")

        # The transform must have changed something.
        with np.testing.assert_raises(AssertionError):
            np.testing.assert_array_equal(augmented_spectrogram,
                                          multichannel_magnitude_spectrogram)

        # Output channel i must be the input channel the parameters point to.
        shuffled_channel_indexes = transform.parameters[
            "shuffled_channel_indexes"]
        for augmented_index, original_index in enumerate(
                shuffled_channel_indexes):
            np.testing.assert_array_equal(
                augmented_spectrogram[:, :, augmented_index],
                multichannel_magnitude_spectrogram[:, :, original_index],
            )
Ejemplo n.º 5
0
    def test_load_stereo_signed_16_bit_wav(self):
        """Load ``stereo_16bit.wav`` at its native rate and verify the result.

        Expects a 16000 Hz sample rate, a one-dimensional float32 array of
        17833 samples, and a peak value strictly between 0.5 and 1.0.
        """
        wav_path = os.path.join(DEMO_DIR, "stereo_16bit.wav")
        audio, rate = load_sound_file(wav_path, sample_rate=None)

        self.assertEqual(rate, 16000)
        self.assertEqual(audio.dtype, np.float32)
        # The loader is called without ``mono=False``, so one axis is expected.
        self.assertEqual(len(audio.shape), 1)

        self.assertEqual(audio.shape[0], 17833)

        peak = np.amax(audio)
        self.assertGreater(peak, 0.5)
        self.assertLess(peak, 1.0)
Ejemplo n.º 6
0
    def test_load_mono_ms_adpcm(self):
        """Load ``ms_adpcm.wav`` at its native rate and verify the result.

        Expects an 11024 Hz sample rate, a one-dimensional float32 array of
        895500 samples, and a peak value strictly between 0.3 and 1.0.
        """
        wav_path = os.path.join(DEMO_DIR, "ms_adpcm.wav")
        audio, rate = load_sound_file(wav_path, sample_rate=None)

        self.assertEqual(rate, 11024)
        self.assertEqual(audio.dtype, np.float32)
        self.assertEqual(len(audio.shape), 1)

        self.assertEqual(audio.shape[0], 895500)

        peak = np.amax(audio)
        self.assertGreater(peak, 0.3)
        self.assertLess(peak, 1.0)
Ejemplo n.º 7
0
    def test_load_mono_signed_24_bit_wav(self):
        """Load ``signed_24bit.wav`` at its native rate and verify the result.

        Expects a 48000 Hz sample rate, a one-dimensional float32 array of
        54514 samples, and a peak value strictly between 0.09 and 1.0.
        """
        wav_path = os.path.join(DEMO_DIR, "signed_24bit.wav")
        audio, rate = load_sound_file(wav_path, sample_rate=None)

        self.assertEqual(rate, 48000)
        self.assertEqual(audio.dtype, np.float32)
        self.assertEqual(len(audio.shape), 1)

        self.assertEqual(audio.shape[0], 54514)

        peak = np.amax(audio)
        self.assertGreater(peak, 0.09)
        self.assertLess(peak, 1.0)
Ejemplo n.º 8
0
    def test_load_mono_m4a(self):
        """Load ``testing.m4a`` at its native rate and verify the result.

        Expects a 44100 Hz sample rate, a one-dimensional float32 array of
        exactly 141312 samples, and a peak value strictly between 0.1 and 1.0.
        """
        samples, sample_rate = load_sound_file(
            os.path.join(DEMO_DIR, "testing.m4a"), sample_rate=None
        )
        self.assertEqual(sample_rate, 44100)
        self.assertEqual(samples.dtype, np.float32)
        self.assertEqual(len(samples.shape), 1)

        # Lower and upper bound are the same number, so a single equality
        # check states the expectation directly.
        self.assertEqual(samples.shape[0], 141312)

        max_value = np.amax(samples)
        self.assertGreater(max_value, 0.1)
        self.assertLess(max_value, 1.0)
Ejemplo n.º 9
0
    def test_load_mono_opus(self):
        """Load ``bus.opus`` at its native rate and verify the result.

        Expects a one-dimensional float32 array whose length lies in the
        inclusive range 36682..36994, and a peak value strictly between 0.3
        and 1.0. The returned sample rate is not checked.
        """
        opus_path = os.path.join(DEMO_DIR, "bus.opus")
        audio, _rate = load_sound_file(opus_path, sample_rate=None)

        self.assertEqual(audio.dtype, np.float32)
        self.assertEqual(len(audio.shape), 1)

        # Apparently, the exact duration may vary slightly based on which
        # decoder is used, so a length range is accepted.
        num_samples = audio.shape[0]
        self.assertGreaterEqual(num_samples, 36682)
        self.assertLessEqual(num_samples, 36994)

        peak = np.amax(audio)
        self.assertGreater(peak, 0.3)
        self.assertLess(peak, 1.0)
Ejemplo n.º 10
0
    def test_load_stereo_ogg_vorbis(self):
        """Load ``background_noises/hens.ogg`` at its native rate and verify it.

        Expects a one-dimensional float32 array whose length lies in the
        inclusive range 442575..443328, and a peak value strictly between 0.02
        and 1.0. The returned sample rate is not checked.
        """
        ogg_path = os.path.join(DEMO_DIR, "background_noises", "hens.ogg")
        audio, _rate = load_sound_file(ogg_path, sample_rate=None)

        self.assertEqual(audio.dtype, np.float32)
        self.assertEqual(len(audio.shape), 1)

        # Apparently, the exact duration may vary slightly based on which
        # decoder is used, so a length range is accepted.
        num_samples = audio.shape[0]
        self.assertGreaterEqual(num_samples, 442575)
        self.assertLessEqual(num_samples, 443328)

        peak = np.amax(audio)
        self.assertGreater(peak, 0.02)
        self.assertLess(peak, 1.0)
    def test_fill_mean(self):
        """Check that mean-fill frequency masking covers a bin count within range.

        A mel spectrogram is computed from a demo recording and passed through
        ``SpecFrequencyMask`` with ``fill_mode="mean"`` and a mask fraction
        between 0.05 and 0.09. The test asserts that the number of frequency
        rows holding a single non-zero constant lies between
        ``round(num_bins * min_mask_fraction)`` and
        ``round(num_bins * max_mask_fraction)`` inclusive.
        """
        samples, sample_rate = load_sound_file(
            os.path.join(DEMO_DIR, "acoustic_guitar_0.wav"), sample_rate=None
        )
        magnitude_spectrogram = librosa.feature.melspectrogram(
            y=samples, sr=sample_rate
        )

        min_mask_fraction = 0.05
        max_mask_fraction = 0.09
        transform = SpecFrequencyMask(
            fill_mode="mean",
            min_mask_fraction=min_mask_fraction,
            max_mask_fraction=max_mask_fraction,
            p=1.0,
        )
        augmented_spectrogram = transform(magnitude_spectrogram)

        if DEBUG:
            plot_matrix(np.log(augmented_spectrogram))

        # A row is considered masked when all of its values are one identical,
        # non-zero constant. ``np.sum`` is used (rather than the built-in
        # ``sum``) to match the multichannel variant of this check.
        num_masked_frequencies = sum(
            1
            for frequency_slice in augmented_spectrogram
            if np.amin(frequency_slice) == np.amax(frequency_slice)
            and np.sum(frequency_slice) != 0.0
        )

        num_bins = magnitude_spectrogram.shape[0]
        self.assertGreaterEqual(
            num_masked_frequencies,
            int(round(num_bins * min_mask_fraction)),
        )
        self.assertLessEqual(
            num_masked_frequencies,
            int(round(num_bins * max_mask_fraction)),
        )
Ejemplo n.º 12
0
    def test_load_mono_ms_adpcm_and_resample(self):
        """Load ``ms_adpcm.wav`` with a target rate of 16000 Hz and verify it.

        Expects exactly one warning whose message mentions the resampling from
        11024 Hz to 16000 Hz, a returned sample rate of 16000, a
        one-dimensional float32 array of ``ceil(895500 * 16000 / 11024)``
        samples, and a peak value strictly between 0.3 and 1.0.
        """
        with warnings.catch_warnings(record=True) as w:
            # Cause all warnings to always be triggered.
            warnings.simplefilter("always")

            samples, sample_rate = load_sound_file(
                os.path.join(DEMO_DIR, "ms_adpcm.wav"), sample_rate=16000
            )

            # TestCase assertions are used instead of bare ``assert`` so the
            # checks survive ``python -O`` and report useful diagnostics.
            self.assertEqual(len(w), 1)
            self.assertIn(
                "resampled from 11024 hz to 16000 hz. This hurt execution time",
                str(w[-1].message),
            )

        self.assertEqual(sample_rate, 16000)
        self.assertEqual(samples.dtype, np.float32)
        self.assertEqual(len(samples.shape), 1)

        # Expected length after resampling from 11024 Hz to 16000 Hz.
        self.assertEqual(samples.shape[0], math.ceil(895500 * 16000 / 11024))

        max_value = np.amax(samples)
        self.assertGreater(max_value, 0.3)
        self.assertLess(max_value, 1.0)
Ejemplo n.º 13
0
            "instance": TimeMask(p=1.0),
            "num_runs": 5
        },
        {
            "instance": TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0),
            "num_runs": 5
        },
        {
            "instance": Trim(p=1.0),
            "num_runs": 1
        },
    ]

    for sound_file_path in sound_file_paths:
        samples, sample_rate = load_sound_file(sound_file_path,
                                               sample_rate=None,
                                               mono=False)
        if len(samples.shape) == 2 and samples.shape[0] > samples.shape[1]:
            samples = samples.transpose()

        print("Transforming {} with shape {}".format(sound_file_path.name,
                                                     str(samples.shape)))
        execution_times = {}

        for transform in transforms:
            augmenter = transform["instance"]
            run_name = (transform.get("name") if transform.get("name") else
                        transform["instance"].__class__.__name__)
            execution_times[run_name] = []
            for i in range(transform["num_runs"]):
                output_file_path = os.path.join(
Ejemplo n.º 14
0
 def __load_sound(file_path, sample_rate):
     """Load the audio at ``file_path`` by delegating to ``load_sound_file``.

     Both arguments are forwarded positionally and unchanged, and whatever
     ``load_sound_file`` returns is handed straight back to the caller.
     """
     loaded = load_sound_file(file_path, sample_rate)
     return loaded