Esempio n. 1
0
    def test_supported_modes_property(self):
        """Check Compose.supported_modes for a one- and a two-transform pipeline."""
        # A Compose holding only PeakNormalization must report all three modes.
        all_modes = {"per_batch", "per_example", "per_channel"}
        single = Compose(transforms=[PeakNormalization(p=1.0)])
        assert single.supported_modes == all_modes

        # With ShuffleChannels added, only "per_example" must be reported.
        peak_normalization = PeakNormalization(p=1.0)
        shuffle_channels = ShuffleChannels(p=1.0)
        combined = Compose(transforms=[peak_normalization, shuffle_channels])
        assert combined.supported_modes == {"per_example"}
    def test_shuffle(self):
        """Run a shuffled Gain/PeakNormalization Compose 100 times and count the orders.

        Each of the two possible "applied last" outcomes must be observed more
        than 10 times.
        """
        random.seed(42)
        sample_rate = 16000
        samples = np.array(
            [[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32
        )

        augment = Compose(
            transforms=[
                Gain(min_gain_in_db=-18.0, max_gain_in_db=-16.0, p=1.0),
                PeakNormalization(p=1.0),
            ],
            shuffle=True,
            output_type="dict",
        )

        gain_last_count = 0
        peak_normalization_last_count = 0
        for _ in range(100):
            output = augment(
                samples=torch.from_numpy(samples), sample_rate=sample_rate
            )
            first_value = output.samples.numpy()[0, 0, 0]

            # Either PeakNormalization or Gain was applied last; the first
            # sample value is used to tell the two cases apart.
            if first_value < 0.2:
                gain_last_count += 1
                continue
            if first_value == 1.0:
                peak_normalization_last_count += 1
                continue
            raise AssertionError("Unexpected value!")

        self.assertGreater(peak_normalization_last_count, 10)
        self.assertGreater(gain_last_count, 10)
Esempio n. 3
0
 def training_step(self, batch, batch_nb):
     """Apply a random per-channel gain to the source, then compute and log the loss.

     `batch` is unpacked as a `(mix, source)` pair; only `source` is augmented.
     Returns the value produced by `self.common_step`.
     """
     mix, source = batch
     gain = Gain(
         min_gain_in_db=-15.0,
         max_gain_in_db=5.0,
         p=0.5,
         mode="per_channel",
     )
     augmenter = Compose(transforms=[gain])
     augmented_source = augmenter(source, sample_rate=22050)
     loss = self.common_step((mix, augmented_source), batch_nb, train=True)
     self.log("loss", loss, logger=True)
     return loss
    def test_freeze_and_unfreeze_parameters(self):
        """Frozen parameters must reproduce the previous output; unfrozen ones must not."""
        torch.manual_seed(42)

        sample_rate = 16000
        samples = np.array(
            [[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32
        )

        gain = Gain(
            min_gain_in_db=-16.000001,
            max_gain_in_db=-2,
            p=1.0,
        )
        augment = Compose(
            transforms=[gain, PolarityInversion(p=1.0)],
            output_type="dict",
        )

        def run_augment():
            # Wrap the same numpy input in a tensor for every call.
            output = augment(
                samples=torch.from_numpy(samples), sample_rate=sample_rate
            )
            return output.samples.numpy()

        initial_output = run_augment()

        # After freezing, the next call must give exactly the same output.
        augment.freeze_parameters()
        frozen_output = run_augment()
        assert_array_equal(initial_output, frozen_output)

        # After unfreezing, the first sample must differ from the initial run.
        augment.unfreeze_parameters()
        unfrozen_output = run_augment()
        self.assertNotEqual(initial_output[0, 0, 0], unfrozen_output[0, 0, 0])
    def test_splice_out_odd_hann(self):
        """Run SpliceOut via Compose with sample_rate=16100 and check the output dtype."""
        audio_samples = torch.rand(size=(8, 1, 32000), dtype=torch.float32)

        splice_out = SpliceOut(
            num_time_intervals=10, max_width=400, output_type="dict"
        )
        augment = Compose([splice_out], output_type="dict")

        output = augment(samples=audio_samples, sample_rate=16100)
        splice_out_samples = output.samples.numpy()

        assert splice_out_samples.dtype == np.float32
Esempio n. 6
0
    def test_compose_with_p_zero(self):
        """A Compose built with p=0.0 must return the input samples unchanged."""
        sample_rate = 16000
        samples = np.array(
            [[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32
        )

        pipeline = [
            Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0),
            PolarityInversion(p=1.0),
        ]
        augment = Compose(transforms=pipeline, p=0.0)

        output = augment(
            samples=torch.from_numpy(samples), sample_rate=sample_rate
        )
        processed_samples = output.numpy()
        assert_array_equal(samples, processed_samples)
    def test_splice_out_multichannel(self):
        """Run SpliceOut on a two-channel batch; check dtype, sum and shape of the output."""
        audio_samples = torch.rand(size=(8, 2, 32000), dtype=torch.float32)

        splice_out = SpliceOut(
            num_time_intervals=10, max_width=400, output_type="dict"
        )
        augment = Compose([splice_out], output_type="dict")

        output = augment(samples=audio_samples, sample_rate=16000)
        splice_out_samples = output.samples.numpy()

        assert splice_out_samples.dtype == np.float32
        # The summed output must be smaller than the summed input, while the
        # array shape must stay the same.
        input_sum = audio_samples.numpy().sum()
        self.assertLess(splice_out_samples.sum(), input_sum)
        self.assertEqual(splice_out_samples.shape, audio_samples.shape)
Esempio n. 8
0
    def test_compose_with_torchaudio_transform(self):
        """Compose a `Vol` transform with PolarityInversion and check the scaled output."""
        sample_rate = 16000
        samples = np.array(
            [[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32
        )

        pipeline = [
            Vol(gain=-6, gain_type="db"),
            PolarityInversion(p=1.0),
        ]
        augment = Compose(pipeline)

        output = augment(
            samples=torch.from_numpy(samples), sample_rate=sample_rate
        )
        processed_samples = output.numpy()

        # Expected result: the input scaled by the amplitude ratio for -6 dB,
        # negated. The reference array is built fresh rather than reusing
        # `samples`, whose memory is shared with the tensor given to `augment`.
        expected_factor = -convert_decibels_to_amplitude_ratio(-6)
        reference = np.array(
            [[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32
        )
        assert_almost_equal(
            processed_samples, expected_factor * reference, decimal=6
        )
        self.assertEqual(processed_samples.dtype, np.float32)
    def test_splice_out_cuda(self):

        audio_samples = (torch.rand(size=(8, 1, 32000),
                                    dtype=torch.float32,
                                    device=torch.device("cuda")) - 0.5)
        augment = Compose(
            [
                SpliceOut(
                    num_time_intervals=10, max_width=400, output_type="dict"),
            ],
            output_type="dict",
        )
        splice_out_samples = (augment(samples=audio_samples,
                                      sample_rate=16000).samples.cpu().numpy())

        assert splice_out_samples.dtype == np.float32
        self.assertLess(splice_out_samples.sum(),
                        audio_samples.cpu().numpy().sum())
Esempio n. 10
0
    def test_compose(self):
        """Compose Gain (about -6 dB) with PolarityInversion and check the scaled output."""
        sample_rate = 16000
        samples = np.array(
            [[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32
        )

        pipeline = [
            Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0),
            PolarityInversion(p=1.0),
        ]
        augment = Compose(pipeline)

        output = augment(
            samples=torch.from_numpy(samples), sample_rate=sample_rate
        )
        processed_samples = output.numpy()

        # Expected result: the input scaled by the amplitude ratio for -6 dB,
        # negated. The reference array is built fresh rather than reusing
        # `samples`, whose memory is shared with the tensor given to `augment`.
        expected_factor = -convert_decibels_to_amplitude_ratio(-6)
        reference = np.array(
            [[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32
        )
        assert_almost_equal(
            processed_samples, expected_factor * reference, decimal=6
        )
        self.assertEqual(processed_samples.dtype, np.float32)
Esempio n. 11
0
    def training_step(self, batch, batch_nb):
        """Augment the batch with gain, channel shuffle and pitch shift, then log the loss.

        Returns the value produced by `self.common_step` for the augmented batch.
        """
        # NOTE(review): the whole pipeline is rebuilt on every call; consider
        # constructing it once outside the training loop - TODO confirm cost.
        gain = Gain(
            min_gain_in_db=-15.0,
            max_gain_in_db=5.0,
            p=0.5,
            mode="per_channel",
        )
        channel_shuffle = ShuffleChannels(mode="per_example")
        pitch_shift = PitchShift(
            min_transpose_semitones=-2,
            max_transpose_semitones=2,
            p=0.5,
            mode="per_example",
            sample_rate=44100,
        )
        augmenter = Compose(transforms=[gain, channel_shuffle, pitch_shift])

        augmented_batch = augmenter(batch, sample_rate=44100)
        loss = self.common_step(augmented_batch, batch_nb, train=True)
        self.log("loss", loss, logger=True)
        return loss
    def test_compose_without_specifying_output_type(self):
        """Without an explicit output_type, Compose must return a plain torch.Tensor."""
        sample_rate = 16000
        samples = np.array(
            [[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32
        )

        pipeline = [
            Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0),
            PolarityInversion(p=1.0),
        ]
        augment = Compose(pipeline)

        output = augment(
            samples=torch.from_numpy(samples), sample_rate=sample_rate
        )
        # The returned object's type should be torch.Tensor until we switch to
        # ObjectDict as default
        assert type(output) == torch.Tensor
        processed_samples = output.numpy()

        # Expected result: the input scaled by the amplitude ratio for -6 dB,
        # negated. The reference array is built fresh rather than reusing
        # `samples`, whose memory is shared with the tensor given to `augment`.
        expected_factor = -convert_decibels_to_amplitude_ratio(-6)
        reference = np.array(
            [[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32
        )
        assert_almost_equal(
            processed_samples, expected_factor * reference, decimal=6
        )
        self.assertEqual(processed_samples.dtype, np.float32)
)

# Fixture sub-directories ("bg" and "ir" under TEST_FIXTURES_DIR) passed to
# AddBackgroundNoise and ApplyImpulseResponse in the parametrized list below.
BG_NOISE_PATH = TEST_FIXTURES_DIR / "bg"
IR_PATH = TEST_FIXTURES_DIR / "ir"


@pytest.mark.parametrize(
    "augment",
    [
        # Differentiable transforms:
        AddBackgroundNoise(BG_NOISE_PATH, 20, p=1.0, output_type="dict"),
        ApplyImpulseResponse(IR_PATH, p=1.0, output_type="dict"),
        Compose(
            transforms=[
                Gain(min_gain_in_db=-15.0, max_gain_in_db=5.0, p=1.0),
                PolarityInversion(p=1.0),
            ],
            output_type="dict",
        ),
        Gain(min_gain_in_db=-6.000001,
             max_gain_in_db=-6,
             p=1.0,
             output_type="dict"),
        PolarityInversion(p=1.0, output_type="dict"),
        Shift(p=1.0, output_type="dict"),
        # Non-differentiable transforms:
        # RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation:
        # [torch.DoubleTensor [1, 1, 5]], which is output 0 of IndexBackward, is at version 1; expected version 0 instead.
        # Hint: enable anomaly detection to find the operation that failed to compute its gradient,
        # with torch.autograd.set_detect_anomaly(True).
        pytest.param(
    LowPassFilter,
    HighPassFilter,
)

# Fixture sub-directories ("bg" and "ir" under TEST_FIXTURES_DIR) passed to
# AddBackgroundNoise and ApplyImpulseResponse in the parametrized list below.
BG_NOISE_PATH = TEST_FIXTURES_DIR / "bg"
IR_PATH = TEST_FIXTURES_DIR / "ir"


@pytest.mark.parametrize(
    "augment",
    [
        # Differentiable transforms:
        AddBackgroundNoise(BG_NOISE_PATH, 20, p=1.0),
        ApplyImpulseResponse(IR_PATH, p=1.0),
        Compose(transforms=[
            Gain(min_gain_in_db=-15.0, max_gain_in_db=5.0, p=1.0),
            PolarityInversion(p=1.0),
        ]),
        Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0),
        PolarityInversion(p=1.0),
        Shift(p=1.0),
        # Non-differentiable transforms:
        # RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation:
        # [torch.DoubleTensor [1, 1, 5]], which is output 0 of IndexBackward, is at version 1; expected version 0 instead.
        # Hint: enable anomaly detection to find the operation that failed to compute its gradient,
        # with torch.autograd.set_detect_anomaly(True).
        pytest.param(HighPassFilter(p=1.0),
                     marks=pytest.mark.skip("Not differentiable")),
        pytest.param(LowPassFilter(p=1.0),
                     marks=pytest.mark.skip("Not differentiable")),
        pytest.param(PeakNormalization(p=1.0),
                     marks=pytest.mark.skip("Not differentiable")),
Esempio n. 15
0
         background_paths=TEST_FIXTURES_DIR / "bg", mode=mode, p=1.0
     ),
     "num_runs": 5,
 },
 {
     "instance": ApplyImpulseResponse(
         ir_paths=TEST_FIXTURES_DIR / "ir", mode=mode, p=1.0
     ),
     "num_runs": 1,
 },
 {
     "instance": Compose(
         transforms=[
             Gain(
                 min_gain_in_db=-18.0, max_gain_in_db=-16.0, mode=mode, p=1.0
             ),
             PeakNormalization(mode=mode, p=1.0),
         ],
         shuffle=True,
     ),
     "name": "Shuffled Compose with Gain and PeakNormalization",
     "num_runs": 5,
 },
 {
     "instance": Compose(
         transforms=[
             Gain(
                 min_gain_in_db=-18.0, max_gain_in_db=-16.0, mode=mode, p=0.5
             ),
             PolarityInversion(mode=mode, p=0.5),
         ],
Esempio n. 16
0
import torch
from torch_audiomentations import Compose, Gain, PolarityInversion
import torchaudio

# Build the augmentation pipeline: Gain is always applied (p=1.0) with a gain
# drawn from [-15 dB, 40 dB]; PolarityInversion is present but disabled (p=0.0).
apply_augmentation = Compose(transforms=[
    Gain(
        min_gain_in_db=-15.0,
        max_gain_in_db=40.0,
        p=1.0,
    ),
    PolarityInversion(p=0.0)
])

# Not used by the file-based flow below; kept for the synthetic-noise variant
# shown in the commented-out line.
torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Alternative input: an example tensor with white noise, representing 8 audio
# snippets with 2 channels (stereo) and 2 s of 16 kHz audio.
# audio_samples = torch.rand(size=(8, 2, 32000), dtype=torch.float32, device=torch_device) - 0.5

# torchaudio.load returns a (waveform, sample_rate) pair; the waveform is
# two-dimensional (channels, samples).
audio_samples = torchaudio.load(
    "/Users/bdubel/Documents/ZHAW/BA/data/eth_ch_dialects/ag/ch_ag_0107.wav")
waveform, sample_rate = audio_samples

# The augmentation works on batches shaped (batch, channels, samples), as in
# the synthetic example above, so add a batch dimension of size 1 before the
# call and drop it again afterwards. The sample rate read from the file is
# used instead of a hard-coded value so the saved audio keeps its real rate.
perturbed_audio_samples = apply_augmentation(
    waveform.unsqueeze(0), sample_rate=sample_rate).squeeze(0)

# torchaudio.save takes a two-dimensional (channels, samples) tensor.
torchaudio.save(
    '/Users/bdubel/Documents/ZHAW/BA/data/swiss_all/perturbation/test1.flac',
    perturbed_audio_samples,
    sample_rate=sample_rate)