def test_freeze_and_unfreeze_parameters(self):
    """Check that freezing pins the transform parameters and unfreezing releases them.

    A Gain + PolarityInversion pipeline is run three times on the same input:
    once normally, once after ``freeze_parameters()`` (output must be identical
    to the first run) and once after ``unfreeze_parameters()`` (the first
    output sample must differ from the first run).
    """
    torch.manual_seed(42)

    waveform = np.array([[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32)
    pipeline = Compose(
        transforms=[
            Gain(
                min_gain_in_db=-16.000001,
                max_gain_in_db=-2,
                p=1.0,
            ),
            PolarityInversion(p=1.0),
        ],
        output_type="dict",
    )

    def run_pipeline():
        # Each run wraps the same numpy buffer in a fresh tensor.
        result = pipeline(samples=torch.from_numpy(waveform), sample_rate=16000)
        return result.samples.numpy()

    first_pass = run_pipeline()

    pipeline.freeze_parameters()
    frozen_pass = run_pipeline()
    assert_array_equal(first_pass, frozen_pass)

    pipeline.unfreeze_parameters()
    unfrozen_pass = run_pipeline()
    self.assertNotEqual(first_pass[0, 0, 0], unfrozen_pass[0, 0, 0])
def setUp(self):
    """Create the shared fixture: a random (1, 1, 16000) tensor and three transforms."""
    self.sample_rate = 16000
    self.audio = torch.randn(1, 1, 16000)

    gain = Gain(min_gain_in_db=-6.000001, max_gain_in_db=-2, p=1.0)
    inversion = PolarityInversion(p=1.0)
    normalization = PeakNormalization(p=1.0)
    self.transforms = [gain, inversion, normalization]
def test_ndim_check(self):
    """Inputs that are not 3-dimensional must be rejected with ``RuntimeError``."""
    augment = PolarityInversion(p=1.0)

    rejected_inputs = (
        [1.0, 0.5, 0.25, 0.125],  # 1D tensor not allowed
        [[1.0, 0.5, 0.25, 0.125]],  # 2D tensor not allowed
        [[[[1.0, 0.5, 0.25, 0.125]]]],  # 4D tensor not allowed
    )
    for nested_values in rejected_inputs:
        with pytest.raises(RuntimeError):
            augment(torch.tensor(nested_values, dtype=torch.float32))
def test_polarity_inversion_cuda(self):
    """Run PolarityInversion on a CUDA tensor and check the sign flip and dtype.

    The test is skipped when no CUDA device is available, because both the
    transform and the input are moved to the GPU with ``.cuda()``.
    """
    if not torch.cuda.is_available():
        self.skipTest("Requires CUDA")

    samples = np.array([[1.0, 0.5, -0.25, -0.125, 0.0]], dtype=np.float32)
    sample_rate = 16000

    augment = PolarityInversion(p=1.0).cuda()
    inverted_samples = (
        augment(samples=torch.from_numpy(samples).cuda(), sample_rate=sample_rate)
        .cpu()  # bring the result back to host memory before converting to numpy
        .numpy()
    )

    assert_almost_equal(
        inverted_samples,
        np.array([[-1.0, -0.5, 0.25, 0.125, 0.0]], dtype=np.float32),
    )
    self.assertEqual(inverted_samples.dtype, np.float32)
def test_polarity_inversion_zero_probability(self):
    """With ``p=0.0`` the transform must return the input values unchanged as float32."""
    values = [[1.0, 0.5, -0.25, -0.125, 0.0]]
    waveform = np.array(values, dtype=np.float32)

    transform = PolarityInversion(p=0.0)
    output = transform(
        samples=torch.from_numpy(waveform), sample_rate=16000
    ).numpy()

    untouched = np.array(values, dtype=np.float32)
    assert_almost_equal(output, untouched)
    self.assertEqual(output.dtype, np.float32)
def test_polarity_inversion(self):
    """With ``p=1.0`` and dict output, every sample must be negated and stay float32."""
    waveform = np.array([[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32)

    transform = PolarityInversion(p=1.0, output_type="dict")
    result = transform(samples=torch.from_numpy(waveform), sample_rate=16000)
    output = result.samples.numpy()

    negated = np.array([[[-1.0, -0.5, 0.25, 0.125, 0.0]]], dtype=np.float32)
    assert_almost_equal(output, negated)
    self.assertEqual(output.dtype, np.float32)
def test_compose_with_p_zero(self):
    """A ``Compose`` with ``p=0.0`` must leave the input untouched.

    The inner transforms both have ``p=1.0``, so any change in the output
    would mean the outer probability was ignored.
    """
    samples = np.array([[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32)

    inner_transforms = [
        Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0),
        PolarityInversion(p=1.0),
    ]
    pipeline = Compose(transforms=inner_transforms, p=0.0)

    output = pipeline(samples=torch.from_numpy(samples), sample_rate=16000).numpy()
    assert_array_equal(samples, output)
def test_compose_with_torchaudio_transform(self):
    """``Compose`` must accept a ``Vol`` transform alongside PolarityInversion.

    The expected output is the input scaled by the amplitude ratio for -6 dB
    and negated.
    """
    values = [[[1.0, 0.5, -0.25, -0.125, 0.0]]]
    waveform = np.array(values, dtype=np.float32)

    pipeline = Compose([Vol(gain=-6, gain_type="db"), PolarityInversion(p=1.0)])
    output = pipeline(
        samples=torch.from_numpy(waveform), sample_rate=16000
    ).numpy()

    # Build the reference from a fresh array rather than the buffer that was
    # shared with the input tensor.
    signed_ratio = -convert_decibels_to_amplitude_ratio(-6)
    reference = signed_ratio * np.array(values, dtype=np.float32)

    assert_almost_equal(output, reference, decimal=6)
    self.assertEqual(output.dtype, np.float32)
def test_compose(self):
    """Gain followed by PolarityInversion must scale by the -6 dB ratio and negate.

    The gain range is [-6.000001, -6] dB, so the result is compared against
    the -6 dB amplitude ratio to six decimals.
    """
    values = [[[1.0, 0.5, -0.25, -0.125, 0.0]]]
    waveform = np.array(values, dtype=np.float32)

    pipeline = Compose(
        [
            Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0),
            PolarityInversion(p=1.0),
        ]
    )
    output = pipeline(
        samples=torch.from_numpy(waveform), sample_rate=16000
    ).numpy()

    # Build the reference from a fresh array rather than the buffer that was
    # shared with the input tensor.
    signed_ratio = -convert_decibels_to_amplitude_ratio(-6)
    reference = signed_ratio * np.array(values, dtype=np.float32)

    assert_almost_equal(output, reference, decimal=6)
    self.assertEqual(output.dtype, np.float32)
def test_polarity_inversion_multichannel(self):
    """With ``p=1.0`` both rows of a two-row input must be negated and stay float32."""
    row = [1.0, 0.5, -0.25, -0.125, 0.0]
    negated_row = [-1.0, -0.5, 0.25, 0.125, 0.0]
    waveform = np.array([row, row], dtype=np.float32)

    transform = PolarityInversion(p=1.0)
    output = transform(
        samples=torch.from_numpy(waveform), sample_rate=16000
    ).numpy()

    reference = np.array([negated_row, negated_row], dtype=np.float32)
    assert_almost_equal(output, reference)
    self.assertEqual(output.dtype, np.float32)
def test_compose_without_specifying_output_type(self):
    """Without an explicit ``output_type``, ``Compose`` must return a plain tensor.

    The values are then checked the same way as in the regular compose test:
    input scaled by the -6 dB amplitude ratio and negated.
    """
    values = [[[1.0, 0.5, -0.25, -0.125, 0.0]]]
    waveform = np.array(values, dtype=np.float32)

    pipeline = Compose(
        [
            Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0),
            PolarityInversion(p=1.0),
        ]
    )
    result = pipeline(samples=torch.from_numpy(waveform), sample_rate=16000)

    # The return type should be torch.Tensor until we switch to ObjectDict as default
    assert type(result) is torch.Tensor

    output = result.numpy()
    signed_ratio = -convert_decibels_to_amplitude_ratio(-6)
    reference = signed_ratio * np.array(values, dtype=np.float32)

    assert_almost_equal(output, reference, decimal=6)
    self.assertEqual(output.dtype, np.float32)
def test_polarity_inversion_variability_within_batch(self):
    """With ``p=0.5`` only part of a large batch should be inverted.

    Every input value is non-negative, so a row whose sum is still positive
    was left alone and any other row was inverted. The number of inverted
    rows must fall strictly between 2000 and 8000 out of 10000.
    """
    samples = np.array([1.0, 0.5, 0.25, 0.125, 0.0], dtype=np.float32)
    samples_batch = np.vstack([samples] * 10000)
    sample_rate = 16000

    augment = PolarityInversion(p=0.5)
    processed_samples = augment(
        samples=torch.from_numpy(samples_batch), sample_rate=sample_rate
    ).numpy()

    # Classify all rows at once instead of looping over them in Python.
    row_sums = processed_samples.sum(axis=1)
    num_unprocessed_examples = int(np.count_nonzero(row_sums > 0))
    # Everything that is not strictly positive counts as processed.
    num_processed_examples = processed_samples.shape[0] - num_unprocessed_examples

    self.assertEqual(num_unprocessed_examples + num_processed_examples, 10000)
    self.assertGreater(num_processed_examples, 2000)
    self.assertLess(num_processed_examples, 8000)
shuffle=True, ), "name": "Shuffled Compose with Gain and PeakNormalization", "num_runs": 5, }, { "get_instance": lambda: Compose( transforms=[ Gain(min_gain_in_db=-18.0, max_gain_in_db=-16.0, mode=mode, p=0.5), PolarityInversion(mode=mode, p=0.5), ], shuffle=True, ), "name": "Compose with Gain and PolarityInversion", "num_runs": 5, }, { "get_instance": lambda: Gain(mode=mode, p=1.0), "num_runs": 5 }, { "get_instance": lambda: HighPassFilter(mode=mode, p=1.0), "num_runs": 5
def test_parameters(self):
    """The nn.Module ``parameters()`` method must be reachable on a transform.

    Only accessibility is checked: the call has to return a generator.
    """
    transform = PolarityInversion(p=1.0)
    assert isinstance(transform.parameters(), types.GeneratorType)
min_gain_in_db=-18.0, max_gain_in_db=-16.0, mode=mode, p=1.0 ), PeakNormalization(mode=mode, p=1.0), ], shuffle=True, ), "name": "Shuffled Compose with Gain and PeakNormalization", "num_runs": 5, }, { "get_instance": lambda: Compose( transforms=[ Gain( min_gain_in_db=-18.0, max_gain_in_db=-16.0, mode=mode, p=0.5 ), PolarityInversion(mode=mode, p=0.5), ], shuffle=True, ), "name": "Compose with Gain and PolarityInversion", "num_runs": 5, }, {"get_instance": lambda: Gain(mode=mode, p=1.0), "num_runs": 5}, {"get_instance": lambda: PolarityInversion(mode=mode, p=1.0), "num_runs": 1}, {"get_instance": lambda: PeakNormalization(mode=mode, p=1.0), "num_runs": 1}, {"get_instance": lambda: Shift(mode=mode, p=1.0), "num_runs": 5}, {"get_instance": lambda: ShuffleChannels(mode=mode, p=1.0), "num_runs": 5}, ] execution_times = {}
if __name__ == "__main__": """ For each transformation, apply it to an example sound and write the transformed sounds to an output folder. Also crudely measure and print execution time. """ output_dir = os.path.join(SCRIPTS_DIR, "output") os.makedirs(output_dir, exist_ok=True) np.random.seed(42) random.seed(42) samples, _ = librosa.load( os.path.join(TEST_FIXTURES_DIR, "acoustic_guitar_0.wav"), sr=SAMPLE_RATE ) transforms = [{"instance": PolarityInversion(p=1.0), "num_runs": 1}] execution_times = {} for transform in transforms: augmenter = transform["instance"] run_name = ( transform.get("name") if transform.get("name") else transform["instance"].__class__.__name__ ) execution_times[run_name] = [] for i in range(transform["num_runs"]): output_file_path = os.path.join( output_dir, "{}_{:03d}.wav".format(run_name, i) )
) BG_NOISE_PATH = TEST_FIXTURES_DIR / "bg" IR_PATH = TEST_FIXTURES_DIR / "ir" @pytest.mark.parametrize( "augment", [ # Differentiable transforms: AddBackgroundNoise(BG_NOISE_PATH, 20, p=1.0, output_type="dict"), ApplyImpulseResponse(IR_PATH, p=1.0, output_type="dict"), Compose( transforms=[ Gain(min_gain_in_db=-15.0, max_gain_in_db=5.0, p=1.0), PolarityInversion(p=1.0), ], output_type="dict", ), Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0, output_type="dict"), PolarityInversion(p=1.0, output_type="dict"), Shift(p=1.0, output_type="dict"), # Non-differentiable transforms: # RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: # [torch.DoubleTensor [1, 1, 5]], which is output 0 of IndexBackward, is at version 1; expected version 0 instead. # Hint: enable anomaly detection to find the operation that failed to compute its gradient, # with torch.autograd.set_detect_anomaly(True). pytest.param(
shuffle=True, ), "name": "Shuffled Compose with Gain and PeakNormalization", "num_runs": 5, }, { "instance": Compose( transforms=[ Gain(min_gain_in_db=-18.0, max_gain_in_db=-16.0, mode=mode, p=0.5), PolarityInversion(mode=mode, p=0.5), ], shuffle=True, ), "name": "Compose with Gain and PolarityInversion", "num_runs": 5, }, { "instance": Gain(mode=mode, p=1.0), "num_runs": 5 }, { "instance": PolarityInversion(mode=mode, p=1.0), "num_runs": 1
min_gain_in_db=-18.0, max_gain_in_db=-16.0, mode=mode, p=1.0 ), PeakNormalization(mode=mode, p=1.0), ], shuffle=True, ), "name": "Shuffled Compose with Gain and PeakNormalization", "num_runs": 5, }, { "instance": Compose( transforms=[ Gain( min_gain_in_db=-18.0, max_gain_in_db=-16.0, mode=mode, p=0.5 ), PolarityInversion(mode=mode, p=0.5), ], shuffle=True, ), "name": "Compose with Gain and PolarityInversion", "num_runs": 5, }, {"instance": Gain(mode=mode, p=1.0), "num_runs": 5}, {"instance": PolarityInversion(mode=mode, p=1.0), "num_runs": 1}, {"instance": PeakNormalization(mode=mode, p=1.0), "num_runs": 1}, {"instance": Shift(mode=mode, p=1.0), "num_runs": 5}, ] execution_times = {} for transform in transforms:
"""Demo script: load one audio file, apply a random gain, and save the result."""
import torch
import torchaudio
from torch_audiomentations import Compose, Gain, PolarityInversion

# Initialize augmentation callable.
# PolarityInversion is configured with p=0.0, so in this setup only the gain
# is actually varied.
apply_augmentation = Compose(
    transforms=[
        Gain(
            min_gain_in_db=-15.0,
            max_gain_in_db=40.0,
            p=1.0,
        ),
        PolarityInversion(p=0.0),
    ]
)

torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# torchaudio.load returns a (waveform, sample_rate) pair. Unpack both so the
# file's real sample rate is used below instead of a hard-coded value.
waveform, sample_rate = torchaudio.load(
    "/Users/bdubel/Documents/ZHAW/BA/data/eth_ch_dialects/ag/ch_ag_0107.wav")

# The transforms work on batches shaped like
# torch.rand(size=(8, 2, 32000)) - 0.5, i.e. (batch, channels, samples).
# A loaded file has no batch dimension, so add one of size 1.
audio_samples = waveform.unsqueeze(0).to(torch_device)

# Apply augmentation. This varies the gain of each audio snippet in the
# batch independently.
perturbed_audio_samples = apply_augmentation(audio_samples, sample_rate=sample_rate)

# Drop the batch dimension again and move the data to the CPU before saving.
torchaudio.save(
    '/Users/bdubel/Documents/ZHAW/BA/data/swiss_all/perturbation/test1.flac',
    perturbed_audio_samples.squeeze(0).cpu(),
    sample_rate=sample_rate)
HighPassFilter, ) BG_NOISE_PATH = TEST_FIXTURES_DIR / "bg" IR_PATH = TEST_FIXTURES_DIR / "ir" @pytest.mark.parametrize( "augment", [ # Differentiable transforms: AddBackgroundNoise(BG_NOISE_PATH, 20, p=1.0), ApplyImpulseResponse(IR_PATH, p=1.0), Compose(transforms=[ Gain(min_gain_in_db=-15.0, max_gain_in_db=5.0, p=1.0), PolarityInversion(p=1.0), ]), Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0), PolarityInversion(p=1.0), Shift(p=1.0), # Non-differentiable transforms: # RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: # [torch.DoubleTensor [1, 1, 5]], which is output 0 of IndexBackward, is at version 1; expected version 0 instead. # Hint: enable anomaly detection to find the operation that failed to compute its gradient, # with torch.autograd.set_detect_anomaly(True). pytest.param(HighPassFilter(p=1.0), marks=pytest.mark.skip("Not differentiable")), pytest.param(LowPassFilter(p=1.0), marks=pytest.mark.skip("Not differentiable")), pytest.param(PeakNormalization(p=1.0), marks=pytest.mark.skip("Not differentiable")),