def setUp(self):
    """Build the audio fixtures and background-noise transforms shared by the tests.

    Loads one fixture clip at ``self.sample_rate``, prepends two singleton
    dimensions, repeats it ``self.batch_size`` times along dim 0, and creates
    three ``AddBackgroundNoise`` instances (two with p=1.0, one with p=0.0).
    """
    self.sample_rate = 16000
    self.batch_size = 32
    self.empty_input_audio = torch.empty(0)

    # TODO: use utils.io.Audio
    guitar_samples = load_audio(
        TEST_FIXTURES_DIR / "acoustic_guitar_0.wav",
        sample_rate=self.sample_rate,
    )
    guitar_tensor = torch.from_numpy(guitar_samples)
    # Prepend two singleton dimensions to the loaded clip.
    self.input_audio = guitar_tensor.unsqueeze(0).unsqueeze(0)
    # Repeat the single example batch_size times along the first dimension.
    self.input_audios = torch.cat(
        [self.input_audio for _ in range(self.batch_size)],
        dim=0,
    )

    self.bg_path = TEST_FIXTURES_DIR / "bg"
    self.bg_short_path = TEST_FIXTURES_DIR / "bg_short"

    # NOTE(review): the positional 20 is presumably an SNR bound -- confirm
    # against the AddBackgroundNoise signature.
    self.bg_noise_transform_guaranteed = AddBackgroundNoise(
        self.bg_path, 20, p=1.0
    )
    self.bg_short_noise_transform_guaranteed = AddBackgroundNoise(
        self.bg_short_path, 20, p=1.0
    )
    self.bg_noise_transform_no_guarantee = AddBackgroundNoise(
        self.bg_path, 20, p=0.0
    )
def test_invalid_params(self):
    """Expect ValueError when the transform is built with min_snr_in_db=30 and max_snr_in_db=3."""
    with self.assertRaises(ValueError):
        # Only the exception matters here, so the instance is not bound to a name.
        AddBackgroundNoise(
            self.bg_path,
            min_snr_in_db=30,
            max_snr_in_db=3,
            p=1.0,
            output_type="dict",
        )
def test_varying_snr_within_batch(self):
    """Check per-example SNR bounds and SNR spread across one augmented batch.

    Every example's measured SNR must lie within [3, 30] dB, and the
    difference between the largest and smallest measured SNR in the batch
    must exceed 13.37 dB.
    """
    snr_floor = 3
    snr_ceiling = 30
    augment = AddBackgroundNoise(
        self.bg_path,
        min_snr_in_db=snr_floor,
        max_snr_in_db=snr_ceiling,
        p=1.0,
        output_type="dict",
    )
    output = augment(self.input_audios, self.sample_rate)
    augmented_audios = output.samples

    self.assertEqual(
        tuple(augmented_audios.shape), tuple(self.input_audios.shape)
    )
    self.assertFalse(torch.equal(augmented_audios, self.input_audios))

    # Whatever the transform added is recovered by subtracting the clean input.
    residuals = augmented_audios - self.input_audios

    measured_snrs = []
    for clean, residual in zip(self.input_audios, residuals):
        rms_ratio = calculate_rms(clean) / calculate_rms(residual)
        snr_in_db = 20 * torch.log10(rms_ratio).item()
        self.assertGreaterEqual(snr_in_db, snr_floor)
        self.assertLessEqual(snr_in_db, snr_ceiling)
        measured_snrs.append(snr_in_db)

    snr_spread = max(measured_snrs) - min(measured_snrs)
    self.assertGreater(snr_spread, 13.37)
def test_compatibility_of_resampled_length(self):
    """Run the transform on random input/background lengths and sample rates.

    For 30 seeded combinations, a uniform-noise WAV file is written at a
    random sample rate into a fresh temporary directory, and the transform
    is applied to a random input of a different length and sample rate.
    The test passes if no call raises.
    """
    random.seed(42)
    for _ in range(30):
        input_length = random.randint(1333, 1399)
        bg_length = random.randint(1333, 1399)
        input_sample_rate = random.randint(1000, 5000)
        bg_sample_rate = random.randint(1000, 5000)

        noise = np.random.uniform(
            low=-0.2,
            high=0.2,
            size=(bg_length,),
        ).astype(np.float32)

        # The context manager creates the directory and removes it on exit,
        # including when the body raises. A failure to create the directory
        # is no longer masked by a follow-up cleanup error.
        with tempfile.TemporaryDirectory() as tmp_dir:
            write(
                os.path.join(tmp_dir, "noise.wav"),
                rate=bg_sample_rate,
                data=noise,
            )

            # Printed so a failing combination can be identified from the test output.
            print(
                f"input_length={input_length}, input_sample_rate={input_sample_rate},"
                f" bg_length={bg_length}, bg_sample_rate={bg_sample_rate}"
            )

            input_audio = torch.randn(1, 1, input_length, dtype=torch.float32)
            transform = AddBackgroundNoise(
                tmp_dir,
                min_snr_in_db=4,
                max_snr_in_db=6,
                p=1.0,
                sample_rate=input_sample_rate,
                output_type="dict",
            )
            transform(input_audio)
def test_min_equals_max(self):
    """Check that equal min and max SNR settings yield that SNR for every example.

    With both bounds set to 3.0 dB, each example's measured SNR must match
    3.0 to 5 decimal places.
    """
    desired_snr = 3.0
    augment = AddBackgroundNoise(
        self.bg_path,
        min_snr_in_db=desired_snr,
        max_snr_in_db=desired_snr,
        p=1.0,
        output_type="dict",
    )
    output = augment(self.input_audios, self.sample_rate)
    augmented_audios = output.samples

    self.assertEqual(
        tuple(augmented_audios.shape), tuple(self.input_audios.shape)
    )
    self.assertFalse(torch.equal(augmented_audios, self.input_audios))

    # Whatever the transform added is recovered by subtracting the clean input.
    residuals = augmented_audios - self.input_audios

    for clean, residual in zip(self.input_audios, residuals):
        rms_ratio = calculate_rms(clean) / calculate_rms(residual)
        snr_in_db = 20 * torch.log10(rms_ratio).item()
        self.assertAlmostEqual(snr_in_db, desired_snr, places=5)
PolarityInversion, Compose, Shift, LowPassFilter, HighPassFilter, ) BG_NOISE_PATH = TEST_FIXTURES_DIR / "bg" IR_PATH = TEST_FIXTURES_DIR / "ir" @pytest.mark.parametrize( "augment", [ # Differentiable transforms: AddBackgroundNoise(BG_NOISE_PATH, 20, p=1.0, output_type="dict"), ApplyImpulseResponse(IR_PATH, p=1.0, output_type="dict"), Compose( transforms=[ Gain(min_gain_in_db=-15.0, max_gain_in_db=5.0, p=1.0), PolarityInversion(p=1.0), ], output_type="dict", ), Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0, output_type="dict"), PolarityInversion(p=1.0, output_type="dict"), Shift(p=1.0, output_type="dict"), # Non-differentiable transforms:
PolarityInversion, Compose, Shift, LowPassFilter, HighPassFilter, ) BG_NOISE_PATH = TEST_FIXTURES_DIR / "bg" IR_PATH = TEST_FIXTURES_DIR / "ir" @pytest.mark.parametrize( "augment", [ # Differentiable transforms: AddBackgroundNoise(BG_NOISE_PATH, 20, p=1.0), ApplyImpulseResponse(IR_PATH, p=1.0), Compose(transforms=[ Gain(min_gain_in_db=-15.0, max_gain_in_db=5.0, p=1.0), PolarityInversion(p=1.0), ]), Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0), PolarityInversion(p=1.0), Shift(p=1.0), # Non-differentiable transforms: # RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: # [torch.DoubleTensor [1, 1, 5]], which is output 0 of IndexBackward, is at version 1; expected version 0 instead. # Hint: enable anomaly detection to find the operation that failed to compute its gradient, # with torch.autograd.set_detect_anomaly(True). pytest.param(HighPassFilter(p=1.0), marks=pytest.mark.skip("Not differentiable")),
filenames = ["perfect-alley1.ogg", "perfect-alley2.ogg"] samples1, _ = librosa.load( os.path.join(TEST_FIXTURES_DIR, filenames[0]), sr=SAMPLE_RATE, mono=False ) samples2, _ = librosa.load( os.path.join(TEST_FIXTURES_DIR, filenames[1]), sr=SAMPLE_RATE, mono=False ) samples = np.stack((samples1, samples2), axis=0) samples = torch.from_numpy(samples) modes = ["per_batch", "per_example", "per_channel"] for mode in modes: transforms = [ { "instance": AddBackgroundNoise( background_paths=TEST_FIXTURES_DIR / "bg", mode=mode, p=1.0 ), "num_runs": 5, }, { "instance": ApplyImpulseResponse( ir_paths=TEST_FIXTURES_DIR / "ir", mode=mode, p=1.0 ), "num_runs": 1, }, { "instance": Compose( transforms=[ Gain( min_gain_in_db=-18.0, max_gain_in_db=-16.0, mode=mode, p=1.0 ),