def setUp(self):
        """Build the audio tensors and AddBackgroundNoise instances shared by the tests.

        Sets ``sample_rate`` (16000) and ``batch_size`` (32), loads the
        ``acoustic_guitar_0.wav`` fixture at that sample rate, and stores:

        * ``empty_input_audio`` - a zero-element tensor,
        * ``input_audio`` - the loaded fixture with two leading singleton
          dimensions prepended,
        * ``input_audios`` - ``input_audio`` repeated ``batch_size`` times
          along dim 0,
        * three AddBackgroundNoise instances built from the ``bg`` and
          ``bg_short`` fixture directories with ``p=1.0`` or ``p=0.0``.
        """
        self.sample_rate = 16000
        self.batch_size = 32
        self.empty_input_audio = torch.empty(0)

        # TODO: use utils.io.Audio
        guitar_samples = load_audio(
            TEST_FIXTURES_DIR / "acoustic_guitar_0.wav",
            sample_rate=self.sample_rate,
        )
        # Prepend two singleton dimensions to the loaded array
        # (presumably batch and channel - confirm against load_audio's output).
        self.input_audio = torch.from_numpy(guitar_samples).unsqueeze(0).unsqueeze(0)

        # A batch made of batch_size copies of the same example.
        self.input_audios = torch.cat(
            [self.input_audio for _ in range(self.batch_size)], dim=0
        )

        self.bg_path = TEST_FIXTURES_DIR / "bg"
        self.bg_short_path = TEST_FIXTURES_DIR / "bg_short"

        # p=1.0 variants are named "guaranteed", the p=0.0 variant "no_guarantee".
        self.bg_noise_transform_guaranteed = AddBackgroundNoise(
            self.bg_path, 20, p=1.0
        )
        self.bg_short_noise_transform_guaranteed = AddBackgroundNoise(
            self.bg_short_path, 20, p=1.0
        )
        self.bg_noise_transform_no_guarantee = AddBackgroundNoise(
            self.bg_path, 20, p=0.0
        )
 def test_invalid_params(self):
     """Constructing AddBackgroundNoise with min_snr_in_db > max_snr_in_db raises ValueError."""
     with self.assertRaises(ValueError):
         # Only the constructor's validation is under test, so the instance
         # is deliberately not bound to a name.
         AddBackgroundNoise(
             self.bg_path,
             min_snr_in_db=30,
             max_snr_in_db=3,
             p=1.0,
             output_type="dict",
         )
    def test_varying_snr_within_batch(self):
        """Check that each example's SNR lies in [min, max] and that SNRs vary across the batch.

        The transform is applied to ``self.input_audios`` with an SNR range of
        3..30 dB. The difference between output and input is treated as the
        added noise, the per-example SNR is derived from the RMS ratio, and the
        spread between the largest and smallest SNR must exceed 13.37 dB.
        """
        snr_floor = 3
        snr_ceiling = 30
        transform = AddBackgroundNoise(
            self.bg_path,
            min_snr_in_db=snr_floor,
            max_snr_in_db=snr_ceiling,
            p=1.0,
            output_type="dict",
        )
        output = transform(self.input_audios, self.sample_rate).samples

        # The transform must keep the shape but actually change the samples.
        self.assertEqual(tuple(output.shape), tuple(self.input_audios.shape))
        self.assertFalse(torch.equal(output, self.input_audios))

        # Whatever was added on top of the input is the noise component.
        residual = output - self.input_audios

        observed_snrs = []
        for clean, noise in zip(self.input_audios, residual):
            rms_ratio = calculate_rms(clean) / calculate_rms(noise)
            snr = 20 * torch.log10(rms_ratio).item()
            self.assertGreaterEqual(snr, snr_floor)
            self.assertLessEqual(snr, snr_ceiling)
            observed_snrs.append(snr)

        # With a 27 dB wide range the examples should not all share one SNR.
        snr_spread = max(observed_snrs) - min(observed_snrs)
        self.assertGreater(snr_spread, 13.37)
    def test_compatibility_of_resampled_length(self):
        """Smoke-test AddBackgroundNoise with random input/background lengths and sample rates.

        For 30 iterations a background noise file of random length and sample
        rate is written to a fresh temporary directory, and the transform is
        applied to a random input of random length at a different random
        sample rate. The test passes when no iteration raises.

        NOTE: only the ``random`` module is seeded here; the noise samples
        (``np.random``) and the input tensor (``torch.randn``) are not, so only
        the lengths and sample rates are reproducible between runs.
        """
        random.seed(42)

        for _ in range(30):
            input_length = random.randint(1333, 1399)
            bg_length = random.randint(1333, 1399)
            input_sample_rate = random.randint(1000, 5000)
            bg_sample_rate = random.randint(1000, 5000)

            noise = np.random.uniform(
                low=-0.2,
                high=0.2,
                size=(bg_length,),
            ).astype(np.float32)

            # TemporaryDirectory creates a unique directory and removes it when
            # the block exits, whether or not the transform raises. Unlike a
            # manual makedirs/rmtree pair, cleanup is never attempted on a
            # directory that failed to be created, so the original error is
            # not masked.
            with tempfile.TemporaryDirectory() as tmp_dir:
                write(
                    os.path.join(tmp_dir, "noise.wav"),
                    rate=bg_sample_rate,
                    data=noise,
                )

                # Printed so that a failing combination can be identified
                # from the captured test output.
                print(
                    f"input_length={input_length}, input_sample_rate={input_sample_rate},"
                    f" bg_length={bg_length}, bg_sample_rate={bg_sample_rate}")
                input_audio = torch.randn(
                    1, 1, input_length, dtype=torch.float32
                )
                transform = AddBackgroundNoise(
                    tmp_dir,
                    min_snr_in_db=4,
                    max_snr_in_db=6,
                    p=1.0,
                    sample_rate=input_sample_rate,
                    output_type="dict",
                )
                transform(input_audio)
    def test_min_equals_max(self):
        """Check that equal min and max SNR bounds yield exactly that SNR for every example.

        The difference between output and input is treated as the added noise;
        the SNR derived from the RMS ratio must match 3.0 dB to 5 decimal
        places for each example in ``self.input_audios``.
        """
        target_snr = 3.0
        transform = AddBackgroundNoise(
            self.bg_path,
            min_snr_in_db=target_snr,
            max_snr_in_db=target_snr,
            p=1.0,
            output_type="dict",
        )
        output = transform(self.input_audios, self.sample_rate).samples

        # The transform must keep the shape but actually change the samples.
        self.assertEqual(tuple(output.shape), tuple(self.input_audios.shape))
        self.assertFalse(torch.equal(output, self.input_audios))

        residual = output - self.input_audios
        for clean, noise in zip(self.input_audios, residual):
            rms_ratio = calculate_rms(clean) / calculate_rms(noise)
            snr = 20 * torch.log10(rms_ratio).item()
            self.assertAlmostEqual(snr, target_snr, places=5)
    PolarityInversion,
    Compose,
    Shift,
    LowPassFilter,
    HighPassFilter,
)

# Fixture directories under TEST_FIXTURES_DIR used by the parametrized
# transforms below: BG_NOISE_PATH is passed to AddBackgroundNoise and IR_PATH
# to ApplyImpulseResponse (presumably background-noise and impulse-response
# audio files respectively - confirm against the fixtures directory).
BG_NOISE_PATH = TEST_FIXTURES_DIR / "bg"
IR_PATH = TEST_FIXTURES_DIR / "ir"


@pytest.mark.parametrize(
    "augment",
    [
        # Differentiable transforms:
        AddBackgroundNoise(BG_NOISE_PATH, 20, p=1.0, output_type="dict"),
        ApplyImpulseResponse(IR_PATH, p=1.0, output_type="dict"),
        Compose(
            transforms=[
                Gain(min_gain_in_db=-15.0, max_gain_in_db=5.0, p=1.0),
                PolarityInversion(p=1.0),
            ],
            output_type="dict",
        ),
        Gain(min_gain_in_db=-6.000001,
             max_gain_in_db=-6,
             p=1.0,
             output_type="dict"),
        PolarityInversion(p=1.0, output_type="dict"),
        Shift(p=1.0, output_type="dict"),
        # Non-differentiable transforms:
    PolarityInversion,
    Compose,
    Shift,
    LowPassFilter,
    HighPassFilter,
)

# Fixture directories under TEST_FIXTURES_DIR used by the parametrized
# transforms below: BG_NOISE_PATH is passed to AddBackgroundNoise and IR_PATH
# to ApplyImpulseResponse (presumably background-noise and impulse-response
# audio files respectively - confirm against the fixtures directory).
BG_NOISE_PATH = TEST_FIXTURES_DIR / "bg"
IR_PATH = TEST_FIXTURES_DIR / "ir"


@pytest.mark.parametrize(
    "augment",
    [
        # Differentiable transforms:
        AddBackgroundNoise(BG_NOISE_PATH, 20, p=1.0),
        ApplyImpulseResponse(IR_PATH, p=1.0),
        Compose(transforms=[
            Gain(min_gain_in_db=-15.0, max_gain_in_db=5.0, p=1.0),
            PolarityInversion(p=1.0),
        ]),
        Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0),
        PolarityInversion(p=1.0),
        Shift(p=1.0),
        # Non-differentiable transforms:
        # RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation:
        # [torch.DoubleTensor [1, 1, 5]], which is output 0 of IndexBackward, is at version 1; expected version 0 instead.
        # Hint: enable anomaly detection to find the operation that failed to compute its gradient,
        # with torch.autograd.set_detect_anomaly(True).
        pytest.param(HighPassFilter(p=1.0),
                     marks=pytest.mark.skip("Not differentiable")),
Beispiel #8
0
    filenames = ["perfect-alley1.ogg", "perfect-alley2.ogg"]
    samples1, _ = librosa.load(
        os.path.join(TEST_FIXTURES_DIR, filenames[0]), sr=SAMPLE_RATE, mono=False
    )
    samples2, _ = librosa.load(
        os.path.join(TEST_FIXTURES_DIR, filenames[1]), sr=SAMPLE_RATE, mono=False
    )
    samples = np.stack((samples1, samples2), axis=0)
    samples = torch.from_numpy(samples)

    modes = ["per_batch", "per_example", "per_channel"]
    for mode in modes:
        transforms = [
            {
                "instance": AddBackgroundNoise(
                    background_paths=TEST_FIXTURES_DIR / "bg", mode=mode, p=1.0
                ),
                "num_runs": 5,
            },
            {
                "instance": ApplyImpulseResponse(
                    ir_paths=TEST_FIXTURES_DIR / "ir", mode=mode, p=1.0
                ),
                "num_runs": 1,
            },
            {
                "instance": Compose(
                    transforms=[
                        Gain(
                            min_gain_in_db=-18.0, max_gain_in_db=-16.0, mode=mode, p=1.0
                        ),