Exemple #1
0
def get_transforms(bckgrd_aug_dir=None, secondary_bckgrd_aug_dir=None):
    """Assemble the ``"train"`` / ``"valid"`` transform mapping.

    The training chain is: Gaussian noise, Gaussian SNR noise and gain,
    then the optional background-noise and short-noise steps, then the same
    noise/gain trio a second time.

    Args:
        bckgrd_aug_dir: Passed to ``AddBackgroundNoise`` when not ``None``.
        secondary_bckgrd_aug_dir: Passed to ``AddShortNoises`` when not
            ``None``.

    Returns:
        A dict with the result of ``get_training_augmentation`` under
        ``"train"`` and of ``get_validation_augmentation`` under ``"valid"``.
    """

    def _noise_and_gain():
        # Built fresh on every call because the trio appears twice in the
        # chain and each occurrence is its own transform instance.
        return [
            AddGaussianNoise(p=0.2),
            AddGaussianSNR(p=0.2),
            Gain(min_gain_in_db=-15, max_gain_in_db=15, p=0.3),
        ]

    steps = _noise_and_gain()

    if bckgrd_aug_dir is not None:
        steps.append(AddBackgroundNoise(bckgrd_aug_dir, p=0.2))

    if secondary_bckgrd_aug_dir is not None:
        short_noises = AddShortNoises(
            secondary_bckgrd_aug_dir,
            min_time_between_sounds=0.0,
            max_time_between_sounds=15.0,
            burst_probability=0.5,
            p=0.6,
        )
        steps.append(short_noises)

    steps.extend(_noise_and_gain())

    return {
        "train": get_training_augmentation(Compose(steps)),
        "valid": get_validation_augmentation(),
    }
def make_transform():
    """Return a ``Compose`` of three transforms, each applied with p=0.25.

    The steps, in order, are ``FrequencyMask``, ``TimeStretch`` and
    ``AddGaussianSNR``.
    """
    chance = 0.25
    steps = [
        FrequencyMask(
            min_frequency_band=0.005,
            max_frequency_band=0.10,
            p=chance,
        ),
        TimeStretch(min_rate=0.15, max_rate=0.25, p=chance),
        AddGaussianSNR(min_SNR=0.001, max_SNR=0.25, p=chance),
    ]
    return Compose(steps)
def get_transforms(bckgrd_aug_dir=None):
    """Assemble the ``"train"`` / ``"valid"`` transform mapping.

    Args:
        bckgrd_aug_dir: Passed to ``AddBackgroundNoise`` (p=0.5) when not
            ``None``; otherwise only the two Gaussian noise steps are used.

    Returns:
        A dict with the result of ``get_training_augmentation`` under
        ``"train"`` and of ``get_validation_augmentation`` under ``"valid"``.
    """
    steps = [
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.3),
        AddGaussianSNR(p=0.3),
    ]
    if bckgrd_aug_dir is not None:
        steps.append(AddBackgroundNoise(bckgrd_aug_dir, p=0.5))

    train_transform = get_training_augmentation(Compose(steps))
    valid_transform = get_validation_augmentation()
    return {"train": train_transform, "valid": valid_transform}
Exemple #4
0
def compose_augmentations(rir_path):
    """Build the noise / reverberation augmentation chain rooted at ``rir_path``.

    ``rir_path`` must contain the sub-directories ``simulated_rirs`` (passed
    to ``AddImpulseResponse``) and ``pointsource_noises`` (passed to
    ``AddBackgroundNoise`` and ``AddShortNoises``).

    Args:
        rir_path: Directory holding the two sub-directories named above.

    Returns:
        A ``Compose`` of ``AddGaussianSNR``, ``AddImpulseResponse``,
        ``AddBackgroundNoise`` and ``AddShortNoises``.

    Raises:
        ValueError: If either required sub-directory is missing or is not a
            directory. The message lists the offending paths.
    """
    impulse_path = os.path.join(rir_path, 'simulated_rirs')
    noise_path = os.path.join(rir_path, 'pointsource_noises')

    # Report exactly what is missing: rir_path itself may exist while one of
    # its sub-directories does not.
    missing = [
        path for path in (impulse_path, noise_path)
        if not os.path.isdir(path)
    ]
    if missing:
        raise ValueError(
            'Unable to augment signal, rir_path "{}" is missing required '
            'sub-directories: {}'.format(rir_path, ', '.join(missing)))

    return Compose([
        AddGaussianSNR(min_SNR=0.2, max_SNR=0.5, p=0.5),
        AddImpulseResponse(impulse_path, leave_length_unchanged=True, p=0.3),
        AddBackgroundNoise(noise_path, p=0.3),
        AddShortNoises(noise_path, max_snr_in_db=80, p=0.3)
    ])
Exemple #5
0
def compose_without_noise(ir_path='data/impulse'):
    """Build a ``MyCompose`` chain of nine transforms, without background noise.

    Most steps share a probability of 0.25; ``TimeStretch`` and
    ``PitchShift`` use a tenth and a twenty-fifth of that respectively.

    Args:
        ir_path: Forwarded to ``MyAddImpulseResponse`` as ``ir_path``.

    Returns:
        ``MyCompose`` built with ``p=1.0`` and ``max_augs=3``.
    """
    base_p = 0.25

    return MyCompose(
        [
            AddGaussianNoise(p=base_p),
            Shift(p=base_p, min_fraction=-0.2, max_fraction=0.2),
            FrequencyMask(p=base_p),
            TimeMask(p=base_p, max_band_part=0.25),
            AddGaussianSNR(p=base_p),
            ClippingDistortion(p=base_p, max_percentile_threshold=20),
            MyAddImpulseResponse(p=base_p, ir_path=ir_path),
            TimeStretch(p=base_p / 10),
            PitchShift(p=base_p / 25),
        ],
        p=1.0,
        max_augs=3,
    )
Exemple #6
0
def compose(sounds_path):
  """Build a shuffled ``Compose`` chain of ten transforms.

  Most steps share a probability of 0.2; ``TimeStretch`` and ``PitchShift``
  use a tenth and a thirtieth of that respectively.

  Args:
    sounds_path: Forwarded to ``AddBackgroundNoise`` as ``sounds_path``.

  Returns:
    ``Compose`` built with ``p=0.4`` and ``shuffle=True``.
  """
  base_p = 0.2

  return Compose(
    [
      MyGain(p=base_p),
      AddGaussianNoise(p=base_p),
      Shift(p=base_p, min_fraction=-0.25, max_fraction=0.25),
      FrequencyMask(p=base_p),
      TimeMask(p=base_p, max_band_part=0.25),
      AddGaussianSNR(p=base_p),
      ClippingDistortion(p=base_p, max_percentile_threshold=20),
      AddBackgroundNoise(sounds_path=sounds_path, p=base_p),
      TimeStretch(p=base_p / 10),
      PitchShift(p=base_p / 30),
    ],
    p=0.4,
    shuffle=True,
  )
Exemple #7
0
 {
     "instance":
     AddBackgroundNoise(sounds_path=os.path.join(
         DEMO_DIR, "background_noises"),
                        p=1.0),
     "num_runs":
     5,
 },
 {
     "instance":
     AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0),
     "num_runs":
     5,
 },
 {
     "instance": AddGaussianSNR(p=1.0),
     "num_runs": 5
 },
 {
     "instance":
     AddImpulseResponse(p=1.0, ir_path=os.path.join(DEMO_DIR, "ir")),
     "num_runs":
     1,
 },
 {
     "instance":
     AddShortNoises(
         sounds_path=os.path.join(DEMO_DIR, "short_noises"),
         min_snr_in_db=0,
         max_snr_in_db=8,
         min_time_between_sounds=2.0,
Exemple #8
0
    def generate(self, wave_file, output_dir):
        """
        Render every enabled augmentation of ``wave_file`` into ``output_dir``.

        An augmentation runs only when element ``[0]`` of its switch attribute
        on ``self`` is truthy. Output files are named
        ``<stem>_<Label><NNN>.wav``, where ``<stem>`` is the part of the input
        base name before its first dot and ``<NNN>`` is the zero-padded run
        index. AddImpulseResponse and Normalize are rendered once, AddWhiteNoise
        and AddPinkNoise as many times as element ``[1]`` of their switch
        attribute says, and everything else five times.
        """
        samples = load_wav_file(wave_file)
        stem = os.path.basename(wave_file).split('.')[0]

        # One row per augmentation, processed top to bottom:
        #   (switch attribute on self, label used in the file name,
        #    factory for the transform, number of runs).
        # Factories keep construction lazy, so a disabled augmentation never
        # touches its resource directory. A run count of None means "read it
        # from element [1] of the switch attribute".
        recipes = [
            ("AddImpulseResponse", "AddImpulseResponse",
             lambda: AddImpulseResponse(
                 p=1.0, ir_path=os.path.join(DEMO_DIR, "ir")),
             1),
            ("FrequencyMask", "FrequencyMask",
             lambda: FrequencyMask(p=1.0),
             5),
            ("TimeMask", "TimeMask",
             lambda: TimeMask(p=1.0),
             5),
            ("AddGaussianSNR", "AddGaussianSNR",
             lambda: AddGaussianSNR(p=1.0),
             5),
            ("AddGaussianNoise", "AddGaussianNoise",
             lambda: AddGaussianNoise(
                 min_amplitude=0.001, max_amplitude=0.015, p=1.0),
             5),
            ("TimeStretch", "TimeStretch",
             lambda: TimeStretch(min_rate=0.5, max_rate=1.5, p=1.0),
             5),
            ("PitchShift", "PitchShift",
             lambda: PitchShift(min_semitones=-6, max_semitones=12, p=1.0),
             5),
            ("Shift", "Shift",
             lambda: Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0),
             5),
            # The switch attribute and the file label differ for this one.
            ("ShiftWithoutRoll", "ShiftWithoutRollover",
             lambda: Shift(
                 min_fraction=-0.2, max_fraction=0.2, rollover=False, p=1.0),
             5),
            ("Normalize", "Normalize",
             lambda: Normalize(p=1.0),
             1),
            ("Resample", "Resample",
             lambda: Resample(
                 min_sample_rate=12000, max_sample_rate=44100, p=1.0),
             5),
            ("ClippingDistortion", "ClippingDistortion",
             lambda: ClippingDistortion(max_percentile_threshold=10, p=1.0),
             5),
            ("AddBackgroundNoise", "AddBackgroundNoise",
             lambda: AddBackgroundNoise(
                 sounds_path=os.path.join(DEMO_DIR, "background_noises"),
                 p=1.0),
             5),
            ("AddWhiteNoise", "AddWhiteNoise",
             lambda: AddBackgroundNoise(
                 sounds_path=os.path.join(DEMO_DIR, "white_noises"),
                 p=1.0),
             None),
            ("AddPinkNoise", "AddPinkNoise",
             lambda: AddBackgroundNoise(
                 sounds_path=os.path.join(DEMO_DIR, "pink_noises"),
                 p=1.0),
             None),
            ("AddShortNoises", "AddShortNoises",
             lambda: AddShortNoises(
                 sounds_path=os.path.join(DEMO_DIR, "short_noises"),
                 min_snr_in_db=0,
                 max_snr_in_db=8,
                 min_time_between_sounds=2.0,
                 max_time_between_sounds=4.0,
                 burst_probability=0.4,
                 min_pause_factor_during_burst=0.01,
                 max_pause_factor_during_burst=0.95,
                 min_fade_in_time=0.005,
                 max_fade_in_time=0.08,
                 min_fade_out_time=0.01,
                 max_fade_out_time=0.1,
                 p=1.0,
             ),
             5),
        ]

        for switch_attr, label, build_transform, fixed_runs in recipes:
            if not getattr(self, switch_attr)[0]:
                continue

            augmenter = Compose([build_transform()])
            if fixed_runs is None:
                run_count = getattr(self, switch_attr)[1]
            else:
                run_count = fixed_runs

            for run_index in range(run_count):
                output_file_path = os.path.join(
                    output_dir,
                    "{}_{}{:03d}.wav".format(stem, label, run_index))
                augmented_samples = augmenter(samples=samples,
                                              sample_rate=SAMPLE_RATE)
                wavfile.write(output_file_path,
                              rate=SAMPLE_RATE,
                              data=augmented_samples)
Exemple #9
0
    def __init__(self,
                 root_dir,
                 csv_dir,
                 conf,
                 bird_code,
                 inv_ebird_label,
                 num_test_samples=10,
                 bckgrd_aug_dir=None,
                 background_audio_dir=None,
                 file_type="mp3",
                 isTraining=True,
                 transform=None,
                 apply_mixer=False):
        """Load the sample index from a CSV file and set up mixing helpers.

        Args:
            root_dir: Stored as ``self.root_dir``.
            csv_dir: Path handed to ``pandas.read_csv``; the file must provide
                the columns ``filename``, ``ebird_code`` and
                ``secondary_labels``.
            conf: Stored as ``self.conf``.
            bird_code: Stored as ``self.bird_code``.
            inv_ebird_label: Stored as ``self.inv_ebird_label``.
            num_test_samples: Stored as ``self.num_test_samples``.
            bckgrd_aug_dir: When given (and ``apply_mixer`` is true), an
                ``AddBackgroundNoise`` step reading from this directory is
                appended to ``self.augmenter``.
            background_audio_dir: Optional directory searched recursively for
                ``*.wav`` files; each hit is appended to ``self.data`` as
                ``{"filename": path}``. Accepts an object with a ``glob``
                method (e.g. ``pathlib.Path``) or a plain path string.
            file_type: Stored as ``self.file_type``.
            isTraining: Stored as ``self.isTraining``.
            transform: Stored as ``self.transform``.
            apply_mixer: When true, ``self.dict_grp``,
                ``self.possible_mixer_keys`` and ``self.augmenter`` are
                created; otherwise those attributes are not set here.
        """
        self.root_dir = root_dir
        self.conf = conf
        self.isTraining = isTraining
        self.bird_code = bird_code
        self.inv_ebird_label = inv_ebird_label
        self.transform = transform
        self.file_type = file_type
        self.apply_mixer = apply_mixer
        self.additional_loader_params = {
            "worker_init_fn": self.init_workers_fn,
            "collate_fn": self.collate_fn
        }
        self.sampler = ImbalancedDatasetSampler

        df = pd.read_csv(csv_dir)
        # NOTE(review): eval() executes whatever expression is stored in the
        # CSV cell. That is only acceptable for trusted files; consider
        # ast.literal_eval if the column only ever holds list literals.
        df.secondary_labels = df.secondary_labels.apply(eval)
        self.data = list(df[["filename", "ebird_code",
                             "secondary_labels"]].to_dict('index').values())

        self.background_audio_dir = background_audio_dir
        if background_audio_dir is not None:
            if hasattr(background_audio_dir, "glob"):
                wav_root = background_audio_dir
            else:
                # Plain strings have no .glob(); wrap them so both str and
                # Path arguments work.
                from pathlib import Path
                wav_root = Path(background_audio_dir)
            # Background clips carry only a filename (no label keys).
            for bk in wav_root.glob('**/*.wav'):
                self.data.append({"filename": bk})

        self.num_test_samples = num_test_samples
        self.length = len(self.data)

        if self.apply_mixer:
            # Row indices of the CSV grouped by primary label.
            self.dict_grp = {
                grp: rows.index.values
                for grp, rows in df.groupby("ebird_code")
            }
            self.possible_mixer_keys = list(self.dict_grp.keys())

            mixer_steps = [
                AddGaussianNoise(min_amplitude=0.001,
                                 max_amplitude=0.015,
                                 p=0.3),
                AddGaussianSNR(p=0.3),
                PitchShift(min_semitones=-4, max_semitones=4, p=0.3),
            ]
            if bckgrd_aug_dir is not None:
                mixer_steps.append(AddBackgroundNoise(bckgrd_aug_dir, p=0.5))
            self.augmenter = Compose(mixer_steps)
import matplotlib.pyplot as plt


# now in different location
files = glob.glob('./dataset/*.wav')
for audio_file in files:
    sample_rate, sound_np = wavfile.read(audio_file)
    if sound_np.dtype != np.float32:
        assert sound_np.dtype == np.int16
        sound_np = np.divide(
            sound_np, 32768, dtype=np.float32
        )
    number = os.path.split(audio_file)[-1][:-4]

    transforms = [
        {"instance": AddGaussianSNR(p=1.0), "num_runs": 3},
        {"instance": TimeStretch(min_rate=0.4, max_rate=1.25, p=1.0), "num_runs": 5},
        {
            "instance": PitchShift(min_semitones=-5, max_semitones=5, p=1.0),
            "num_runs": 6,
        },
        {"instance": Shift(min_fraction=-0.85, max_fraction=0.85, p=1.0), "num_runs": 4},
        {"instance": Resample(p=1.0), "num_runs": 5},
        {"instance": ClippingDistortion(p=1.0), "num_runs": 3},
    ]

    for transform in transforms:
        augmenter = Compose([transform["instance"]])
        run_name = (
            transform.get("name")
            if transform.get("name")
Exemple #11
0
    for i in range(5):
        output_file_path = os.path.join(
            output_dir, "FrequencyMask_{:03d}.wav".format(i)
        )
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # TimeMask
    augmenter = Compose([TimeMask(p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(output_dir, "TimeMask_{:03d}.wav".format(i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # AddGaussianSNR
    augmenter = Compose([AddGaussianSNR(p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(
            output_dir, "AddGaussianSNR_{:03d}.wav".format(i)
        )
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # AddGaussianNoise
    augmenter = Compose(
        [AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0)]
    )
    for i in range(5):
        output_file_path = os.path.join(
            output_dir, "AddGaussianNoise_{:03d}.wav".format(i)
        )
Exemple #12
0
 def __init__(self, augment_type, p, cross_valid=False):
     """Select and build the augmentation named by ``augment_type``.

     The result is stored in ``self.augment``. Recognised values are
     'wham_weak', 'wham_strong', 'reverb_weak', 'reverb_strong', 'cascade',
     'distort' and 'none' (which stores ``None``). The reverb parameters are
     drawn with ``random.randrange`` once, here, at construction time.

     Args:
         augment_type: One of the names listed above.
         p: Probability handed to each transform of the 'cascade' and
             'distort' chains; the 'wham_*' chains always use ``p=1``.
         cross_valid: Chooses the ``cv`` noise directory instead of ``tr``.

     Raises:
         ValueError: If ``augment_type`` is not one of the recognised names.
     """
     self.cross_valid = cross_valid
     self.sample_rate = 8000
     self.type = augment_type
     self.p = p

     if self.cross_valid:
         wham_path = '../../../librimix/data/wham_noise/cv'
     else:
         wham_path = '../../../librimix/data/wham_noise/tr'

     def wham_noise(low_db, high_db, prob):
         # Background noise from the selected WHAM directory in a given
         # SNR window.
         return AddBackgroundNoise(sounds_path=wham_path,
                                   min_snr_in_db=low_db,
                                   max_snr_in_db=high_db,
                                   p=prob)

     def reverb_chain(low, high):
         # All three reverb settings are drawn from the same [low, high)
         # integer range.
         return AudioEffectsChain().reverb(
             reverberance=random.randrange(low, high),
             room_scale=random.randrange(low, high),
             stereo_depth=random.randrange(low, high),
         )

     kind = self.type
     if kind == 'wham_weak':
         self.augment = Compose([wham_noise(5, 15, 1)])
     elif kind == 'wham_strong':
         self.augment = Compose([wham_noise(2, 7, 1)])
     elif kind == 'reverb_weak':
         self.augment = reverb_chain(0, 50)
     elif kind == 'reverb_strong':
         self.augment = reverb_chain(50, 100)
     elif kind == 'cascade':
         cascade_steps = [
             wham_noise(0, 5, self.p),
             AddGaussianSNR(min_SNR=0.001, max_SNR=0.25, p=self.p),
             ClippingDistortion(min_percentile_threshold=0,
                                max_percentile_threshold=40,
                                p=self.p),
             FrequencyMask(min_frequency_band=0.0,
                           max_frequency_band=0.5,
                           p=self.p),
             PolarityInversion(p=self.p),
             Shift(min_fraction=-0.5,
                   max_fraction=0.5,
                   rollover=True,
                   p=self.p),
             TimeMask(min_band_part=0.0,
                      max_band_part=0.2,
                      fade=False,
                      p=self.p),
         ]
         self.augment = Compose(cascade_steps)
     elif kind == 'distort':
         distort_steps = [
             PitchShift(min_semitones=-4, max_semitones=4, p=self.p),
             TimeStretch(min_rate=0.8,
                         max_rate=1.25,
                         leave_length_unchanged=True,
                         p=self.p),
         ]
         self.augment = Compose(distort_steps)
     elif kind == 'none':
         self.augment = None
     else:
         raise ValueError(
             "Did not recognize augmentation type. Received %s, expected 'wham_weak', 'wham_strong', 'reverb_weak', 'reverb_strong', 'cascade', 'distort', or 'none'."
             % kind)
Exemple #13
0
def transform(file_path, output_folder, iterations):
    """
    Run a fixed list of augmentations over one sound file and save the results.

    Every augmentation is applied ``iterations`` times; each result is written
    to ``<output_folder>/<name>_<file_name>_<i>.wav``, where ``<file_name>`` is
    the input base name with every '.wav' removed.
    """
    samples = load_wav_file(file_path)
    file_name = os.path.basename(file_path).replace('.wav', '')

    # (name used in the output file, factory for the transform), processed
    # top to bottom. Factories keep construction lazy: a transform is only
    # built once all earlier ones have been rendered.
    recipes = [
        ('TimeMask', lambda: TimeMask(p=1.0)),
        ('FrequencyMask', lambda: FrequencyMask(p=1.0)),
        ('AddGaussianSNR', lambda: AddGaussianSNR(p=1.0)),
        ('PitchShift',
         lambda: PitchShift(min_semitones=-4, max_semitones=4, p=1.0)),
        # NOTE(review): p=0.5 here, unlike every other entry (p=1.0) --
        # confirm this is intended.
        ('TimeStretch',
         lambda: TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5)),
        ('AddGaussianNoise',
         lambda: AddGaussianNoise(
             min_amplitude=0.001, max_amplitude=0.015, p=1.0)),
        ('Shift',
         lambda: Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0)),
        ('Shift without rollover',
         lambda: Shift(
             min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0)),
        ('Normalize', lambda: Normalize(p=1.0)),
        ('AddImpulseResponse',
         lambda: AddImpulseResponse(
             p=1.0, ir_path=os.path.join(DEMO_DIR, "ir"))),
        ('Resample', lambda: Resample(p=1.0)),
        ('ClippingDistortion', lambda: ClippingDistortion(p=1.0)),
        ('AddBackgroundNoise',
         lambda: AddBackgroundNoise(
             sounds_path=os.path.join(DEMO_DIR, "background_noises"),
             p=1.0)),
        ('AddShortNoises',
         lambda: AddShortNoises(
             sounds_path=os.path.join(DEMO_DIR, "short_noises"),
             min_snr_in_db=0,
             max_snr_in_db=8,
             min_time_between_sounds=2.0,
             max_time_between_sounds=4.0,
             burst_probability=0.4,
             min_pause_factor_during_burst=0.01,
             max_pause_factor_during_burst=0.95,
             min_fade_in_time=0.005,
             max_fade_in_time=0.08,
             min_fade_out_time=0.01,
             max_fade_out_time=0.1,
             p=1.0,
         )),
    ]

    for name, build_transform in recipes:
        pipeline = Compose([build_transform()])
        for run in range(iterations):
            target = '{}/{}_{}_{}.wav'.format(
                output_folder, name, file_name, run)
            rendered = pipeline(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(target, rate=SAMPLE_RATE, data=rendered)
Exemple #14
0
                 min_snr=5.0,
                 max_snr=20.0,
                 **kwargs):
        super().__init__(always_apply, p)

        self.min_snr = min_snr
        self.max_snr = max_snr

    def apply(self, y: np.ndarray, **params):
        """Return ``y`` plus power-law noise scaled to a random peak ratio.

        A value ``snr`` is drawn uniformly from ``[self.min_snr, self.max_snr]``.
        The noise is rescaled so that its peak absolute value equals the peak
        absolute value of ``y`` divided by ``10 ** (snr / 20)``. The result is
        cast back to the dtype of ``y``.
        """
        snr_value = np.random.uniform(self.min_snr, self.max_snr)

        # sqrt(x ** 2) is |x|, so these maxima are peak absolute amplitudes.
        signal_peak = np.sqrt(y**2).max()
        noise_peak = signal_peak / (10**(snr_value / 20))

        # Power-law noise with exponent 1, one value per element of len(y).
        noise = cn.powerlaw_psd_gaussian(1, len(y))
        raw_noise_peak = np.sqrt(noise**2).max()

        mixed = y + noise * 1 / raw_noise_peak * noise_peak
        return mixed.astype(y.dtype)


# Module-level augmentation chain used by do_aug(). Every step is applied
# with probability 0.2, in the order listed: pitch shift, gain, Gaussian
# noise, Gaussian SNR noise, then pink noise.
AUGMENT = Compose([
    PitchShift(min_semitones=-4, max_semitones=4, p=0.2),
    Gain(min_gain_in_db=-12, max_gain_in_db=12, p=0.2),
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.2),
    AddGaussianSNR(min_SNR=0.1, max_SNR=1, p=0.2),
    PinkNoiseSNR(min_snr=5.0, max_snr=10, p=0.2)
])


def do_aug(samples, sample_rate):
    """Run ``samples`` through the module-level ``AUGMENT`` chain.

    Both arguments are forwarded positionally; whatever ``AUGMENT`` returns
    is returned unchanged.
    """
    augmented = AUGMENT(samples, sample_rate)
    return augmented
def applyTransformations(fileName, output_dir, auxiliarSoundsDir):
    """Write augmented variants of one WAV file into ``output_dir``.

    Each augmentation in a fixed list is applied to the samples loaded from
    ``fileName``. Results are written as ``<name>_<Label>_<NNN>.wav``, where
    ``<name>`` is the input file name without directory or extension and
    ``<NNN>`` is the zero-padded run index. AddImpulseResponse and Normalize
    are rendered once; every other augmentation five times.

    Args:
        fileName: Path of the input WAV file.
        output_dir: Directory that receives the generated files.
        auxiliarSoundsDir: Directory containing ``helperSounds/ir``,
            ``helperSounds/background_noises`` and
            ``helperSounds/short_noises``.
    """
    # Take the stem of the base name only. Splitting the whole path on "."
    # produced an empty name for inputs such as "./clip.wav".
    name = os.path.splitext(os.path.basename(fileName))[0]
    samples = load_wav_file(fileName)

    # (label used in the file name, factory for the transform, number of
    # runs), processed top to bottom. Factories keep construction lazy, so a
    # transform is only built once all earlier ones have been written out.
    recipes = [
        ("AddImpulseResponse",
         lambda: AddImpulseResponse(
             p=1.0,
             ir_path=os.path.join(auxiliarSoundsDir, "helperSounds/ir")),
         1),
        ("FrequencyMask", lambda: FrequencyMask(p=1.0), 5),
        ("TimeMask", lambda: TimeMask(p=1.0), 5),
        ("AddGaussianSNR", lambda: AddGaussianSNR(p=1.0), 5),
        ("AddGaussianNoise",
         lambda: AddGaussianNoise(
             min_amplitude=0.001, max_amplitude=0.015, p=1.0),
         5),
        ("TimeStretch",
         lambda: TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0),
         5),
        # The label used to be misspelled "itchShift".
        ("PitchShift",
         lambda: PitchShift(min_semitones=-4, max_semitones=4, p=1.0),
         5),
        ("Shift",
         lambda: Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0),
         5),
        ("ShiftWithoutRollover",
         lambda: Shift(
             min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0),
         5),
        ("Normalize", lambda: Normalize(p=1.0), 1),
        ("ClippingDistortion", lambda: ClippingDistortion(p=1.0), 5),
        ("AddBackgroundNoise",
         lambda: AddBackgroundNoise(
             sounds_path=os.path.join(
                 auxiliarSoundsDir, "helperSounds/background_noises"),
             p=1.0),
         5),
        ("AddShortNoises",
         lambda: AddShortNoises(
             sounds_path=os.path.join(auxiliarSoundsDir,
                                      "helperSounds/short_noises"),
             min_snr_in_db=0,
             max_snr_in_db=8,
             min_time_between_sounds=2.0,
             max_time_between_sounds=4.0,
             burst_probability=0.4,
             min_pause_factor_during_burst=0.01,
             max_pause_factor_during_burst=0.95,
             min_fade_in_time=0.005,
             max_fade_in_time=0.08,
             min_fade_out_time=0.01,
             max_fade_out_time=0.1,
             p=1.0,
         ),
         5),
    ]

    for label, build_transform, run_count in recipes:
        augmenter = Compose([build_transform()])
        for i in range(run_count):
            output_file_path = os.path.join(
                output_dir, "{}_{}_{:03d}.wav".format(name, label, i))
            augmented_samples = augmenter(samples=samples,
                                          sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path,
                          rate=SAMPLE_RATE,
                          data=augmented_samples)