Example #1
0
    def fasfasa():
        """Smoke-test the DSD100 dataset wrapped in a waveform-augmentation
        pipeline: builds ten samples and prints each one.

        NOTE(review): depends on the `audiomentations` package and the
        project-local `src.core` transforms / `DSD100` dataset.
        """
        import audiomentations

        from src import core

        crop_size = 98303  # 768 * 128 - 1

        # Tensor -> mono numpy for audiomentations, then back to torch.
        pipeline = audiomentations.Compose([
            core.transforms.ToMono(),
            core.transforms.Squeeze(),
            core.transforms.ToNumpy(),
            audiomentations.TimeStretch(min_rate=0.7, max_rate=1.3, p=0.5),
            audiomentations.PitchShift(min_semitones=-4,
                                       max_semitones=4,
                                       p=0.5),
            core.transforms.ToTorch()
        ])

        dataset = DSD100(root=Path.home() / 'Data' / 'Audio' / 'DSD100',
                         crop_size=crop_size,
                         transforms=pipeline)

        # zip with range() caps the iteration at ten samples.
        for _, sample in zip(range(10), dataset):
            print(sample)
Example #2
0
# Assign each grouped row to a CV fold (train indices are unused here —
# only the validation split of each fold is recorded).
for fold_id, (_, valid_idx) in enumerate(kfold.split(X, y)):
    train_gby.loc[valid_idx, "kfold"] = fold_id

# Propagate the fold id back onto the full training frame by recording id.
train_df = train_df.merge(
    train_gby[["recording_id", "kfold"]],
    on="recording_id",
    how="left",
)
print(train_df.kfold.value_counts())

# Persist the fold assignment and the per-species frequency bounds.
train_df.to_csv(OUTPUT_DIR / "folds.csv", index=False)
species_fmin_fmax.to_csv(OUTPUT_DIR / "species_fmin_fmax.csv", index=False)

################################################
# audiomentations #
################################################
# Light waveform-level augmentation: each transform fires independently
# with probability 0.2.
_wave_transforms = [
    A.AddGaussianNoise(min_amplitude=0.01, max_amplitude=0.03, p=0.2),
    A.PitchShift(min_semitones=-3, max_semitones=3, p=0.2),
    A.Gain(p=0.2),
]
augmenter = A.Compose(_wave_transforms)

################################################
# Dataset #
################################################


def cut_spect(spect: torch.Tensor, fmin_mel: int, fmax_mel: int) -> torch.Tensor:
    """Crop a spectrogram along its first axis to rows [fmin_mel, fmax_mel).

    The first axis is presumably the mel-frequency axis — TODO confirm
    against the callers. Returns a view, not a copy.
    """
    band = slice(fmin_mel, fmax_mel)
    return spect[band]


def do_normalize(img: torch.Tensor):
    bs, ch, w, h = img.shape
    _img = img.clone()
import audiomentations
import cv2

from model.mixers import UseMixerWithProb, RandomMixer, SigmoidConcatMixer, AddMixer, SigmoidVerticalConcatMixer
from model.random_resized_crop import RandomResizedCrop, RandomResizedCrop2
from model.transforms import Compose, UseWithProb, SpecAugment, SpectreScale, PreprocessMelImage, GaussNoise, OneOf, \
    PadToSize, RandomCrop, PreprocessSingleChannelMelImage

# Heavy pipeline: four independent p=0.5 transforms, the whole chain applied
# with p=0.96.
_heavy_wave_aug = audiomentations.Compose([
    audiomentations.AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
    audiomentations.TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
    audiomentations.PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
    audiomentations.Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
], p=0.96)  # (1-(1-0.5)^4)*0.96==0.9 - In total there will be 90% augmented samples

# Light pipeline: almost-always gaussian noise plus an occasional small shift.
_light_wave_aug = audiomentations.Compose([
    audiomentations.AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.010, p=0.95),
    audiomentations.Shift(min_fraction=-0.1, max_fraction=0.1, p=0.3),
], p=1)

# Waveform-augmentation presets selected by integer config key; 0 disables
# augmentation entirely.
wave_augmentations = {
    0: None,
    1: _heavy_wave_aug,
    2: _light_wave_aug,
}

# NOTE(review): name suggests the spectrogram width (frames) of a 4-second
# clip at hop length 750 — TODO confirm against the feature-extraction code.
size_4_sec_750_hop = 256

_base_mel_post_process = {
    'none': [],
    '3ch_1': [
        # Use librosa.feature.delta with order 1 and 2 for creating 2 additional channels then divide by 100
        PreprocessMelImage(),
    ],
    '1ch_1': [PreprocessSingleChannelMelImage(), ],
Example #4
0
 def __init__(self, sample_rate, min_semitones=-4, max_semitones=4, p=0.5, **kwargs):
     """Pitch-shift transform wrapper.

     Parameters
     ----------
     sample_rate : int
         Sample rate bound into the underlying audiomentations call.
     min_semitones, max_semitones : int
         Shift range in semitones (defaults -4 / +4).
     p : float
         Probability of applying the shift (default 0.5).
     **kwargs
         Forwarded to the parent class.
     """
     # One store_attr call with a comma-separated name string is fastai's
     # documented form; the original chained three calls in a discarded tuple.
     store_attr('min_semitones,max_semitones,p')
     super().__init__(**kwargs)
     # Bind sample_rate now so the transform is called as self.tfm(samples).
     self.tfm = partial(aug.PitchShift(min_semitones=min_semitones,
                                       max_semitones=max_semitones, p=p),
                        sample_rate=sample_rate)