# NOTE(review): fragment — the enclosing method (likely a Dataset __getitem__)
# begins before this chunk; `label`, `y`, and `record` are bound upstream.
# Original newlines were lost; indentation below is reconstructed. The `return`
# is placed at method level (assumed it runs for every mode) — TODO confirm.
if self.mode == "valid":
    # Multi-hot encode every species id present in this recording.
    for i in record['species_id']:
        label[i] = 1
return {
    "image" : y,
    "target" : label,
    "id" : record['recording_id']
}

###############################
# Augmentations
###############################
import audiomentations as AA

# Waveform-level augmentation pipeline for training audio.
# The commented-out transforms are disabled experiments kept for reference.
train_audio_transform = AA.Compose([
    AA.AddGaussianNoise(p=0.5),
    AA.AddGaussianSNR(p=0.5),
    #AA.AddBackgroundNoise("../input/train_audio/", p=1)
    #AA.AddImpulseResponse(p=0.1),
    #AA.AddShortNoises("../input/train_audio/", p=1)
    #AA.FrequencyMask(min_frequency_band=0.0, max_frequency_band=0.2, p=0.1),
    #AA.TimeMask(min_band_part=0.0, max_band_part=0.2, p=0.1),
    #AA.PitchShift(min_semitones=-0.5, max_semitones=0.5, p=0.1),
    #AA.Shift(p=0.1),
    #AA.Normalize(p=0.1),
    #AA.ClippingDistortion(min_percentile_threshold=0, max_percentile_threshold=1, p=0.05),
    #AA.PolarityInversion(p=0.05),
    #AA.Gain(p=0.2)
])
import audiomentations
import cv2

from model.mixers import UseMixerWithProb, RandomMixer, SigmoidConcatMixer, AddMixer, SigmoidVerticalConcatMixer
from model.random_resized_crop import RandomResizedCrop, RandomResizedCrop2
from model.transforms import Compose, UseWithProb, SpecAugment, SpectreScale, PreprocessMelImage, GaussNoise, OneOf, \
    PadToSize, RandomCrop, PreprocessSingleChannelMelImage

# Registry of waveform-augmentation presets, selected by integer id.
# 0 disables augmentation entirely.
wave_augmentations = {
    0: None,
    1: audiomentations.Compose([
        audiomentations.AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
        audiomentations.TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
        audiomentations.PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
        audiomentations.Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
    ], p=0.96),  # (1-(1-0.5)^4)*0.96==0.9 - In total there will be 90% augmented samples
    2: audiomentations.Compose([
        audiomentations.AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.010, p=0.95),
        audiomentations.Shift(min_fraction=-0.1, max_fraction=0.1, p=0.3),
    ], p=1),
}

# presumably the spectrogram width for a 4 s clip at hop length 750 — TODO confirm
size_4_sec_750_hop = 256

# Mel-spectrogram post-processing presets keyed by name.
# NOTE(review): this dict continues past the end of the visible chunk;
# its closing brace is not shown here.
_base_mel_post_process = {
    'none': [],
    '3ch_1': [
        # Use librosa.feature.delta with order 1 and 2 for creating 2 additional channels then divide by 100
        PreprocessMelImage(),
    ],
    '1ch_1': [PreprocessSingleChannelMelImage(), ],
# Assign a StratifiedKFold fold index to each group row, then propagate it
# onto the per-sample frame via a left merge on recording_id.
# NOTE(review): CFG, X, y, train_gby, train_df, species_fmin_fmax and
# OUTPUT_DIR are defined outside this chunk.
kfold = StratifiedKFold(n_splits=CFG.n_fold)
for fold, (t_idx, v_idx) in enumerate(kfold.split(X, y)):
    # Label the validation indices of this split with the fold number.
    train_gby.loc[v_idx, "kfold"] = fold

train_df = train_df.merge(train_gby[["recording_id", "kfold"]], on="recording_id", how="left")
print(train_df.kfold.value_counts())
train_df.to_csv(OUTPUT_DIR / "folds.csv", index=False)
species_fmin_fmax.to_csv(OUTPUT_DIR / "species_fmin_fmax.csv", index=False)

################################################
# audiomentations                              #
################################################
# Waveform augmentation pipeline; each transform fires independently
# with its own probability.
augmenter = A.Compose([
    A.AddGaussianNoise(min_amplitude=0.01, max_amplitude=0.03, p=0.2),
    A.PitchShift(min_semitones=-3, max_semitones=3, p=0.2),
    A.Gain(p=0.2)
])

################################################
# Dataset                                      #
################################################
def cut_spect(spect: torch.Tensor, fmin_mel: int, fmax_mel: int):
    """Crop the spectrogram along its first (mel-frequency) axis to
    the half-open bin range [fmin_mel, fmax_mel)."""
    return spect[fmin_mel:fmax_mel]


def do_normalize(img: torch.Tensor):
    # NOTE(review): this function continues past the end of the visible chunk;
    # only the shape unpacking is shown. Assumes img is a 4-D batch
    # (batch, channels, width, height) — TODO confirm against caller.
    bs, ch, w, h = img.shape
import audiomentations as A

# Waveform augmentation pipeline: two kinds of noise injection plus a random
# gain, each applied independently with its own probability. The commented
# entries are alternatives that were tried and left disabled.
_wave_transforms = [
    A.AddGaussianNoise(p=0.4),
    A.AddGaussianSNR(p=0.4),
    #A.AddBackgroundNoise("../input/train_audio/", p=1)
    #A.AddImpulseResponse(p=0.1),
    #A.AddShortNoises("../input/train_audio/", p=1)
    #A.FrequencyMask(min_frequency_band=0.0, max_frequency_band=0.2, p=0.05),
    #A.TimeMask(min_band_part=0.0, max_band_part=0.2, p=0.05),
    #A.PitchShift(min_semitones=-0.5, max_semitones=0.5, p=0.05),
    #A.Shift(p=0.1),
    #A.Normalize(p=0.1),
    #A.ClippingDistortion(min_percentile_threshold=0, max_percentile_threshold=1, p=0.05),
    #A.PolarityInversion(p=0.05),
    A.Gain(p=0.2),
]

augmenter = A.Compose(_wave_transforms)