# Tag the current validation indices with this fold id.
# NOTE(review): this statement is the tail of a fold-splitting loop
# (e.g. `for fold, (t_idx, v_idx) in enumerate(kf.split(...))`) whose
# header lies before this chunk — confirm against the full file.
train_gby.loc[v_idx, "kfold"] = fold

# Broadcast the per-recording fold id back onto the full training frame
# (left join keeps every training row even if a recording got no fold).
train_df = train_df.merge(train_gby[["recording_id", "kfold"]], on="recording_id", how="left")
print(train_df.kfold.value_counts())

# Persist the fold assignment and the per-species frequency bounds so
# later stages can reload them instead of recomputing.
train_df.to_csv(OUTPUT_DIR / "folds.csv", index=False)
species_fmin_fmax.to_csv(OUTPUT_DIR / "species_fmin_fmax.csv", index=False)

################################################
#               audiomentations                #
################################################
# Waveform-level augmentation pipeline; each transform is applied
# independently with its own probability `p`.
augmenter = A.Compose([
    A.AddGaussianNoise(min_amplitude=0.01, max_amplitude=0.03, p=0.2),
    A.PitchShift(min_semitones=-3, max_semitones=3, p=0.2),
    A.Gain(p=0.2)
])

################################################
#                   Dataset                    #
################################################


def cut_spect(spect: torch.Tensor, fmin_mel: int, fmax_mel: int):
    """Slice a spectrogram to the half-open bin range [fmin_mel, fmax_mel).

    Slices along dim 0 — presumably the mel/frequency axis given the
    parameter names; TODO confirm against the caller.
    """
    return spect[fmin_mel:fmax_mel]


def do_normalize(img: torch.Tensor):
    # NOTE(review): this definition is truncated at the end of the visible
    # chunk — the rest of its body is not shown here.
    bs, ch, w, h = img.shape     # expects a 4-D (batch, channel, w, h) tensor
    _img = img.clone()           # work on a copy; don't mutate the caller's tensor
    _img = _img.view(bs, -1)     # flatten each sample to a single row
def __init__(self, sample_rate, min_gain_in_db=-12, max_gain_in_db=12, p=0.5, **kwargs):
    """Wrap an ``aug.Gain`` waveform transform applied with probability ``p``.

    Args:
        sample_rate: audio sample rate, pre-bound onto the wrapped transform.
        min_gain_in_db: lower bound of the random gain, in decibels.
        max_gain_in_db: upper bound of the random gain, in decibels.
        p: probability of applying the gain.
        **kwargs: forwarded unchanged to the parent class constructor.
    """
    # Presumably fastai's `store_attr`: each call stores the named argument
    # onto `self` by inspecting the caller's frame (e.g. self.p = p) —
    # confirm against the import. The trailing commas make this a single
    # tuple expression of three calls; the tuple itself is discarded.
    store_attr('min_gain_in_db'), store_attr('max_gain_in_db'), store_attr('p')
    super().__init__(**kwargs)
    # Pre-bind `sample_rate` so the stored transform can later be invoked
    # with the waveform alone. `aug.Gain` looks like the (torch-)
    # audiomentations gain transform — verify against the `aug` import.
    self.tfm = partial(aug.Gain(min_gain_in_db=min_gain_in_db, max_gain_in_db=max_gain_in_db, p=p), sample_rate=sample_rate)