def get_folds_data(corrections=None):
    """Build the cross-validation training set.

    Reads the fold CSV, optionally applies per-file label corrections,
    and converts each sample's labels into a multi-hot torch target.
    Mel-spectrograms are computed in parallel over all audio paths.

    Args:
        corrections: optional mapping of fname -> 'remove' (drop the sample)
            or a replacement comma-separated label string.

    Returns:
        Tuple of (spectrogram images, multi-hot targets, fold ids).
    """
    print("Start generate folds data")
    print("Audio config", get_audio_config())
    folds_df = pd.read_csv(config.train_folds_path)

    audio_paths = []
    targets = []
    folds = []
    for row in folds_df.itertuples():
        labels = row.labels
        # Apply manual corrections when provided: either skip the file
        # entirely or substitute its label string.
        if corrections is not None and row.fname in corrections:
            action = corrections[row.fname]
            if action == 'remove':
                print(f"Skip {row.fname}")
                continue
            print(
                f"Replace labels {row.fname} from {labels} to {action}"
            )
            labels = action
        folds.append(row.fold)
        audio_paths.append(row.file_path)
        # Multi-hot encode the (possibly corrected) comma-separated labels.
        target = torch.zeros(len(config.classes))
        for label in labels.split(','):
            target[config.class2index[label]] = 1.
        targets.append(target)

    # Spectrogram extraction is CPU-bound, so fan it out across workers.
    with mp.Pool(N_WORKERS) as pool:
        images = pool.map(read_as_melspectrogram, audio_paths)
    return images, targets, folds
def get_corrected_noisy_data():
    """Build the manually corrected subset of the noisy training set.

    Only samples with an entry in the corrections JSON are kept; entries
    marked 'remove' are dropped and all others use the corrected label
    string instead of the original CSV labels.

    Returns:
        Tuple of (spectrogram images, multi-hot targets).
    """
    print("Start generate corrected noisy data")
    print("Audio config", get_audio_config())
    noisy_df = pd.read_csv(config.train_noisy_csv_path)
    with open(config.noisy_corrections_json_path) as file:
        corrections = json.load(file)

    audio_paths = []
    targets = []
    for row in noisy_df.itertuples():
        # Keep only files that were manually reviewed.
        if row.fname not in corrections:
            continue
        action = corrections[row.fname]
        if action == 'remove':
            continue
        # The correction itself is the comma-separated label string.
        labels = action
        audio_paths.append(config.train_noisy_dir / row.fname)
        target = torch.zeros(len(config.classes))
        for label in labels.split(','):
            target[config.class2index[label]] = 1.
        targets.append(target)

    with mp.Pool(N_WORKERS) as pool:
        images = pool.map(read_as_melspectrogram, audio_paths)
    return images, targets
def get_augment_folds_data_generator(time_stretch_lst, pitch_shift_lst):
    """Yield augmented versions of the folds data, one batch at a time.

    First yields the unaugmented spectrograms, then one batch per pitch
    shift value, then one batch per time stretch value. Targets and fold
    ids are identical across all yielded batches; only the spectrogram
    rendering changes.

    Args:
        time_stretch_lst: time-stretch factors to render.
        pitch_shift_lst: pitch-shift amounts to render.

    Yields:
        Tuples of (spectrogram images, multi-hot targets, fold ids).
    """
    print("Start generate augment folds data")
    print("Audio config", get_audio_config())
    print("time_stretch_lst:", time_stretch_lst)
    print("pitch_shift_lst:", pitch_shift_lst)
    folds_df = pd.read_csv(config.train_folds_path)

    audio_paths = []
    targets = []
    folds = []
    for row in folds_df.itertuples():
        folds.append(row.fold)
        audio_paths.append(row.file_path)
        target = torch.zeros(len(config.classes))
        for label in row.labels.split(','):
            target[config.class2index[label]] = 1.
        targets.append(target)

    # One reader per rendering: plain first, then each pitch shift,
    # then each time stretch — matching the required yield order.
    readers = [read_as_melspectrogram]
    readers += [
        partial(read_as_melspectrogram, pitch_shift=pitch_shift)
        for pitch_shift in pitch_shift_lst
    ]
    readers += [
        partial(read_as_melspectrogram, time_stretch=time_stretch)
        for time_stretch in time_stretch_lst
    ]

    for reader in readers:
        with mp.Pool(N_WORKERS) as pool:
            images = pool.map(reader, audio_paths)
        yield images, targets, folds
        # Drop the reference so the previous batch can be freed before
        # the next (equally large) one is computed.
        images = []