def test_list_wav_files():
    """
    Check list_wav_files on a folder with wav files and on one without.

    The first fixture folder is expected to yield exactly three wav
    paths; the second fixture folder is expected to yield none.
    """
    wav_dir = "./tests/data/audio"
    expected_names = [
        "147764-4-7-0.wav",
        "176787-5-0-0.wav",
        "40722-8-0-7.wav",
    ]

    found = list_wav_files(wav_dir)
    assert type(found) is list
    assert len(found) == 3

    # Every expected file must be reported with the folder prepended.
    expected_paths = [os.path.join(wav_dir, name) for name in expected_names]
    for expected_path in expected_paths:
        assert expected_path in found

    # A folder without wav files must produce an empty result.
    aiff_dir = "./tests/data_aiff/audio"
    found = list_wav_files(aiff_dir)
    assert len(found) == 0
def process(self):
    """
    Generate augmented data for each file in dataset.

    Replicate the folder structure of {DATASET_PATH}/audio/original
    into the folder of each augmentation and run the corresponding
    transformer on every wav file that has no output yet.

    The dataset is resampled to ``self.sr`` first when
    ``self.dataset.check_sampling_rate`` reports that it is needed.

    Raises
    ------
    IndexError
        If ``get_audio_paths`` returns fewer augmentation folders than
        there are entries in ``self.augmentations_list``.
    """
    if not self.dataset.check_sampling_rate(self.sr):
        print("Changing sampling rate ...")
        self.dataset.change_sampling_rate(self.sr)
        print('Done!')

    # Get path to the original audio files and list of
    # folders with augmented files.
    _, sub_folders = self.get_audio_paths(self.sr)
    path_original = sub_folders[0]
    paths_augments = sub_folders[1:]

    for index, augmentation in enumerate(self.augmentations_list):
        path_augmented = paths_augments[index]

        # Replicate folder structure of the original files into
        # the augmented folder.
        duplicate_folder_structure(path_original, path_augmented)

        # Process each file in path_original
        for path_to_file in progressbar(list_wav_files(path_original)):
            # Swap only the first occurrence of the source folder so
            # that a file or sub-folder name repeating the same text
            # is left untouched.
            # NOTE(review): presumes list_wav_files returns paths that
            # start with the folder it was given - confirm.
            path_to_destination = path_to_file.replace(
                path_original, path_augmented, 1)
            # Skip files that were already generated by a previous run.
            if os.path.exists(path_to_destination):
                continue
            augmentation['transformer'].build(
                path_to_file, path_to_destination)
def check_sampling_rate(self, sr):
    """
    Checks if dataset was resampled before.

    For now, only checks if the folder {audio_path}{sr} exists and
    each wav file present in audio_path is also present in
    {audio_path}{sr}.

    Parameters
    ----------
    sr : int
        Sampling rate.

    Returns
    -------
    bool
        True if the dataset was resampled before.

    """
    _, subfolders = self.get_audio_paths(sr)
    audio_folder_sr = subfolders[0]
    if not os.path.exists(audio_folder_sr):
        return False

    for path_to_file in list_wav_files(self.audio_path):
        # Swap only the first occurrence of audio_path so that a file
        # or sub-folder name repeating the same text is left untouched.
        # NOTE(review): presumes list_wav_files returns paths that
        # start with the folder it was given - confirm.
        path_to_destination = path_to_file.replace(
            self.audio_path, audio_folder_sr, 1)
        # TODO: check if the audio file was resampled correctly,
        # not only if exists.
        if not os.path.exists(path_to_destination):
            return False
    return True
def change_sampling_rate(self, new_sr):
    """
    Changes the sampling rate of each wav file in audio_path.

    Creates a new folder named audio_path{new_sr} (i.e audio22050)
    and converts each wav file in audio_path and save the result in
    the new folder. Files whose destination already exists are
    skipped.

    Parameters
    ----------
    new_sr : int
        Target sampling rate.

    """
    _, subfolders = self.get_audio_paths(new_sr)
    new_audio_folder = subfolders[0]  # audio22050/original
    duplicate_folder_structure(self.audio_path, new_audio_folder)

    # A single transformer is configured once and reused for all files.
    tfm = sox.Transformer()
    tfm.convert(samplerate=new_sr)

    for path_to_file in progressbar(list_wav_files(self.audio_path)):
        # Swap only the first occurrence of audio_path so that a file
        # or sub-folder name repeating the same text is left untouched.
        # NOTE(review): presumes list_wav_files returns paths that
        # start with the folder it was given - confirm.
        path_to_destination = path_to_file.replace(
            self.audio_path, new_audio_folder, 1)
        if os.path.exists(path_to_destination):
            continue
        tfm.build(path_to_file, path_to_destination)
def generate_file_lists(self):
    """
    Build the per-fold wav lists and the wav-to-annotation mapping.

    For each fold in ``self.fold_list`` the wav files found under
    ``{audio_path}/{fold}`` are stored in ``self.file_lists[fold]``.
    Afterwards ``self.wav_to_labels`` is rebuilt so that every listed
    wav path maps to ``{annotations_folder}/{fold}/{name}.txt``, where
    ``name`` is the file's basename up to its first dot.
    """
    for fold_name in self.fold_list:
        fold_dir = os.path.join(self.audio_path, fold_name)
        self.file_lists[fold_name] = list_wav_files(fold_dir)

    labels_by_wav = {}
    self.wav_to_labels = labels_by_wav
    for fold_name in self.fold_list:
        for wav_path in self.file_lists[fold_name]:
            # Text before the first dot of the basename names the label.
            stem = os.path.basename(wav_path).split('.')[0]
            labels_by_wav[wav_path] = os.path.join(
                self.annotations_folder, fold_name, stem + '.txt')
def generate_file_lists(self):
    """
    Build ``self.file_lists``: one list of wav paths per fold.

    Every wav file found under ``self.audio_path`` is looked up in
    ``self.metadata`` through ``self.get_basename_wav``; files without
    a metadata entry are ignored. A file is assigned to a fold when
    the ``'fold'`` value of its metadata entry equals that fold.
    """
    self.file_lists = {}

    # Scan the audio folder and resolve each file's fold only once,
    # instead of repeating the directory listing and metadata lookup
    # for every fold.
    files_with_fold = []
    for fil in list_wav_files(self.audio_path):
        basename = self.get_basename_wav(fil)
        if basename in self.metadata:
            files_with_fold.append((fil, self.metadata[basename]['fold']))

    for fold in self.fold_list:
        # Order of files within a fold follows list_wav_files.
        self.file_lists[fold] = [
            fil for fil, file_fold in files_with_fold if file_fold == fold
        ]
def generate_file_lists(self):
    """
    Build ``self.file_lists``: one list of wav paths per fold.

    A file found under ``self.audio_path`` belongs to a fold when its
    basename appears in the ``'audio_filename'`` column of the
    metadata rows selected for that fold. For ``'train'`` all rows
    whose ``'split'`` equals the fold are used; for any other fold
    only the rows that also have ``'annotator_id'`` equal to 0.

    Raises
    ------
    AssertionError
        If no wav file is found in ``self.audio_path``.
    """
    self.file_lists = {}
    all_files = list_wav_files(self.audio_path)
    assert len(all_files) != 0, (
        'No wav files found in {}'.format(self.audio_path))

    # NOTE(review): self.metadata is presumably a pandas DataFrame
    # (boolean-mask indexing, drop_duplicates in the original) - confirm.
    for fold in self.fold_list:
        if fold == 'train':
            metadata_fold = self.metadata[self.metadata['split'] == fold]
        else:
            metadata_fold = self.metadata[(
                (self.metadata['split'] == fold) &
                (self.metadata['annotator_id'] == 0))]

        # A set removes duplicates and gives constant-time membership
        # tests, instead of scanning a list once per audio file.
        filenames_fold = set(metadata_fold['audio_filename'])

        self.file_lists[fold] = [
            fil for fil in all_files
            if os.path.basename(fil) in filenames_fold
        ]
def extract(self, dataset):
    """
    Extracts features for each file in dataset.

    Call calculate() for each file in dataset and save the result
    into the features path. The dataset is resampled to ``self.sr``
    first when ``dataset.check_sampling_rate`` reports it is needed.
    Sub-folders for which ``check_if_extracted_path`` returns True are
    skipped.

    Parameters
    ----------
    dataset : Dataset
        Instance of the dataset.

    """
    features_path = self.get_features_path(dataset)
    mkdir_if_not_exists(features_path, parents=True)

    if not dataset.check_sampling_rate(self.sr):
        print('Changing sampling rate ...')
        dataset.change_sampling_rate(self.sr)
        print('Done!')

    # Define path to audio and features folders
    audio_path, subfolders = dataset.get_audio_paths(self.sr)

    # Duplicate folder structure of audio in features folder
    duplicate_folder_structure(audio_path, features_path)

    for audio_folder in subfolders:
        subfolder_name = os.path.basename(audio_folder)
        features_path_sub = os.path.join(features_path, subfolder_name)
        if self.check_if_extracted_path(features_path_sub):
            continue

        # Navigate in the structure of audio folder and extract
        # features of the each wav file
        for path_audio in progressbar(list_wav_files(audio_folder)):
            features_array = self.calculate(path_audio)
            # Swap only the first occurrence of the audio root so that
            # a file or sub-folder name repeating the same text is
            # left untouched.
            path_to_features_file = path_audio.replace(
                audio_path, features_path, 1)
            # Change only the file extension. Replacing every 'wav'
            # substring would also rewrite folder names and file
            # stems that happen to contain it.
            # NOTE(review): any code that maps wav paths to feature
            # paths elsewhere should follow the same rule - confirm.
            path_to_features_file = (
                os.path.splitext(path_to_features_file)[0] + '.npy')
            np.save(path_to_features_file, features_array)

        # Save parameters.json for future checking
        self.set_as_extracted(features_path_sub)
def generate_file_lists(self):
    """
    Fill ``self.file_lists`` with the wav files of each fold.

    Each fold in ``self.fold_list`` is treated as a sub-folder of
    ``self.audio_path``; the wav files found there are stored under
    the fold's name.
    """
    for fold_name in self.fold_list:
        fold_dir = os.path.join(self.audio_path, fold_name)
        wav_paths = list_wav_files(fold_dir)
        self.file_lists[fold_name] = wav_paths