def _extract(data_file_name, settings_features, settings_data,
             dir_output_dev, dir_output_eva):
    """Extracts features for one serialized data file and dumps the result.

    Loads the numpy record stored at ``data_file_name``, runs feature
    extraction on its audio data, rebuilds the record array (optionally
    keeping the raw audio), and serializes it into the development or
    evaluation output directory depending on the file's parent directory.
    """
    # Load the serialized record for this file.
    rec = load_numpy_object(data_file_name)

    # Extract the features from the raw audio.
    features = feature_extraction(rec['audio_data'].item(),
                                  **settings_features['process'])

    # Assemble the output record's values and field dtypes in parallel.
    values = [rec['file_name'].item()]
    fields = [('file_name', rec['file_name'].dtype)]

    # Optionally carry the raw audio through to the output record.
    if settings_features['keep_raw_audio_data']:
        values.append(rec['audio_data'].item())
        fields.append(('audio_data', rec['audio_data'].dtype))

    # The remaining fields are always present.
    values.extend([features,
                   rec['caption'].item(),
                   rec['caption_ind'].item(),
                   rec['words_ind'].item(),
                   rec['chars_ind'].item()])
    fields.extend([('features', np.dtype(object)),
                   ('caption', rec['caption'].dtype),
                   ('caption_ind', rec['caption_ind'].dtype),
                   ('words_ind', rec['words_ind'].dtype),
                   ('chars_ind', rec['chars_ind'].dtype)])

    # Build the recarray from the collected values/dtypes.
    np_rec_array = np.rec.array([tuple(values)], dtype=fields)

    # Development files go to the dev output dir, everything else to eva.
    if data_file_name.parent.name == settings_data['audio_dirs']['development']:
        parent_path = dir_output_dev
    else:
        parent_path = dir_output_eva

    # Serialize the recarray next to its original file name.
    dump_numpy_object(np_rec_array, parent_path.joinpath(data_file_name.name))
def extract_features_test(root_dir: str,
                          settings_data: MutableMapping[str, Any],
                          settings_features: MutableMapping[str, Any],
                          settings_audio: MutableMapping[str, Any]) \
        -> None:
    """Extracts test features from the audio data of Clotho.

    :param root_dir: Root dir for the data.
    :type root_dir: str
    :param settings_data: Settings for creating data files.
    :type settings_data: dict[str, T]
    :param settings_features: Settings for feature extraction.
    :type settings_features: dict[str, T]
    :param settings_audio: Settings for the audio.
    :type settings_audio: dict
    :raises AttributeError: If the test set audio files are missing.
    """
    # Get the root directory.
    dir_root = Path(root_dir)

    # Get the directory of the downloaded test audio files.
    dir_test = dir_root.joinpath(settings_data['audio_dirs']['downloaded'],
                                 settings_data['audio_dirs']['test'])

    # `any` stops at the first directory entry instead of materializing the
    # whole listing, and the `and` short-circuits when the dir is missing.
    audio_exists = dir_test.exists() and any(dir_test.iterdir())

    if not audio_exists:
        raise AttributeError(
            'Testing workflow selected, but could not find the test set audio files. '
            'Please download the test set audio before making test predictions.'
        )

    # Get the directory for output.
    dir_output_test = dir_root.joinpath(
        settings_data['features_dirs']['output'],
        settings_data['features_dirs']['test'])

    # Load the words list; needed for the <sos>/<eos> indices below.
    words_list = load_pickle_file(
        dir_root.joinpath(settings_data['pickle_files_dir'],
                          settings_data['files']['words_list_file_name']))

    # Create the output directory.
    dir_output_test.mkdir(parents=True, exist_ok=True)

    # Apply the extraction to each file and save the result.
    for data_file_name in filter(lambda _x: _x.is_file(), dir_test.iterdir()):
        # Load the audio.
        audio = load_audio_file(audio_file=str(data_file_name),
                                sr=int(settings_audio['sr']),
                                mono=settings_audio['to_mono'])

        # Extract the features.
        features = feature_extraction(audio, **settings_features['process'])

        # Populate the recarray data and dtypes.
        array_data = (data_file_name.name, )
        dtypes = [('file_name', f'U{len(data_file_name.name)}')]

        # Check if we are keeping the raw audio data.
        if settings_features['keep_raw_audio_data']:
            # And add it to the recarray data and dtypes.
            array_data += (audio, )
            dtypes.append(('audio_data', audio.dtype))

        # Add the rest to the recarray.
        # Word indices are required for the dataloader to work; the test
        # split has no captions, so only <sos>/<eos> are stored.
        array_data += (features,
                       np.array([words_list.index('<sos>'),
                                 words_list.index('<eos>')]))
        dtypes.extend([('features', np.dtype(object)),
                       ('words_ind', np.dtype(object))])

        # Make the recarray.
        np_rec_array = np.rec.array([array_data], dtype=dtypes)

        # Make the path for serializing the recarray; the test split has no
        # caption index, so that placeholder is stripped from the template.
        file_template = settings_data['files'][
            'np_file_name_template'].replace('_{caption_index}', '')
        file_path = dir_output_test.joinpath(
            file_template.format(audio_file_name=data_file_name.name))

        # Dump it.
        dump_numpy_object(np_rec_array, file_path)
def extract_features(root_dir: str,
                     settings_data: MutableMapping[str, Any],
                     settings_features: MutableMapping[str, Any]) \
        -> None:
    """Extracts features from the audio data of Clotho.

    :param root_dir: Root dir for the data.
    :type root_dir: str
    :param settings_data: Settings for creating data files.
    :type settings_data: dict[str, T]
    :param settings_features: Settings for feature extraction.
    :type settings_features: dict[str, T]
    """
    # Get the root directory.
    dir_root = Path(root_dir)

    # Get the directories of the serialized input files.
    dir_output = dir_root.joinpath(settings_data['audio_dirs']['output'])
    dir_dev = dir_output.joinpath(settings_data['audio_dirs']['development'])
    dir_eva = dir_output.joinpath(settings_data['audio_dirs']['evaluation'])

    # Get the directories for output.
    dir_output_dev = dir_root.joinpath(
        settings_data['features_dirs']['output'],
        settings_data['features_dirs']['development'])
    dir_output_eva = dir_root.joinpath(
        settings_data['features_dirs']['output'],
        settings_data['features_dirs']['evaluation'])

    # Create the directories.
    dir_output_dev.mkdir(parents=True, exist_ok=True)
    dir_output_eva.mkdir(parents=True, exist_ok=True)

    # The per-file work is identical to `_extract`; delegate to it instead
    # of duplicating its body inline.
    for data_file_name in filter(lambda _x: _x.suffix == '.npy',
                                 chain(dir_dev.iterdir(), dir_eva.iterdir())):
        _extract(data_file_name, settings_features, settings_data,
                 dir_output_dev, dir_output_eva)
def create_split_data(csv_split: MutableSequence[MutableMapping[str, str]], dir_split: Path, dir_audio: Path, dir_root: Path, words_list: MutableSequence[str], chars_list: MutableSequence[str], settings_ann: MutableMapping[str, Any], settings_audio: MutableMapping[str, Any], settings_output: MutableMapping[str, Any]) -> None: """Creates the data for the split. :param csv_split: Annotations of the split. :type csv_split: list[collections.OrderedDict] :param dir_split: Directory for the split. :type dir_split: pathlib.Path :param dir_audio: Directory of the audio files for the split. :type dir_audio: pathlib.Path :param dir_root: Root directory of data. :type dir_root: pathlib.Path :param words_list: List of the words. :type words_list: list[str] :param chars_list: List of the characters. :type chars_list: list[str] :param settings_ann: Settings for the annotations. :type settings_ann: dict :param settings_audio: Settings for the audio. :type settings_audio: dict :param settings_output: Settings for the output files. 
:type settings_output: dict """ # Make sure that the directory exists dir_split.mkdir(parents=True, exist_ok=True) captions_fields = [ settings_ann['captions_fields_prefix'].format(i) for i in range(1, int(settings_ann['nb_captions']) + 1) ] # For each sound: for csv_entry in csv_split: file_name_audio = csv_entry[settings_ann['audio_file_column']] audio = load_audio_file(audio_file=str( dir_root.joinpath(dir_audio, file_name_audio)), sr=int(settings_audio['sr']), mono=settings_audio['to_mono']) for caption_ind, caption_field in enumerate(captions_fields): caption = csv_entry[caption_field] words_caption = get_sentence_words( caption, unique=settings_ann['use_unique_words_per_caption'], keep_case=settings_ann['keep_case'], remove_punctuation=settings_ann['remove_punctuation_words'], remove_specials=not settings_ann['use_special_tokens']) chars_caption = list( chain.from_iterable( clean_sentence(caption, keep_case=settings_ann['keep_case'], remove_punctuation=settings_ann[ 'remove_punctuation_chars'], remove_specials=True))) if settings_ann['use_special_tokens']: chars_caption.insert(0, ' ') chars_caption.insert(0, '<sos>') chars_caption.append(' ') chars_caption.append('<eos>') indices_words = [words_list.index(word) for word in words_caption] indices_chars = [chars_list.index(char) for char in chars_caption] # create the numpy object with all elements np_rec_array = np.rec.array( np.array( (file_name_audio, audio, caption, caption_ind, np.array(indices_words), np.array(indices_chars)), dtype=[('file_name', 'U{}'.format(len(file_name_audio))), ('audio_data', np.dtype(object)), ('caption', 'U{}'.format(len(caption))), ('caption_ind', 'i4'), ('words_ind', np.dtype(object)), ('chars_ind', np.dtype(object))])) # save the numpy object to disk dump_numpy_object( np_obj=np_rec_array, file_name=str( dir_split.joinpath( settings_output['file_name_template'].format( audio_file_name=file_name_audio, caption_index=caption_ind))))