Python load_audio_file 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: tools.file_io

메소드/함수: load_audio_file

hotexamples.com에서의 예제들: 3

Python load_audio_file 예제 3개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python tools.file_io.load_audio_file의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: aux_functions.py 프로젝트: sergipc22/clotho-dataset

def _index_data_files_by_stem(
        dir_data_files: Path) -> MutableMapping[str, MutableSequence[Path]]:
    """Groups the files of a directory by the audio file stem in their name.

    The stem of a data file is the part of its full path string that comes
    after the last ``file_`` and before the first following ``.wav_``.

    :param dir_data_files: Directory holding the data files.
    :type dir_data_files: pathlib.Path
    :return: Data files per audio file stem, in directory listing order.
    :rtype: dict[str, list[pathlib.Path]]
    """
    files_by_stem = {}
    for data_file in dir_data_files.iterdir():
        f_stem = str(data_file).split('file_')[-1].split('.wav_')[0]
        files_by_stem.setdefault(f_stem, []).append(data_file)
    return files_by_stem


def check_data_for_split(dir_audio: Path, dir_data: Path, dir_root: Path,
                         csv_split: MutableSequence[MutableMapping[str, str]],
                         settings_ann: MutableMapping[str, Any],
                         settings_audio: MutableMapping[str, Any],
                         settings_cntr: MutableMapping[str, Any]) -> None:
    """Goes through all audio files and checks the created data.

    Gets each audio file of the CSV entries and looks up its associated data
    files. For every data file, checks the stored raw audio data against the
    audio file and checks the stored caption, word indices, and character
    indices against the CSV entry and the words/characters lists.

    :param dir_audio: Directory with the audio files.
    :type dir_audio: pathlib.Path
    :param dir_data: Directory with the data to be checked (joined to\
                     `dir_root`).
    :type dir_data: pathlib.Path
    :param dir_root: Root directory.
    :type dir_root: pathlib.Path
    :param csv_split: CSV entries for the data.
    :type csv_split: list[collections.OrderedDict]
    :param settings_ann: Settings for annotations.
    :type settings_ann: dict
    :param settings_audio: Settings for audio.
    :type settings_audio: dict
    :param settings_cntr: Settings for counters.
    :type settings_cntr: dict
    :raises FileExistsError: If an audio file is missing from `dir_audio`\
                             or has no associated data file.
    :raises ValueError: If a data file holds audio data, a caption, word\
                        indices, or character indices that do not match.
    """
    # Imported locally so this function does not rely on a module-level
    # numpy import being present.
    import numpy as np

    # Load the words and characters lists
    words_list = load_pickle_file(
        dir_root.joinpath(settings_cntr['words_list_file_name']))
    chars_list = load_pickle_file(
        dir_root.joinpath(settings_cntr['characters_list_file_name']))

    # The data directory is listed only once (instead of once per CSV entry).
    # It is built lazily, at the point where the first listing is needed, so
    # that errors keep being raised in the same order as before.
    data_files_by_stem = None

    for csv_entry in csv_split:
        # Get audio file name
        file_name_audio = Path(csv_entry[settings_ann['audio_file_column']])

        # Check if the audio file existed originally.
        # NOTE: FileExistsError (not FileNotFoundError) is kept on purpose,
        # because callers may already catch this exception type.
        if not dir_audio.joinpath(file_name_audio).exists():
            raise FileExistsError(
                'Audio file {f_name_audio} not exists in {d_audio}'.format(
                    f_name_audio=file_name_audio, d_audio=dir_audio))

        # Get the original audio data
        data_audio_original = load_audio_file(
            audio_file=str(dir_audio.joinpath(file_name_audio)),
            sr=int(settings_audio['sr']),
            mono=settings_audio['to_mono'])

        if data_files_by_stem is None:
            data_files_by_stem = _index_data_files_by_stem(
                dir_root.joinpath(dir_data))

        data_files = data_files_by_stem.get(file_name_audio.stem, [])

        if not data_files:
            raise FileExistsError(
                'Audio file {} has no associated data.'.format(
                    file_name_audio))

        for data_file in data_files:
            # Get the numpy record array
            data_array = load_numpy_object(data_file)

            # Get the audio data from the numpy record array
            data_audio_rec_array = data_array['audio_data'].item()

            # Compare the lengths
            if len(data_audio_rec_array) != len(data_audio_original):
                raise ValueError(
                    'File {f_audio} was not saved successfully to the numpy '
                    'object {f_np}.'.format(f_audio=file_name_audio,
                                            f_np=data_file))

            # Check all elements in one vectorized comparison. Unlike a
            # per-index Python comparison, this also gives a plain boolean
            # for multi-channel (2-D) audio.
            if not np.array_equal(data_audio_original, data_audio_rec_array):
                raise ValueError(
                    'Numpy object {} has wrong audio data.'.format(
                        data_file))

            # Get the index of the caption; it is offset by one to form the
            # name of the CSV field of the caption.
            caption_index = data_array['caption_ind'].item()

            # Clean it to remove any spaces before punctuation.
            original_caption = clean_sentence(
                sentence=csv_entry[settings_ann['captions_fields_prefix'].
                                   format(caption_index + 1)],
                keep_case=True,
                remove_punctuation=False,
                remove_specials=not settings_ann['use_special_tokens'])

            # Check with the file caption
            caption_data_array = clean_sentence(
                sentence=data_array['caption'].item(),
                keep_case=True,
                remove_punctuation=False,
                remove_specials=not settings_ann['use_special_tokens'])

            if original_caption != caption_data_array:
                raise ValueError(
                    'Numpy object {} has wrong caption.'.format(data_file))

            # Since caption in the file is OK, we can use it instead of
            # the original, because it already has the special tokens.
            caption_data_array = clean_sentence(
                sentence=data_array['caption'].item(),
                keep_case=settings_ann['keep_case'],
                remove_punctuation=settings_ann['remove_punctuation_words'],
                remove_specials=not settings_ann['use_special_tokens'])

            # Check with the indices of words
            words_indices = data_array['words_ind'].item()
            caption_form_words = ' '.join(
                [words_list[i] for i in words_indices])

            if caption_data_array != caption_form_words:
                raise ValueError(
                    'Numpy object {} has wrong words indices.'.format(
                        data_file))

            # Check with the indices of characters
            caption_from_chars = ''.join(
                [chars_list[i] for i in data_array['chars_ind'].item()])

            caption_data_array = clean_sentence(
                sentence=data_array['caption'].item(),
                keep_case=settings_ann['keep_case'],
                remove_punctuation=settings_ann['remove_punctuation_chars'],
                remove_specials=not settings_ann['use_special_tokens'])

            if caption_data_array != caption_from_chars:
                raise ValueError('Numpy object {} has wrong characters '
                                 'indices.'.format(data_file))

예제 #2

파일 보기

파일: dataset.py 프로젝트: WangHelin1997/DCASE2020-Task6-PKU

def extract_features_test(root_dir: str,
                          settings_data: MutableMapping[str, Any],
                          settings_features: MutableMapping[str, Any],
                          settings_audio: MutableMapping[str, Any]) \
        -> None:
    """Extracts test features from the audio data of Clotho.

    For every file in the test audio directory, loads the audio, extracts
    the features, and serializes a numpy record array with the file name,
    optionally the raw audio data, the features, and the indices of the
    ``<sos>`` and ``<eos>`` words.

    :param root_dir: Root dir for the data.
    :type root_dir: str
    :param settings_data: Settings for creating data files.
    :type settings_data: dict[str, T]
    :param settings_features: Settings for feature extraction.
    :type settings_features: dict[str, T]
    :param settings_audio: Settings for the audio.
    :type settings_audio: dict
    :raises AttributeError: If the test audio directory does not exist or\
                            is empty.
    """
    # Get the root directory.
    dir_root = Path(root_dir)

    # Get the directories of files.
    dir_test = dir_root.joinpath(settings_data['audio_dirs']['downloaded'],
                                 settings_data['audio_dirs']['test'])

    # any() stops at the first directory entry, so the whole listing does
    # not have to be materialized just to know that it is not empty.
    if not (dir_test.exists() and any(dir_test.iterdir())):
        raise AttributeError(
            'Testing workflow selected, but could not find the test set audio files. '
            'Please download the test set audio before making test predictions.'
        )

    # Get the directories for output.
    dir_output_test = dir_root.joinpath(
        settings_data['features_dirs']['output'],
        settings_data['features_dirs']['test'])

    words_list = load_pickle_file(
        dir_root.joinpath(settings_data['pickle_files_dir'],
                          settings_data['files']['words_list_file_name']))

    # Create the directories.
    dir_output_test.mkdir(parents=True, exist_ok=True)

    # Apply the function to each file and save the result.
    for data_file_name in filter(lambda _x: _x.is_file(), dir_test.iterdir()):
        # Load the audio
        audio = load_audio_file(audio_file=str(data_file_name),
                                sr=int(settings_audio['sr']),
                                mono=settings_audio['to_mono'])

        # Extract the features.
        features = feature_extraction(audio, **settings_features['process'])

        # Populate the recarray data and dtypes.
        array_data = (data_file_name.name, )
        dtypes = [('file_name', f'U{len(data_file_name.name)}')]

        # Check if we keeping the raw audio data.
        if settings_features['keep_raw_audio_data']:
            # And add them to the recarray data and dtypes. The field holds
            # the whole audio array, not a single sample, so it has to be an
            # object field (like `features` and `words_ind` below); a scalar
            # field of `audio.dtype` cannot contain an array.
            array_data += (audio, )
            dtypes.append(('audio_data', np.dtype(object)))

        # Add the rest to the recarray.
        # Word indices are required for the dataloader to work
        array_data += (features,
                       np.array([
                           words_list.index('<sos>'),
                           words_list.index('<eos>')
                       ]))
        dtypes.extend([('features', np.dtype(object)),
                       ('words_ind', np.dtype(object))])

        # Make the recarray
        np_rec_array = np.rec.array([array_data], dtype=dtypes)

        # Make the path for serializing the recarray. There are no captions
        # for the test files, so the caption index part is dropped from the
        # file name template.
        file_template = settings_data['files'][
            'np_file_name_template'].replace('_{caption_index}', '')
        file_path = dir_output_test.joinpath(
            file_template.format(audio_file_name=data_file_name.name))

        # Dump it.
        dump_numpy_object(np_rec_array, file_path)

예제 #3

파일 보기

파일: aux_functions.py 프로젝트: sergipc22/clotho-dataset

def create_split_data(csv_split: MutableSequence[MutableMapping[str, str]],
                      dir_split: Path, dir_audio: Path, dir_root: Path,
                      words_list: MutableSequence[str],
                      chars_list: MutableSequence[str],
                      settings_ann: MutableMapping[str, Any],
                      settings_audio: MutableMapping[str, Any],
                      settings_output: MutableMapping[str, Any]) -> None:
    """Builds and serializes one numpy record per caption of each audio file.

    Every CSV entry names an audio file and holds its captions. The audio is
    loaded once per entry and, for each caption, a record with the file name,
    the audio data, the caption, the caption index, and the indices of the
    caption's words and characters is dumped to `dir_split`.

    :param csv_split: Annotations of the split.
    :type csv_split: list[collections.OrderedDict]
    :param dir_split: Directory for the split (created if missing).
    :type dir_split: pathlib.Path
    :param dir_audio: Directory of the audio files for the split.
    :type dir_audio: pathlib.Path
    :param dir_root: Root directory of data.
    :type dir_root: pathlib.Path
    :param words_list: List of the words.
    :type words_list: list[str]
    :param chars_list: List of the characters.
    :type chars_list: list[str]
    :param settings_ann: Settings for the annotations.
    :type settings_ann: dict
    :param settings_audio: Settings for the audio.
    :type settings_audio: dict
    :param settings_output: Settings for the output files.
    :type settings_output: dict
    """
    # The output directory may not be there yet.
    dir_split.mkdir(parents=True, exist_ok=True)

    # Names of the CSV fields with the captions; numbering starts at 1.
    nb_captions = int(settings_ann['nb_captions'])
    captions_fields = []
    for caption_number in range(1, nb_captions + 1):
        captions_fields.append(
            settings_ann['captions_fields_prefix'].format(caption_number))

    for row in csv_split:
        file_name_audio = row[settings_ann['audio_file_column']]

        # The audio is shared by all captions of the entry.
        audio_path = dir_root.joinpath(dir_audio, file_name_audio)
        audio = load_audio_file(audio_file=str(audio_path),
                                sr=int(settings_audio['sr']),
                                mono=settings_audio['to_mono'])

        for caption_ind, caption_field in enumerate(captions_fields):
            caption = row[caption_field]

            words_caption = get_sentence_words(
                caption,
                unique=settings_ann['use_unique_words_per_caption'],
                keep_case=settings_ann['keep_case'],
                remove_punctuation=settings_ann['remove_punctuation_words'],
                remove_specials=not settings_ann['use_special_tokens'])

            # Special tokens are always stripped here; when requested they
            # are put back below as single list elements.
            cleaned_for_chars = clean_sentence(
                caption,
                keep_case=settings_ann['keep_case'],
                remove_punctuation=settings_ann['remove_punctuation_chars'],
                remove_specials=True)
            chars_caption = list(chain.from_iterable(cleaned_for_chars))

            if settings_ann['use_special_tokens']:
                chars_caption = (['<sos>', ' '] + chars_caption
                                 + [' ', '<eos>'])

            indices_words = list(map(words_list.index, words_caption))
            indices_chars = list(map(chars_list.index, chars_caption))

            # Assemble the record: values first, then the matching dtypes.
            record_values = (file_name_audio, audio, caption, caption_ind,
                             np.array(indices_words),
                             np.array(indices_chars))
            record_dtype = [
                ('file_name', 'U{}'.format(len(file_name_audio))),
                ('audio_data', np.dtype(object)),
                ('caption', 'U{}'.format(len(caption))),
                ('caption_ind', 'i4'),
                ('words_ind', np.dtype(object)),
                ('chars_ind', np.dtype(object)),
            ]
            np_rec_array = np.rec.array(
                np.array(record_values, dtype=record_dtype))

            # Serialize the record under the templated file name.
            output_name = settings_output['file_name_template'].format(
                audio_file_name=file_name_audio,
                caption_index=caption_ind)
            dump_numpy_object(np_obj=np_rec_array,
                              file_name=str(dir_split.joinpath(output_name)))