Code example #1
0
File: esc.py  Project: ynop/audiomate
    def _load(self, path):
        """Load the ESC-50 dataset rooted at ``path``.

        Creates one labelled utterance per audio file, one subview per
        fold, and an ``esc-10`` subview for files flagged as part of the
        ESC-10 subset.
        """
        corpus = audiomate.Corpus(path=path)
        records = ESC50Reader.load_meta_data(path)

        fold_members = collections.defaultdict(list)
        esc10_members = []

        for record in records:
            audio_name = record[0]
            utt_idx = os.path.splitext(audio_name)[0]
            audio_path = os.path.abspath(
                os.path.join(path, 'audio', audio_name))

            corpus.new_file(audio_path, utt_idx)
            new_utt = corpus.new_utterance(utt_idx, utt_idx)
            new_utt.set_label_list(annotations.LabelList.create_single(
                record[3], idx=audiomate.corpus.LL_SOUND_CLASS))

            fold_members['fold-{}'.format(record[1])].append(utt_idx)

            # the esc10 column is a textual flag in the metadata
            if record[4] == 'True':
                esc10_members.append(utt_idx)

        # register the fold subviews first, then the esc-10 subview
        subview_defs = list(fold_members.items())
        subview_defs.append(('esc-10', esc10_members))

        for subview_name, member_ids in subview_defs:
            member_filter = subset.MatchingUtteranceIdxFilter(
                utterance_idxs=set(member_ids))
            view = subset.Subview(corpus, filter_criteria=[member_filter])
            corpus.import_subview(subview_name, view)

        return corpus
Code example #2
0
def prepare_corpus(corpus, name):
    """Replace all subviews of ``corpus`` with train/dev/test splits.

    The split strategy depends on the dataset ``name``. Utterances that
    are too long are removed from the training portion (common_voice is
    exempt from the duration check).
    """
    if name == 'common_voice':
        too_long = set()
    else:
        print(' - {}: Find utterances that are too long'.format(name))
        too_long = utts_too_long(corpus)

    if name == 'mailabs':
        # we only use mailabs for training
        # since we don't know the speakers
        train_utts = set(corpus.utterances.keys()) - too_long
        dev_utts = set()
        test_utts = set()

    elif name == 'tuda':
        # we only use kinect-raw files
        # otherwise sentence of the tuda would occur multiple times
        # in contrast to other datasets
        train_utts = set(
            corpus.subviews['train_kinect-raw'].utterances.keys()) - too_long
        dev_utts = set(corpus.subviews['dev_kinect-raw'].utterances.keys())
        test_utts = set(corpus.subviews['test_kinect-raw'].utterances.keys())

    elif name == 'common_voice':
        train_utts = set(corpus.subviews['train'].utterances.keys()) - too_long
        dev_utts = set(corpus.subviews['dev'].utterances.keys())
        test_utts = set(corpus.subviews['test'].utterances.keys())

    else:
        # no predefined split: drop over-long utts, then split randomly
        dur_filter = subset.MatchingUtteranceIdxFilter(too_long, inverse=True)
        dur_subview = subset.Subview(corpus, filter_criteria=[dur_filter])
        train, dev, test = create_train_dev_test(dur_subview)

        train_utts = set(train.utterances.keys())
        dev_utts = set(dev.utterances.keys())
        test_utts = set(test.utterances.keys())

    # Drop every existing subview ...
    for existing_name in list(corpus.subviews.keys()):
        del corpus.subviews[existing_name]

    # ... and register the three fresh splits
    for split_name, split_utts in (('train', train_utts),
                                   ('dev', dev_utts),
                                   ('test', test_utts)):
        split_filter = subset.MatchingUtteranceIdxFilter(split_utts)
        split_view = subset.Subview(corpus, filter_criteria=[split_filter])
        corpus.import_subview(split_name, split_view)
Code example #3
0
    def _load(self, path):
        """Load the UrbanSound8K dataset rooted at ``path``.

        Reads the metadata csv, creates one labelled utterance per audio
        file that actually exists on disk, and adds one subview per fold.
        """
        corpus = audiomate.Corpus(path=path)

        meta_path = os.path.join(path, 'metadata', 'UrbanSound8K.csv')
        # slice off the csv header row
        records = textfile.read_separated_lines(meta_path,
                                                separator=',',
                                                max_columns=8)[1:]

        fold_members = collections.defaultdict(set)

        for record in records:
            audio_name = record[0]
            fold = record[5]
            label = record[7]

            audio_path = os.path.join(path, 'audio',
                                      'fold{}'.format(fold), audio_name)

            # metadata may reference files that are missing on disk
            if not os.path.isfile(audio_path):
                continue

            utt_idx = os.path.splitext(audio_name)[0]
            corpus.new_file(audio_path, utt_idx)
            new_utt = corpus.new_utterance(utt_idx, utt_idx)
            new_utt.set_label_list(
                annotations.LabelList.create_single(
                    label, idx=audiomate.corpus.LL_SOUND_CLASS))
            fold_members['fold{}'.format(fold)].add(utt_idx)

        for fold_name, member_ids in fold_members.items():
            member_filter = subset.MatchingUtteranceIdxFilter(
                utterance_idxs=member_ids)
            view = subset.Subview(corpus, filter_criteria=[member_filter])
            corpus.import_subview(fold_name, view)

        return corpus
Code example #4
0
    def load_subset(corpus, path, subset_idx):
        """Load the subset named ``subset_idx`` into ``corpus``.

        Reads ``<subset_idx>.tsv``, creates the referenced assets if
        necessary, and registers a subview containing those utterances.
        """
        csv_file = os.path.join(path, '{}.tsv'.format(subset_idx))

        entries = textfile.read_separated_lines_generator(
            csv_file,
            separator='\t',
            max_columns=8,
            # the header line starts with the 'client_id' column name
            ignore_lines_starting_with=['client_id'],
            keep_empty=True
        )

        # create assets and collect the utterance-ids of this subset
        subset_utt_ids = [
            CommonVoiceReader.create_assets_if_needed(corpus, path, entry)
            for entry in entries
        ]

        # 'utt_filter' rather than 'filter' to avoid shadowing the builtin
        utt_filter = subset.MatchingUtteranceIdxFilter(
            utterance_idxs=set(subset_utt_ids))
        subview = subset.Subview(corpus, filter_criteria=[utt_filter])
        corpus.import_subview(subset_idx, subview)
Code example #5
0
File: mailabs.py  Project: xjc90s/audiomate
    def load_tag(self, corpus, path):
        """
        Load all speakers below ``path`` and register a subview (named
        after the tag folder) that collects all of their utterances.
        """
        tag_idx = os.path.basename(path)
        data_path = os.path.join(path, 'by_book')
        collected_utt_ids = []

        for gender_path in MailabsReader.get_folders(data_path):
            if os.path.basename(gender_path) == 'mix':
                # mix folders contain no speaker info,
                # so every utterance is treated as its own issuer
                collected_utt_ids.extend(
                    self.load_books_of_speaker(corpus, gender_path, None))
                continue

            for speaker_path in MailabsReader.get_folders(gender_path):
                speaker = MailabsReader.load_speaker(corpus, speaker_path)
                collected_utt_ids.extend(self.load_books_of_speaker(
                    corpus, speaker_path, speaker))

        tag_filter = subset.MatchingUtteranceIdxFilter(
            utterance_idxs=set(collected_utt_ids))
        subview = subset.Subview(corpus, filter_criteria=[tag_filter])
        corpus.import_subview(tag_idx, subview)
Code example #6
0
File: swc.py  Project: val260/audiomate
    def _load(self, path):
        """Load the SWC corpus and strip out known-invalid utterances.

        Returns a standalone Corpus built from the filtered view.
        """
        corpus = super(SWCReader, self)._load(path)

        utt_filter = subset.MatchingUtteranceIdxFilter(
            utterance_idxs=set(INVALID_UTT_IDS), inverse=True)
        # pass the criteria wrapped in a list, consistent with every other
        # Subview construction in this project
        filtered = subset.Subview(corpus, filter_criteria=[utt_filter])

        return audiomate.Corpus.from_corpus(filtered)
Code example #7
0
File: common_voice.py  Project: toddrme2178/audiomate
    def load_subset(corpus, path, subset_idx):
        """ Load subset into corpus. """
        csv_file = os.path.join(path, '{}.csv'.format(subset_idx))
        utt_ids = []

        entries = textfile.read_separated_lines_generator(
            csv_file,
            separator=',',
            max_columns=8,
            ignore_lines_starting_with=['filename'])

        for entry in entries:
            rel_file_path = entry[0]
            basename = os.path.splitext(os.path.split(rel_file_path)[1])[0]
            transcription = entry[1]
            age = CommonVoiceReader.map_age(entry[4])
            gender = CommonVoiceReader.map_gender(entry[5])

            # file, issuer and utterance all share one prefixed id
            idx = '{}-{}'.format(subset_idx, basename)

            corpus.new_file(os.path.join(path, rel_file_path), idx)

            speaker = assets.Speaker(idx, gender=gender, age_group=age)
            corpus.import_issuers(speaker)

            utterance = corpus.new_utterance(idx, idx, speaker.idx)
            utterance.set_label_list(assets.LabelList.create_single(
                transcription, idx=audiomate.corpus.LL_WORD_TRANSCRIPT))

            utt_ids.append(idx)

        utt_filter = subset.MatchingUtteranceIdxFilter(
            utterance_idxs=set(utt_ids))
        subview = subset.Subview(corpus, filter_criteria=[utt_filter])
        corpus.import_subview(subset_idx, subview)
Code example #8
0
File: test_subview.py  Project: ynop/audiomate
def run(corpus, filters):
    """Build a subview and touch all of its lazy properties.

    Used as a smoke/benchmark helper for subview evaluation.
    """
    subview = subset.Subview(corpus, filters)

    for prop_name in ('utterances', 'issuers', 'tracks',
                      'num_utterances', 'num_issuers', 'num_tracks'):
        getattr(subview, prop_name)
Code example #9
0
File: aed.py  Project: toddrme2178/audiomate
    def _load(self, path):
        """Load the AED dataset: a 'test' and a 'train' subview."""
        corpus = audiomate.Corpus(path=path)

        # 'test' first to match the original loading/import order
        for part_name in ('test', 'train'):
            part_folder = os.path.join(path, part_name)
            part_utt_ids = AEDReader.load_folder(part_folder, corpus)

            part_filter = subset.MatchingUtteranceIdxFilter(
                utterance_idxs=part_utt_ids)
            part_view = subset.Subview(corpus,
                                       filter_criteria=[part_filter])
            corpus.import_subview(part_name, part_view)

        return corpus
Code example #10
0
File: test_iterator.py  Project: ynop/audiomate
    def test_init_with_corpus_view(self):
        # an iterator built on a subview must expose exactly the
        # subview's utterance-ids
        corpus = resources.create_dataset()
        wanted_utts = {'utt-1', 'utt-2', 'utt-4'}
        subview = subset.Subview(
            corpus,
            filter_criteria=[
                subset.MatchingUtteranceIdxFilter(
                    utterance_idxs=wanted_utts),
            ])

        it = feeding.DataIterator(subview, [containers.Container('blub')])
        assert set(it.utt_ids) == set(subview.utterances.keys())
Code example #11
0
File: fluent_speech.py  Project: xjc90s/audiomate
    def load_part(base_path, part_name, corpus, speakers):
        """Load one part (e.g. train/dev/test) of the fluent-speech data.

        Creates files, utterances, the word transcription and the intent
        label-lists (action/object/location), then registers a subview
        named ``part_name`` containing the part's utterances.
        """
        part_file_path = os.path.join(base_path, 'data',
                                      '{}_data.csv'.format(part_name))
        entries = textfile.read_separated_lines_generator(
            part_file_path,
            separator=',',
            max_columns=7,
            ignore_lines_starting_with=[','])

        part_ids = []

        for entry in entries:
            file_path = entry[1]
            file_base = os.path.basename(file_path)
            idx = os.path.splitext(file_base)[0]
            speaker_idx = entry[2]
            part_ids.append(idx)

            # a speaker can occur in several parts - import only once
            if speaker_idx not in corpus.issuers.keys():
                corpus.import_issuers(speakers[speaker_idx])

            track = corpus.new_file(os.path.join(base_path, file_path), idx)
            utt = corpus.new_utterance(idx, track.idx, speaker_idx)

            transcription = annotations.LabelList.create_single(
                entry[3], idx=audiomate.corpus.LL_WORD_TRANSCRIPT)
            utt.set_label_list(transcription)

            # columns 4-6 hold the intent slots; 'none' means unset
            for column, slot_name in ((4, 'action'),
                                      (5, 'object'),
                                      (6, 'location')):
                if entry[column] != 'none':
                    utt.set_label_list(
                        annotations.LabelList.create_single(
                            entry[column], idx=slot_name))

        # 'part_filter' rather than 'filter' - avoid shadowing the builtin
        part_filter = subset.MatchingUtteranceIdxFilter(
            utterance_idxs=set(part_ids))
        subview = subset.Subview(corpus, filter_criteria=[part_filter])
        corpus.import_subview(part_name, subview)
Code example #12
0
    def _load(self, path):
        """Load the LibriSpeech corpus rooted at ``path``.

        Walks subset -> speaker -> chapter folders, creates one utterance
        per flac file together with its transcript, and registers one
        subview per subset.
        """
        corpus = audiomate.Corpus(path=path)

        speaker_info_path = os.path.join(path, 'SPEAKERS.TXT')
        speakers = LibriSpeechReader.load_speakers(speaker_info_path)

        sf = LibriSpeechReader.available_subfolders

        for subset_idx, subset_path in sf(path, SUBSETS.keys()).items():
            subset_utt_ids = set()

            for speaker_idx, speaker_path in sf(subset_path).items():
                corpus.import_issuers(speakers[speaker_idx])

                for chapter_idx, chapter_path in sf(speaker_path).items():
                    transcript_path = os.path.join(
                        chapter_path,
                        '{}-{}.trans.txt'.format(speaker_idx, chapter_idx)
                    )
                    transcripts = LibriSpeechReader.load_transcripts(
                        transcript_path)

                    for utt_idx, transcript in transcripts.items():
                        file_path = os.path.join(
                            chapter_path, '{}.flac'.format(utt_idx))
                        corpus.new_file(file_path, utt_idx)

                        utterance = corpus.new_utterance(
                            utt_idx,
                            utt_idx,
                            speaker_idx
                        )

                        utterance.set_label_list(
                            annotations.LabelList.create_single(
                                transcript,
                                idx=audiomate.corpus.LL_WORD_TRANSCRIPT
                            )
                        )

                        subset_utt_ids.add(utt_idx)

            # subset_utt_ids is already a set - no redundant conversion;
            # 'utt_filter' avoids shadowing the builtin 'filter'
            utt_filter = subset.MatchingUtteranceIdxFilter(
                utterance_idxs=subset_utt_ids)
            subview = subset.Subview(corpus, filter_criteria=[utt_filter])
            corpus.import_subview(subset_idx, subview)

        return corpus
Code example #13
0
File: test_dataset.py  Project: ynop/audiomate
    def test_init_with_corpus_view(self, tmpdir):
        # a Dataset over a subview must list only the subview's utterances
        c = containers.Container(os.path.join(tmpdir.strpath, 'test.h5'))
        c.open()

        # fill the container with dummy features for five utterances
        for i in range(1, 6):
            c.set('utt-{}'.format(i), data=np.arange(20))

        corpus = resources.create_dataset()
        subview = subset.Subview(
            corpus,
            filter_criteria=[
                subset.MatchingUtteranceIdxFilter(
                    utterance_idxs={'utt-1', 'utt-2', 'utt-4'})
            ])

        it = feeding.Dataset(subview, [c])
        assert it.utt_ids == ['utt-1', 'utt-2', 'utt-4']
Code example #14
0
    def _load(self, path):
        """Load the TIMIT corpus rooted at ``path``.

        For each part (TEST/TRAIN) walks the region/speaker folders,
        creates speakers (gender derived from the folder-name prefix),
        one utterance per WAV file with raw-text, word and phone
        label-lists, and registers a subview per part.
        """
        corpus = audiomate.Corpus(path=path)

        # TIMIT word/phone boundaries are sample indices at 16 kHz
        sample_rate = 16000

        def _segment_label_list(idx, records):
            # one Label per (start-sample, end-sample, text) record
            ll = assets.LabelList(idx=idx)
            for record in records:
                ll.append(assets.Label(
                    record[2],
                    start=int(record[0]) / sample_rate,
                    end=int(record[1]) / sample_rate))
            return ll

        for part in ['TEST', 'TRAIN']:
            part_path = os.path.join(path, part)
            part_utt_ids = set()

            for region in os.listdir(part_path):
                region_path = os.path.join(part_path, region)

                if not os.path.isdir(region_path):
                    continue

                for speaker_abbr in os.listdir(region_path):
                    speaker_path = os.path.join(region_path, speaker_abbr)
                    # folder name = gender prefix + speaker id, e.g. 'MABC0'
                    speaker_idx = speaker_abbr[1:]

                    if speaker_idx not in corpus.issuers.keys():
                        issuer = assets.Speaker(speaker_idx)

                        if speaker_abbr[:1] == 'M':
                            issuer.gender = assets.Gender.MALE
                        elif speaker_abbr[:1] == 'F':
                            issuer.gender = assets.Gender.FEMALE

                        corpus.import_issuers(issuer)

                    for wav_path in glob.glob(
                            os.path.join(speaker_path, '*.WAV')):
                        sentence_idx = os.path.splitext(
                            os.path.basename(wav_path))[0]
                        utt_idx = '{}-{}-{}'.format(
                            region, speaker_abbr, sentence_idx).lower()
                        part_utt_ids.add(utt_idx)

                        raw_text_path = os.path.join(
                            speaker_path, '{}.TXT'.format(sentence_idx))
                        # first row, third column holds the sentence text
                        raw_text = textfile.read_separated_lines(
                            raw_text_path, separator=' ',
                            max_columns=3)[0][2]

                        words_path = os.path.join(
                            speaker_path, '{}.WRD'.format(sentence_idx))
                        words = textfile.read_separated_lines(
                            words_path, separator=' ', max_columns=3)

                        phones_path = os.path.join(
                            speaker_path, '{}.PHN'.format(sentence_idx))
                        phones = textfile.read_separated_lines(
                            phones_path, separator=' ', max_columns=3)

                        corpus.new_file(wav_path, utt_idx)
                        utt = corpus.new_utterance(utt_idx, utt_idx,
                                                   speaker_idx)

                        utt.set_label_list(assets.LabelList.create_single(
                            raw_text,
                            idx=audiomate.corpus.LL_WORD_TRANSCRIPT_RAW))

                        # word and phone lists share one builder instead of
                        # two duplicated append-loops
                        utt.set_label_list(_segment_label_list(
                            audiomate.corpus.LL_WORD_TRANSCRIPT, words))
                        utt.set_label_list(_segment_label_list(
                            audiomate.corpus.LL_PHONE_TRANSCRIPT, phones))

            # 'part_filter' avoids shadowing the builtin 'filter'
            part_filter = subset.MatchingUtteranceIdxFilter(
                utterance_idxs=part_utt_ids)
            subview = subset.Subview(corpus, filter_criteria=[part_filter])
            corpus.import_subview(part, subview)

        return corpus
Code example #15
0
def run(download_folder, output_folder):
    """Load several corpora, split them, merge them and save the result.

    Each corpus gets train/dev/test subviews plus a 'full' subview, all
    subview names are suffixed with the corpus name, the corpora are
    merged, and merged train/dev/test subviews are created on the result.
    """
    corpora_names = [
        ('common_voice', 'common-voice'),
        ('mailabs', 'mailabs'),
        ('swc', 'swc'),
        ('tuda', 'tuda'),
        ('voxforge', 'voxforge'),
    ]

    print('Load corpora')
    corpora = {}

    for name, reader_type in corpora_names:
        print(' - {} ...'.format(name))
        full_path = os.path.join(download_folder, name)
        corpora[name] = audiomate.Corpus.load(full_path, reader=reader_type)

    print('Create Train/Dev/Test - if not already exist')
    for name, corpus in corpora.items():
        prepare_corpus(corpus, name)

    print('Insert full subviews')
    #
    #   Insert subviews containing all utterances
    #   so we have a reference when merged
    #
    for corpus in corpora.values():
        all_utts = set(corpus.utterances.keys())
        full_filter = subset.MatchingUtteranceIdxFilter(all_utts)
        full_subview = subset.Subview(corpus, filter_criteria=[full_filter])
        corpus.import_subview('full', full_subview)

    print('Suffix subviews')
    #
    #   Suffix subviews to have the correct names when merging
    #
    for name, corpus in corpora.items():
        print(' - {} ...'.format(name))
        for subview_name in list(corpus.subviews.keys()):
            new_subview_name = '{}_{}'.format(subview_name, name)
            corpus.subviews[new_subview_name] = corpus.subviews.pop(
                subview_name)

    print('Merge corpora ...')
    full_corpus = audiomate.Corpus.merge_corpora(list(corpora.values()))

    print('Create merged train/test/dev subviews ...')
    for part in ['train', 'dev', 'test']:
        utt_ids = set()

        for name in corpora.keys():
            sv = full_corpus.subviews['{}_{}'.format(part, name)]
            utt_ids.update(sv.utterances.keys())

        part_filter = subset.MatchingUtteranceIdxFilter(utt_ids)
        # BUGFIX: build the subview on the merged corpus - the original
        # used the leftover loop variable 'corpus' here
        part_subview = subset.Subview(full_corpus,
                                      filter_criteria=[part_filter])
        full_corpus.import_subview(part, part_subview)

    print('Save ...')
    os.makedirs(output_folder)
    full_corpus.save_at(output_folder)