Example #1
    def read_genders(genders_path, corpus):
        if os.path.isfile(genders_path):
            speakers = textfile.read_key_value_lines(genders_path,
                                                     separator=' ')

            for speaker_idx, gender_str in speakers.items():
                if gender_str == 'm':
                    gender = issuers.Gender.MALE
                else:
                    # Any value other than 'm' falls back to FEMALE.
                    gender = issuers.Gender.FEMALE

                speaker = issuers.Speaker(speaker_idx, gender=gender)
                corpus.import_issuers(speaker)
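The genders file consumed above is a plain-text listing with one space-separated 'speaker-id gender' pair per line, and any value other than 'm' falls back to female. A minimal usage sketch, assuming read_genders is in scope as shown; the file name is illustrative:

    import audiomate

    corpus = audiomate.Corpus()

    # genders.txt, one pair per line:
    #   spk-001 m
    #   spk-002 f
    read_genders('genders.txt', corpus)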
Example #2
    def load_books_of_speaker(self, corpus, path, speaker):
        """
        Load all utterances for the speaker at the given path.
        """
        utt_ids = []

        for book_path in MailabsReader.get_folders(path):
            meta_path = os.path.join(book_path, 'metadata.csv')
            wavs_path = os.path.join(book_path, 'wavs')

            meta = textfile.read_separated_lines(meta_path,
                                                 separator='|',
                                                 max_columns=3)

            for entry in meta:
                file_basename = entry[0]
                transcription_raw = entry[1]
                transcription_clean = entry[2]

                if speaker is None:
                    idx = file_basename
                    utt_speaker = issuers.Speaker(idx)
                    speaker_idx = idx
                    corpus.import_issuers(utt_speaker)
                else:
                    idx = '{}-{}'.format(speaker.idx, file_basename)
                    speaker_idx = speaker.idx

                wav_name = '{}.wav'.format(file_basename)
                wav_path = os.path.join(wavs_path, wav_name)

                if (os.path.isfile(wav_path)
                        and idx not in self.invalid_utterance_ids):
                    corpus.new_file(wav_path, idx)

                    ll_raw = annotations.LabelList.create_single(
                        transcription_raw,
                        idx=audiomate.corpus.LL_WORD_TRANSCRIPT_RAW)

                    ll_clean = annotations.LabelList.create_single(
                        transcription_clean,
                        idx=audiomate.corpus.LL_WORD_TRANSCRIPT)

                    utterance = corpus.new_utterance(idx, idx, speaker_idx)
                    utterance.set_label_list(ll_raw)
                    utterance.set_label_list(ll_clean)

                    utt_ids.append(utterance.idx)

        return utt_ids
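The loader expects an LJSpeech-style metadata.csv per book folder: three |-separated columns holding the file basename, the raw transcription and the cleaned transcription, next to a wavs/ directory with the matching audio. A hedged sketch of building such a fixture (all names illustrative):

    import os

    os.makedirs(os.path.join('book', 'wavs'), exist_ok=True)

    with open(os.path.join('book', 'metadata.csv'), 'w',
              encoding='utf-8') as f:
        # basename | raw transcription | cleaned transcription
        f.write('utt_0001|Raw text, as spoken!|raw text as spoken\n')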
Example #3
    def _create_or_get_speech_issuer(corpus, file_idx, labels):
        if file_idx not in labels:
            return None

        issuer = issuers.Speaker(file_idx)

        # file_idx is guaranteed to be in labels at this point; the first
        # label entry carries the gender string.
        if labels[file_idx][0] == 'm':
            issuer.gender = issuers.Gender.MALE
        elif labels[file_idx][0] == 'f':
            issuer.gender = issuers.Gender.FEMALE

        corpus.import_issuers(issuer)

        return file_idx
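Here labels maps a file id to a list whose first element is a gender string, and the helper returns the file id itself so it can double as the issuer idx. A usage sketch, assuming the function and audiomate are in scope (values illustrative):

    corpus = audiomate.Corpus()
    labels = {'file-001': ['m']}

    issuer_idx = _create_or_get_speech_issuer(corpus, 'file-001', labels)
    # issuer_idx == 'file-001'; a male Speaker is now in corpus.issuers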
Example #4
    def parse_speaker_info(readme_path):
        """ Parse speaker info and return tuple (idx, gender). """
        idx = None
        gender = issuers.Gender.UNKNOWN
        age_group = issuers.AgeGroup.UNKNOWN
        native_lang = None

        with open(readme_path, 'r', errors='ignore') as f:
            for raw_line in f:
                line = raw_line.strip()

                if line:
                    line = line.rstrip(';.')
                    parts = line.split(':', maxsplit=1)

                    if len(parts) > 1:
                        key = parts[0].strip().lower()
                        value = parts[1].strip()

                        if key == 'user name':
                            idx = value

                        value = value.lower()

                        if key == 'gender':
                            if value in ['männlich', 'male', 'mnnlich']:
                                gender = issuers.Gender.MALE
                            elif value in ['weiblich', 'female', '[female]']:
                                gender = issuers.Gender.FEMALE

                        if key == 'age range':
                            if value in ['erwachsener', 'adult', '[adult]', '[erwachsener]']:
                                age_group = issuers.AgeGroup.ADULT
                            elif value in ['senior', '[senior]']:
                                age_group = issuers.AgeGroup.SENIOR
                            elif value in ['youth', 'jugendlicher', '[youth]', '[jugendlicher]']:
                                age_group = issuers.AgeGroup.YOUTH
                            elif value in ['kind', 'child']:
                                age_group = issuers.AgeGroup.CHILD

                        if key == 'language':
                            if value in ['de', 'ger', 'deu', '[de]']:
                                native_lang = 'deu'
                            elif value in ['en', 'eng', '[en]']:
                                native_lang = 'eng'

        return issuers.Speaker(idx, gender=gender, age_group=age_group, native_language=native_lang)
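The parser keys on 'key: value' lines and tolerates German labels, bracketed values and trailing ';' or '.'. A hedged sketch of a README fragment it accepts, with illustrative values:

    readme = '\n'.join([
        'User Name: anonymous123;',
        'Gender: male;',
        'Age Range: Adult;',
        'Language: DE;',
    ])

    with open('README', 'w', encoding='utf-8') as f:
        f.write(readme)

    speaker = parse_speaker_info('README')
    # -> idx 'anonymous123', Gender.MALE, AgeGroup.ADULT,
    #    native_language 'deu'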
Example #5
    def load_speaker(corpus, path):
        """ Create a speaker instance for the given path.  """
        base_path, speaker_name = os.path.split(path)
        base_path, gender_desc = os.path.split(base_path)
        base_path, _ = os.path.split(base_path)
        base_path, _ = os.path.split(base_path)

        gender = issuers.Gender.UNKNOWN

        if gender_desc == 'male':
            gender = issuers.Gender.MALE
        elif gender_desc == 'female':
            gender = issuers.Gender.FEMALE

        speaker = issuers.Speaker(speaker_name, gender=gender)
        corpus.import_issuers(speaker)

        return speaker
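The chain of os.path.split calls means the speaker folder's parent must be named after the gender, with two more levels above that split off and discarded. So a path like the following (illustrative, M-AILABS-style) resolves to a male speaker named after the last component:

    speaker = load_speaker(corpus, '/data/en_US/by_book/male/elliot_miller')
    # speaker.idx == 'elliot_miller'
    # speaker.gender == issuers.Gender.MALE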
Example #6
    def _load_folder(folder_entry, corpus):
        """ Load the given subfolder into the corpus (e.g. bed, one, ...) """
        for wav_path in glob.glob(os.path.join(folder_entry.path, '*.wav')):
            wav_name = os.path.basename(wav_path)
            basename, __ = os.path.splitext(wav_name)

            command = folder_entry.name
            file_idx = '{}_{}'.format(basename, command)
            issuer_idx = str(basename).split('_', maxsplit=1)[0]

            corpus.new_file(wav_path, file_idx)

            if issuer_idx not in corpus.issuers.keys():
                corpus.import_issuers(issuers.Speaker(issuer_idx))

            utt = corpus.new_utterance(file_idx, file_idx, issuer_idx)

            labels = annotations.LabelList.create_single(
                command, idx=audiomate.corpus.LL_WORD_TRANSCRIPT)
            utt.set_label_list(labels)
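folder_entry only needs .path and .name attributes, which matches os.DirEntry; the folder name doubles as the spoken command, and the file-name prefix before the first '_' identifies the speaker. A hedged sketch of scanning a dataset root (path illustrative):

    import os

    for folder_entry in os.scandir('speech_commands_root'):
        if folder_entry.is_dir():
            _load_folder(folder_entry, corpus)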
Example #7
    def _load(self, path):
        corpus = audiomate.Corpus(path=path)

        for file_path in glob.glob(os.path.join(path, 'recordings', '*.wav')):
            file_idx = os.path.splitext(os.path.basename(file_path))[0]

            corpus.new_file(file_path, file_idx)

            idx_parts = file_idx.split('_')
            digit = idx_parts[0]
            issuer_idx = '_'.join(idx_parts[1:-1])

            if issuer_idx not in corpus.issuers.keys():
                issuer = issuers.Speaker(issuer_idx)
                corpus.import_issuers(issuer)

            utterance = corpus.new_utterance(file_idx, file_idx, issuer_idx)
            utterance.set_label_list(
                annotations.LabelList.create_single(
                    str(digit), idx=audiomate.corpus.LL_WORD_TRANSCRIPT))

        return corpus
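File names under recordings/ encode digit, speaker and take number separated by '_': the digit is the first part and the speaker is everything between the first and last separator. A tiny illustration of that split (file name illustrative):

    idx_parts = '7_jackson_32'.split('_')
    assert idx_parts[0] == '7'                     # spoken digit
    assert '_'.join(idx_parts[1:-1]) == 'jackson'  # issuer idx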
Example #8
    def create_assets_if_needed(corpus, path, entry):
        """ Create File/Utterance/Issuer, if they not already exist and return utt-idx. """
        file_name = entry[1]
        file_idx, _ = os.path.splitext(file_name)

        if file_idx in INVALID_UTTS:
            return None

        if file_idx not in corpus.utterances.keys():
            speaker_idx = entry[0]
            transcription = entry[2]

            if len(entry) >= 6:
                age = CommonVoiceReader.map_age(entry[5])
            else:
                age = issuers.AgeGroup.UNKNOWN

            if len(entry) >= 7:
                gender = CommonVoiceReader.map_gender(entry[6])
            else:
                gender = issuers.Gender.UNKNOWN

            file_path = os.path.join(path, 'clips', file_name)
            corpus.new_file(file_path, file_idx)

            if speaker_idx in corpus.issuers.keys():
                issuer = corpus.issuers[speaker_idx]
            else:
                issuer = issuers.Speaker(speaker_idx,
                                         gender=gender,
                                         age_group=age)
                corpus.import_issuers(issuer)

            utterance = corpus.new_utterance(file_idx, file_idx, issuer.idx)
            utterance.set_label_list(
                annotations.LabelList.create_single(
                    transcription, idx=audiomate.corpus.LL_WORD_TRANSCRIPT))

        return file_idx
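Each entry is one row of the Common Voice TSV; only positions 0 (speaker id), 1 (clip file name), 2 (transcription) and, when present, 5 (age) and 6 (gender) are consumed. A hedged sketch of such a row; the values are illustrative, and the meaning of the skipped middle columns (vote counts in the upstream TSV) is an assumption:

    entry = [
        'a1b2c3d4',                # [0] speaker idx
        'common_voice_en_1.mp3',   # [1] file name under clips/
        'hello world',             # [2] transcription
        '2',                       # [3] unused here
        '0',                       # [4] unused here
        'twenties',                # [5] -> CommonVoiceReader.map_age
        'female',                  # [6] -> CommonVoiceReader.map_gender
    ]

    utt_idx = create_assets_if_needed(corpus, '/data/common_voice', entry)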
Example #9
    def _load(self, path):
        corpus = audiomate.Corpus(path=path)
        data_path = os.path.join(path, 'data')
        meta_data = AudioMNISTReader.load_speaker_meta(path)

        for speaker_idx in os.listdir(data_path):
            speaker_path = os.path.join(data_path, speaker_idx)

            if os.path.isdir(speaker_path):

                for file_path in glob.glob(os.path.join(speaker_path,
                                                        '*.wav')):
                    file_idx = os.path.splitext(os.path.basename(file_path))[0]

                    corpus.new_file(file_path, file_idx)

                    idx_parts = file_idx.split('_')
                    digit = idx_parts[0]

                    if speaker_idx not in corpus.issuers.keys():
                        issuer = issuers.Speaker(
                            speaker_idx,
                            gender=AudioMNISTReader.get_gender(
                                meta_data, speaker_idx),
                            age_group=AudioMNISTReader.get_age_group(
                                meta_data, speaker_idx))
                        corpus.import_issuers(issuer)

                    utterance = corpus.new_utterance(file_idx, file_idx,
                                                     speaker_idx)
                    utterance.set_label_list(
                        annotations.LabelList.create_single(
                            str(digit),
                            idx=audiomate.corpus.LL_WORD_TRANSCRIPT))

        return corpus
Example #10
def generate_issuers(n, rand=None):
    if rand is None:
        rand = random.Random()

    items = []

    for issuer_index in range(n):
        issuer_idx = 'issuer-{}'.format(issuer_index)

        issuer_type = rand.randint(1, 3)

        if issuer_type == 1:
            issuer = issuers.Speaker(issuer_idx,
                                     gender=issuers.Gender.UNKNOWN,
                                     age_group=issuers.AgeGroup.CHILD,
                                     native_language='de')
        elif issuer_type == 2:
            issuer = issuers.Artist(issuer_idx, 'badam')
        else:
            issuer = issuers.Issuer(issuer_idx)

        items.append(issuer)

    return items
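Passing a seeded random.Random makes the generated mix of Speaker, Artist and plain Issuer instances reproducible, which is handy for tests:

import random

rand = random.Random(42)
items = generate_issuers(5, rand=rand)  # same five issuers on every run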
Example #11
    def load_file(self, folder_path, idx, corpus):
        """
        Load speaker, file, utterance, labels
        for the file with the given id.
        """
        xml_path = os.path.join(folder_path, '{}.xml'.format(idx))
        wav_paths = []

        for wav_suffix in WAV_FILE_SUFFIXES:
            wav_path = os.path.join(folder_path,
                                    '{}_{}.wav'.format(idx, wav_suffix))
            wav_name = os.path.split(wav_path)[1]
            wav_idx = os.path.splitext(wav_name)[0]

            if (os.path.isfile(wav_path)
                    and wav_idx not in self.invalid_utterance_ids):
                wav_paths.append(wav_path)

        if len(wav_paths) == 0:
            return []

        with open(xml_path, 'r', encoding='utf-8') as f:
            text = f.read()

        transcription = TudaReader.extract_value(text, TRANSCRIPTION_PATTERN,
                                                 'transcription', xml_path)
        transcription_raw = TudaReader.extract_value(
            text, RAW_TRANSCRIPTION_PATTERN, 'raw_transcription', xml_path)
        gender = TudaReader.extract_value(text, GENDER_PATTERN, 'gender',
                                          xml_path)
        is_native = TudaReader.extract_value(text, NATIVE_PATTERN, 'native',
                                             xml_path)
        age_class = TudaReader.extract_value(text, AGE_PATTERN, 'age',
                                             xml_path)
        speaker_idx = TudaReader.extract_value(text, SPEAKER_IDX_PATTERN,
                                               'speaker_idx', xml_path)

        if speaker_idx not in corpus.issuers.keys():
            start_age_class = int(age_class.split('-')[0])

            if start_age_class < 12:
                age_group = issuers.AgeGroup.CHILD
            elif start_age_class < 18:
                age_group = issuers.AgeGroup.YOUTH
            elif start_age_class < 65:
                age_group = issuers.AgeGroup.ADULT
            else:
                age_group = issuers.AgeGroup.SENIOR

            native_lang = None

            if is_native == 'Ja':
                native_lang = 'deu'

            issuer = issuers.Speaker(speaker_idx,
                                     gender=issuers.Gender(gender),
                                     age_group=age_group,
                                     native_language=native_lang)
            corpus.import_issuers(issuer)

        utt_ids = []

        for wav_path in wav_paths:
            wav_name = os.path.split(wav_path)[1]
            wav_idx = os.path.splitext(wav_name)[0]
            corpus.new_file(wav_path, wav_idx)
            utt = corpus.new_utterance(wav_idx, wav_idx, speaker_idx)
            utt.set_label_list(
                annotations.LabelList.create_single(
                    transcription, idx=audiomate.corpus.LL_WORD_TRANSCRIPT))
            utt.set_label_list(
                annotations.LabelList.create_single(
                    transcription_raw,
                    idx=audiomate.corpus.LL_WORD_TRANSCRIPT_RAW))
            utt_ids.append(wav_idx)

        return utt_ids
Example #12
    def load_file(folder_path, idx, corpus):
        """
        Load speaker, file, utterance, labels for the file with the given id.
        """
        xml_path = os.path.join(folder_path, '{}.xml'.format(idx))
        wav_paths = glob.glob(os.path.join(folder_path,
                                           '{}_*.wav'.format(idx)))

        if len(wav_paths) == 0:
            return []

        with open(xml_path, 'r', encoding='utf-8') as xml_file:
            soup = BeautifulSoup(xml_file, 'lxml')

        transcription = soup.recording.cleaned_sentence.string
        transcription_raw = soup.recording.sentence.string
        gender = soup.recording.gender.string
        is_native = soup.recording.muttersprachler.string
        age_class = soup.recording.ageclass.string
        speaker_idx = soup.recording.speaker_id.string

        if speaker_idx not in corpus.issuers.keys():
            start_age_class = int(age_class.split('-')[0])

            if start_age_class < 12:
                age_group = issuers.AgeGroup.CHILD
            elif start_age_class < 18:
                age_group = issuers.AgeGroup.YOUTH
            elif start_age_class < 65:
                age_group = issuers.AgeGroup.ADULT
            else:
                age_group = issuers.AgeGroup.SENIOR

            native_lang = None

            if is_native == 'Ja':
                native_lang = 'deu'

            issuer = issuers.Speaker(speaker_idx,
                                     gender=issuers.Gender(gender),
                                     age_group=age_group,
                                     native_language=native_lang)
            corpus.import_issuers(issuer)

        utt_ids = []

        for wav_path in wav_paths:
            wav_name = os.path.split(wav_path)[1]
            wav_idx = os.path.splitext(wav_name)[0]
            corpus.new_file(wav_path, wav_idx)
            utt = corpus.new_utterance(wav_idx, wav_idx, speaker_idx)
            utt.set_label_list(
                annotations.LabelList.create_single(
                    transcription, idx=audiomate.corpus.LL_WORD_TRANSCRIPT))
            utt.set_label_list(
                annotations.LabelList.create_single(
                    transcription_raw,
                    idx=audiomate.corpus.LL_WORD_TRANSCRIPT_RAW))
            utt_ids.append(wav_idx)

        return utt_ids
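The BeautifulSoup accessors imply a per-recording XML file shaped roughly as follows (a hedged sketch; tag names are taken from the accessors above, values are illustrative). Note that issuers.Gender(gender) requires the gender string to match a Gender enum value:

    xml_sketch = """
    <recording>
      <speaker_id>abc123</speaker_id>
      <gender>male</gender>
      <ageclass>21-30</ageclass>
      <muttersprachler>Ja</muttersprachler>
      <sentence>Raw sentence, as prompted.</sentence>
      <cleaned_sentence>raw sentence as prompted</cleaned_sentence>
    </recording>
    """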
Example #13
    def _load(self, path):
        corpus = audiomate.Corpus(path=path)

        for part in ['TEST', 'TRAIN']:
            part_path = os.path.join(path, part)
            part_utt_ids = set()

            for region in os.listdir(part_path):
                region_path = os.path.join(part_path, region)

                if os.path.isdir(region_path):

                    for speaker_abbr in os.listdir(region_path):
                        speaker_path = os.path.join(region_path, speaker_abbr)
                        speaker_idx = speaker_abbr[1:]

                        if speaker_idx not in corpus.issuers.keys():
                            issuer = issuers.Speaker(speaker_idx)

                            if speaker_abbr[:1] == 'M':
                                issuer.gender = issuers.Gender.MALE
                            elif speaker_abbr[:1] == 'F':
                                issuer.gender = issuers.Gender.FEMALE

                            corpus.import_issuers(issuer)

                        for wav_path in glob.glob(
                                os.path.join(speaker_path, '*.WAV')):
                            sentence_idx = os.path.splitext(
                                os.path.basename(wav_path))[0]
                            utt_idx = '{}-{}-{}'.format(
                                region, speaker_abbr, sentence_idx).lower()
                            part_utt_ids.add(utt_idx)

                            raw_text_path = os.path.join(
                                speaker_path, '{}.TXT'.format(sentence_idx))
                            raw_text = textfile.read_separated_lines(
                                raw_text_path, separator=' ',
                                max_columns=3)[0][2]

                            words_path = os.path.join(
                                speaker_path, '{}.WRD'.format(sentence_idx))
                            words = textfile.read_separated_lines(
                                words_path, separator=' ', max_columns=3)

                            phones_path = os.path.join(
                                speaker_path, '{}.PHN'.format(sentence_idx))
                            phones = textfile.read_separated_lines(
                                phones_path, separator=' ', max_columns=3)

                            corpus.new_file(wav_path, utt_idx)
                            utt = corpus.new_utterance(utt_idx, utt_idx,
                                                       speaker_idx)

                            raw_ll = annotations.LabelList.create_single(
                                raw_text,
                                idx=audiomate.corpus.LL_WORD_TRANSCRIPT_RAW)
                            utt.set_label_list(raw_ll)

                            word_ll = annotations.LabelList(
                                idx=audiomate.corpus.LL_WORD_TRANSCRIPT)

                            for record in words:
                                start = int(record[0]) / 16000
                                end = int(record[1]) / 16000
                                word_ll.addl(record[2], start=start, end=end)

                            utt.set_label_list(word_ll)

                            phone_ll = annotations.LabelList(
                                idx=audiomate.corpus.LL_PHONE_TRANSCRIPT)

                            for record in phones:
                                start = int(record[0]) / 16000
                                end = int(record[1]) / 16000
                                phone_ll.addl(record[2], start=start, end=end)

                            utt.set_label_list(phone_ll)

            utt_filter = subset.MatchingUtteranceIdxFilter(
                utterance_idxs=part_utt_ids)
            subview = subset.Subview(corpus, filter_criteria=[utt_filter])
            corpus.import_subview(part, subview)

        return corpus
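The .WRD and .PHN files store label boundaries as sample indices; TIMIT audio is sampled at 16 kHz, so dividing by 16000 converts them to seconds:

    start_samples, end_samples, label = 2480, 7342, 'she'
    start = start_samples / 16000   # 0.155 s
    end = end_samples / 16000       # ~0.459 s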
Example #14
    def _load(self, path):
        corpus = audiomate.Corpus()

        article_paths = sorted(self.get_articles(path))
        reader_map = {}
        file_map = {}

        for article_idx, article_path in enumerate(article_paths):
            audio_files = self.get_audio_file_info(article_path)
            reader_name, reader_gender = self.get_reader_info(article_path)
            segments = self.get_segments(article_path)

            if reader_name not in reader_map.keys():
                speaker = issuers.Speaker(
                    '{:0>8}'.format(len(reader_map)),
                    gender=reader_gender
                )
                reader_map[reader_name] = speaker
                corpus.import_issuers(speaker)
            else:
                speaker = reader_map[reader_name]

            for start, end, text in segments:
                file_path = self.find_audio_file_for_segment(start, end, audio_files)

                if file_path is not None:
                    if file_path not in file_map.keys():
                        track = tracks.FileTrack(
                            '{:0>10}'.format(len(file_map)),
                            file_path
                        )
                        file_map[file_path] = track
                        corpus.import_tracks(track)
                    else:
                        track = file_map[file_path]

                    track_offset = audio_files[file_path]
                    utt_start = start - track_offset
                    utt_end = end - track_offset

                    utt_idx = '{}_{}_{}_{}'.format(
                        speaker.idx,
                        track.idx,
                        int(start * 1000),
                        int(end * 1000)
                    )

                    if utt_idx not in self.invalid_utterance_ids:
                        utt = corpus.new_utterance(
                            utt_idx,
                            track.idx,
                            issuer_idx=speaker.idx,
                            start=utt_start,
                            end=utt_end
                        )

                        ll = annotations.LabelList.create_single(
                            text,
                            audiomate.corpus.LL_WORD_TRANSCRIPT
                        )

                        utt.set_label_list(ll)

        return audiomate.Corpus.from_corpus(corpus)