Example 1
    def read_issuers(file_path, corpus):
        """
        Read issuer definitions from the JSON file at the given path
        and import them into the given corpus.
        """
        if not os.path.isfile(file_path):
            return

        data = jsonfile.read_json_file(file_path)

        # Create the matching issuer type depending on the 'type' field.
        for issuer_idx, issuer_data in data.items():
            issuer_type = issuer_data.get('type', None)
            issuer_info = issuer_data.get('info', {})

            if issuer_type == 'speaker':
                gender = issuers.Gender(
                    issuer_data.get('gender', 'unknown').lower())
                age_group = issuers.AgeGroup(
                    issuer_data.get('age_group', 'unknown').lower())
                native_language = issuer_data.get('native_language', None)

                issuer = issuers.Speaker(issuer_idx,
                                         gender=gender,
                                         age_group=age_group,
                                         native_language=native_language,
                                         info=issuer_info)
            elif issuer_type == 'artist':
                name = issuer_data.get('name', None)

                issuer = issuers.Artist(issuer_idx,
                                        name=name,
                                        info=issuer_info)
            else:
                issuer = issuers.Issuer(issuer_idx, info=issuer_info)

            corpus.import_issuers(issuer)
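
A minimal usage sketch for the reader above, assuming read_issuers is reachable as a plain function (in the source it may well be a static method of a reader class) and using an illustrative file name; audiomate.Corpus and corpus.issuers are the only library pieces relied on here.

    import audiomate

    # Build an empty corpus and let the reader above populate its issuers.
    # 'issuers.json' is a placeholder path, not taken from the example.
    corpus = audiomate.Corpus()
    read_issuers('issuers.json', corpus)

    # Issuers end up in a dict keyed by their idx.
    for issuer in corpus.issuers.values():
        print(issuer.idx, type(issuer).__name__)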
Example 2
    def load_file(self, folder_path, idx, corpus):
        """
        Load speaker, file, utterance, labels
        for the file with the given id.
        """
        xml_path = os.path.join(folder_path, '{}.xml'.format(idx))
        wav_paths = []

        for wav_suffix in WAV_FILE_SUFFIXES:
            wav_path = os.path.join(folder_path,
                                    '{}_{}.wav'.format(idx, wav_suffix))
            wav_name = os.path.split(wav_path)[1]
            wav_idx = os.path.splitext(wav_name)[0]

            if (os.path.isfile(wav_path)
                    and wav_idx not in self.invalid_utterance_ids):
                wav_paths.append(wav_path)

        if len(wav_paths) == 0:
            return []

        with open(xml_path, 'r', encoding='utf-8') as f:
            text = f.read()

        transcription = TudaReader.extract_value(text, TRANSCRIPTION_PATTERN,
                                                 'transcription', xml_path)
        transcription_raw = TudaReader.extract_value(
            text, RAW_TRANSCRIPTION_PATTERN, 'raw_transcription', xml_path)
        gender = TudaReader.extract_value(text, GENDER_PATTERN, 'gender',
                                          xml_path)
        is_native = TudaReader.extract_value(text, NATIVE_PATTERN, 'native',
                                             xml_path)
        age_class = TudaReader.extract_value(text, AGE_PATTERN, 'age',
                                             xml_path)
        speaker_idx = TudaReader.extract_value(text, SPEAKER_IDX_PATTERN,
                                               'speaker_idx', xml_path)

        if speaker_idx not in corpus.issuers:
            start_age_class = int(age_class.split('-')[0])

            if start_age_class < 12:
                age_group = issuers.AgeGroup.CHILD
            elif start_age_class < 18:
                age_group = issuers.AgeGroup.YOUTH
            elif start_age_class < 65:
                age_group = issuers.AgeGroup.ADULT
            else:
                age_group = issuers.AgeGroup.SENIOR

            native_lang = None

            if is_native == 'Ja':
                native_lang = 'deu'

            issuer = issuers.Speaker(speaker_idx,
                                     gender=issuers.Gender(gender),
                                     age_group=age_group,
                                     native_language=native_lang)
            corpus.import_issuers(issuer)

        utt_ids = []

        for wav_path in wav_paths:
            wav_name = os.path.split(wav_path)[1]
            wav_idx = os.path.splitext(wav_name)[0]
            corpus.new_file(wav_path, wav_idx)
            utt = corpus.new_utterance(wav_idx, wav_idx, speaker_idx)
            utt.set_label_list(
                annotations.LabelList.create_single(
                    transcription, idx=audiomate.corpus.LL_WORD_TRANSCRIPT))
            utt.set_label_list(
                annotations.LabelList.create_single(
                    transcription_raw,
                    idx=audiomate.corpus.LL_WORD_TRANSCRIPT_RAW))
            utt_ids.append(wav_idx)

        return utt_ids
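
Example 2 leans on a regex-based helper, TudaReader.extract_value, and module-level *_PATTERN constants that are not shown here. A hedged sketch of how such a helper could look, with an illustrative pattern rather than the real TRANSCRIPTION_PATTERN:

    import re

    # Illustrative pattern: pulls the cleaned sentence out of the raw XML text.
    TRANSCRIPTION_PATTERN = re.compile(
        r'<cleaned_sentence>(.*?)</cleaned_sentence>', re.DOTALL)

    def extract_value(text, pattern, name, path):
        """Return the first capture group of pattern in text, or fail loudly."""
        match = pattern.search(text)

        if match is None:
            raise ValueError('Could not extract {} from {}'.format(name, path))

        return match.group(1).strip()

Compared with the BeautifulSoup-based variant in the next example, this trades the lxml dependency for hand-written patterns over the raw XML text.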
Example 3
    def load_file(folder_path, idx, corpus):
        """
        Load speaker, file, utterance, labels for the file with the given id.
        """
        xml_path = os.path.join(folder_path, '{}.xml'.format(idx))
        wav_paths = glob.glob(os.path.join(folder_path,
                                           '{}_*.wav'.format(idx)))

        if len(wav_paths) == 0:
            return []

        # Use a context manager so the XML file handle is closed after parsing.
        with open(xml_path, 'r', encoding='utf-8') as xml_file:
            soup = BeautifulSoup(xml_file, 'lxml')

        transcription = soup.recording.cleaned_sentence.string
        transcription_raw = soup.recording.sentence.string
        gender = soup.recording.gender.string
        is_native = soup.recording.muttersprachler.string
        age_class = soup.recording.ageclass.string
        speaker_idx = soup.recording.speaker_id.string

        if speaker_idx not in corpus.issuers:
            start_age_class = int(age_class.split('-')[0])

            if start_age_class < 12:
                age_group = issuers.AgeGroup.CHILD
            elif start_age_class < 18:
                age_group = issuers.AgeGroup.YOUTH
            elif start_age_class < 65:
                age_group = issuers.AgeGroup.ADULT
            else:
                age_group = issuers.AgeGroup.SENIOR

            native_lang = None

            if is_native == 'Ja':
                native_lang = 'deu'

            issuer = issuers.Speaker(speaker_idx,
                                     gender=issuers.Gender(gender),
                                     age_group=age_group,
                                     native_language=native_lang)
            corpus.import_issuers(issuer)

        utt_ids = []

        for wav_path in wav_paths:
            wav_name = os.path.split(wav_path)[1]
            wav_idx = os.path.splitext(wav_name)[0]
            corpus.new_file(wav_path, wav_idx)
            utt = corpus.new_utterance(wav_idx, wav_idx, speaker_idx)
            utt.set_label_list(
                annotations.LabelList.create_single(
                    transcription, idx=audiomate.corpus.LL_WORD_TRANSCRIPT))
            utt.set_label_list(
                annotations.LabelList.create_single(
                    transcription_raw,
                    idx=audiomate.corpus.LL_WORD_TRANSCRIPT_RAW))
            utt_ids.append(wav_idx)

        return utt_ids
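
Either variant of load_file can be driven by a small loop over the XML files of a TUDA subset folder. A sketch of such a driver, assuming load_file is callable as shown above and using a placeholder folder path:

    import glob
    import os

    import audiomate

    # '/path/to/tuda/train' stands in for a real TUDA subset folder.
    folder = '/path/to/tuda/train'
    corpus = audiomate.Corpus()

    for xml_path in glob.glob(os.path.join(folder, '*.xml')):
        idx = os.path.splitext(os.path.basename(xml_path))[0]
        load_file(folder, idx, corpus)

    print('Loaded {} utterances'.format(corpus.num_utterances))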