예제 #1
0
 def read_files(file_path, corpus):
     """Add every file listed in an index file to the corpus.

     The index is read with ``textfile.read_key_value_lines`` using a single
     space as separator. Each key is used as the file idx and each value as
     a path that is joined onto the directory containing the index file.

     Args:
         file_path: Path of the index file.
         corpus: Object that receives one ``new_file`` call per entry.
     """
     base_dir = os.path.dirname(file_path)
     entries = textfile.read_key_value_lines(file_path, separator=' ')

     for idx, relative_path in entries.items():
         absolute_path = os.path.join(base_dir, relative_path)
         corpus.new_file(absolute_path, file_idx=idx, copy_file=False)
예제 #2
0
파일: kaldi.py 프로젝트: ynop/audiomate
    def feature_scp_generator(path):
        """Lazily yield ``(utterance_id, matrix)`` for every entry of a scp file.

        Nothing is read until the generator is first advanced. Each value of
        the scp file is handed to ``KaldiWriter.read_float_matrix`` only when
        its entry is reached.

        Args:
            path: Path of the scp file (key/value lines separated by a space).

        Yields:
            tuple: The utterance id and the result of
            ``KaldiWriter.read_float_matrix`` for that entry.
        """
        for utt_id, specifier in textfile.read_key_value_lines(
                path, separator=' ').items():
            matrix = KaldiWriter.read_float_matrix(specifier)
            yield utt_id, matrix
예제 #3
0
파일: kaldi.py 프로젝트: ynop/audiomate
 def read_transcriptions(text_path, corpus):
     """Attach a word-transcript label list to each utterance listed in a file.

     Args:
         text_path: Path of the file mapping utterance ids to transcriptions
             (key/value lines separated by a space).
         corpus: Object whose ``utterances`` mapping is indexed by the ids
             found in the file.
     """
     entries = textfile.read_key_value_lines(text_path, separator=' ')

     for utterance_idx, text in entries.items():
         # Build the label list before looking up the utterance, as before.
         label_list = annotations.LabelList.create_single(
             text, idx=audiomate.corpus.LL_WORD_TRANSCRIPT)
         utterance = corpus.utterances[utterance_idx]
         utterance.set_label_list(label_list)
예제 #4
0
 def read_feature_containers(feat_path, corpus):
     """Register the feature containers listed in an index file, if it exists.

     Does nothing when ``feat_path`` is not an existing file. Otherwise each
     entry's key is used as the container name and its value as a path that
     is joined onto the directory containing the index file.

     Args:
         feat_path: Path of the index file (key/value lines separated by a
             space).
         corpus: Object that receives one ``new_feature_container`` call per
             entry.
     """
     if not os.path.isfile(feat_path):
         return

     root = os.path.dirname(feat_path)
     entries = textfile.read_key_value_lines(feat_path, separator=' ')

     for name, relative_path in entries.items():
         corpus.new_feature_container(
             name, path=os.path.join(root, relative_path))
예제 #5
0
    def test_read_key_value_lines(self):
        """Reading ``key_value_file.txt`` yields the expected key/value dict."""
        fixture_dir = os.path.dirname(__file__)
        fixture = os.path.join(fixture_dir, 'key_value_file.txt')

        actual = textfile.read_key_value_lines(fixture, separator=' ')

        self.assertDictEqual({'a': '1', 'b': '2', 'c': '3'}, actual)
예제 #6
0
파일: default.py 프로젝트: val260/audiomate
    def read_utt_to_issuer_mapping(utt_issuer_path, corpus):
        """Build a mapping from utterance id to issuer from a mapping file.

        Issuers already present in ``corpus.issuers`` are reused; for any
        other issuer idx a new issuer is created via ``corpus.new_issuer``.

        Args:
            utt_issuer_path: Path of the file mapping utterance ids to issuer
                idxs (key/value lines separated by a space).
            corpus: Object providing ``issuers`` and ``new_issuer``.

        Returns:
            dict: Utterance id to issuer; empty when the file does not exist.
        """
        mapping = {}

        if not os.path.isfile(utt_issuer_path):
            return mapping

        entries = textfile.read_key_value_lines(utt_issuer_path, separator=' ')

        for utt_id, issuer_idx in entries.items():
            if issuer_idx not in corpus.issuers.keys():
                mapping[utt_id] = corpus.new_issuer(issuer_idx=issuer_idx)
            else:
                mapping[utt_id] = corpus.issuers[issuer_idx]

        return mapping
예제 #7
0
파일: kaldi.py 프로젝트: xjc90s/audiomate
    def read_genders(genders_path, corpus):
        """Import one speaker per entry of a gender file, if the file exists.

        Does nothing when ``genders_path`` is not an existing file.

        Args:
            genders_path: Path of the file mapping speaker idxs to gender
                strings (key/value lines separated by a space).
            corpus: Object that receives one ``import_issuers`` call per
                speaker.
        """
        if not os.path.isfile(genders_path):
            return

        entries = textfile.read_key_value_lines(genders_path, separator=' ')

        for idx, gender_code in entries.items():
            # Only 'm' maps to MALE; every other value is treated as FEMALE.
            gender = (issuers.Gender.MALE if gender_code == 'm'
                      else issuers.Gender.FEMALE)
            corpus.import_issuers(issuers.Speaker(idx, gender=gender))
예제 #8
0
    def parse_prompts(etc_folder):
        """Read the ``PROMPTS`` and ``prompts-original`` files of a folder.

        Args:
            etc_folder: Directory containing both files (key/value lines
                separated by a space).

        Returns:
            tuple: ``(prompts, prompts_orig)``, two dictionaries keyed by id.
            Keys of ``prompts`` are reduced to the part after the last ``/``;
            ``prompts_orig`` is returned exactly as read.
        """
        raw_prompts = textfile.read_key_value_lines(
            os.path.join(etc_folder, 'PROMPTS'), separator=' ')
        prompts_orig = textfile.read_key_value_lines(
            os.path.join(etc_folder, 'prompts-original'), separator=' ')

        # split('/')[-1] is the whole key when it contains no '/', so keys
        # without a path prefix are kept unchanged.
        prompts = {
            full_key.split('/')[-1]: text
            for full_key, text in raw_prompts.items()
        }

        return prompts, prompts_orig