def read_files(file_path, corpus):
    """
    Register every file listed in the key/value file at ``file_path``
    with ``corpus``.

    Each entry maps a file idx to a path; the path is joined onto the
    directory that contains ``file_path`` before it is handed to
    ``corpus.new_file`` (with ``copy_file=False``).
    """
    base_dir = os.path.dirname(file_path)
    entries = textfile.read_key_value_lines(file_path, separator=' ')

    for idx, rel_path in entries.items():
        full_path = os.path.join(base_dir, rel_path)
        corpus.new_file(full_path, file_idx=idx, copy_file=False)
def feature_scp_generator(path):
    """
    Yield ``(utterance_id, matrix)`` pairs for every entry of the scp
    file at ``path``.

    The matrix of an entry is obtained by passing its specifier to
    ``KaldiWriter.read_float_matrix``; entries are processed lazily,
    one per iteration step.
    """
    for utt_id, specifier in textfile.read_key_value_lines(
            path, separator=' ').items():
        matrix = KaldiWriter.read_float_matrix(specifier)
        yield utt_id, matrix
def read_transcriptions(text_path, corpus):
    """
    Attach a word-transcript label list to the utterances of ``corpus``.

    For every ``utterance-id -> transcription`` entry read from
    ``text_path`` a single-label list with idx
    ``audiomate.corpus.LL_WORD_TRANSCRIPT`` is created and set on the
    utterance with that id.
    """
    entries = textfile.read_key_value_lines(text_path, separator=' ')

    for utt_id, text in entries.items():
        label_list = annotations.LabelList.create_single(
            text,
            idx=audiomate.corpus.LL_WORD_TRANSCRIPT,
        )
        corpus.utterances[utt_id].set_label_list(label_list)
def read_feature_containers(feat_path, corpus):
    """
    Register the feature containers listed in ``feat_path`` with ``corpus``.

    Does nothing when ``feat_path`` is not an existing file. Container
    paths are joined onto the directory that contains ``feat_path``.
    """
    if not os.path.isfile(feat_path):
        return

    root = os.path.dirname(feat_path)
    entries = textfile.read_key_value_lines(feat_path, separator=' ')

    for name, rel_path in entries.items():
        container_path = os.path.join(root, rel_path)
        corpus.new_feature_container(name, path=container_path)
def test_read_key_value_lines(self):
    """Reading 'key_value_file.txt' yields the expected key/value dict."""
    here = os.path.dirname(__file__)
    fixture = os.path.join(here, 'key_value_file.txt')

    records = textfile.read_key_value_lines(fixture, separator=' ')

    self.assertDictEqual({'a': '1', 'b': '2', 'c': '3'}, records)
def read_utt_to_issuer_mapping(utt_issuer_path, corpus):
    """
    Build a dict that maps utterance ids to issuers of ``corpus``.

    Returns an empty dict when ``utt_issuer_path`` is not an existing
    file. An issuer idx that is not yet present in ``corpus.issuers`` is
    created through ``corpus.new_issuer``.
    """
    mapping = {}

    if not os.path.isfile(utt_issuer_path):
        return mapping

    entries = textfile.read_key_value_lines(utt_issuer_path, separator=' ')

    for utt_id, issuer_idx in entries.items():
        if issuer_idx not in corpus.issuers.keys():
            mapping[utt_id] = corpus.new_issuer(issuer_idx=issuer_idx)
        else:
            mapping[utt_id] = corpus.issuers[issuer_idx]

    return mapping
def read_genders(genders_path, corpus):
    """
    Import one speaker per entry of ``genders_path`` into ``corpus``.

    Does nothing when ``genders_path`` is not an existing file. A gender
    value of ``'m'`` maps to ``Gender.MALE``; every other value maps to
    ``Gender.FEMALE``.
    """
    if not os.path.isfile(genders_path):
        return

    entries = textfile.read_key_value_lines(genders_path, separator=' ')

    for speaker_idx, gender_str in entries.items():
        gender = (issuers.Gender.MALE if gender_str == 'm'
                  else issuers.Gender.FEMALE)
        corpus.import_issuers(issuers.Speaker(speaker_idx, gender=gender))
def parse_prompts(etc_folder):
    """
    Read ``PROMPTS`` and ``prompts-original`` from ``etc_folder``.

    Returns a tuple ``(prompts, prompts_orig)`` of dictionaries. Keys of
    ``prompts`` are reduced to the part after the last ``/``; keys of
    ``prompts_orig`` are returned as read.
    """
    raw_prompts = textfile.read_key_value_lines(
        os.path.join(etc_folder, 'PROMPTS'), separator=' ')
    prompts_orig = textfile.read_key_value_lines(
        os.path.join(etc_folder, 'prompts-original'), separator=' ')

    # A key without '/' splits into a single part, so [-1] returns it as is.
    prompts = {
        full_key.split('/')[-1]: text
        for full_key, text in raw_prompts.items()
    }

    return prompts, prompts_orig