def feature_scp_generator(path):
    """ Return a generator over all feature matrices defined in a scp. """
    scp_entries = textfile.read_key_value_lines(path, separator=' ')

    for utterance_id, rx_specifier in scp_entries.items():
        yield utterance_id, KaldiDatasetLoader.read_float_matrix(rx_specifier)
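A minimal usage sketch for the generator above; the 'feats.scp' path is a placeholder, and the snippet assumes the same textfile / KaldiDatasetLoader context the function itself relies on.

# Hypothetical usage: 'feats.scp' is a made-up path, not part of the original code.
for utt_id, feats in feature_scp_generator('feats.scp'):
    # Each yielded value is expected to be a 2-D float matrix (e.g. a numpy array).
    print(utt_id, feats.shape)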
def _load_wavs(self, loading_dataset):
    wavs_file_path = os.path.join(loading_dataset.path, WAV_FILE_NAME)

    for wav_id, wav_path in textfile.read_key_value_lines(wavs_file_path).items():
        loading_dataset.add_file(
            os.path.abspath(os.path.join(loading_dataset.path, wav_path)),
            file_idx=wav_id)
def test_read_key_value_lines(self):
    file_path = os.path.join(os.path.dirname(__file__), 'key_value_file.txt')
    expected = {'a': '1', 'b': '2', 'c': '3'}

    records = textfile.read_key_value_lines(file_path, separator=" ")

    self.assertDictEqual(expected, records)
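A self-contained sketch of what the fixture file presumably contains, inferred from the expected dict and the space separator: three "key value" lines, written to a temporary file and read back. It assumes the same textfile module used throughout these snippets is importable.

import os
import tempfile

# Assumed fixture contents: one space-separated "key value" pair per line.
content = "a 1\nb 2\nc 3\n"

with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False) as f:
    f.write(content)

records = textfile.read_key_value_lines(f.name, separator=" ")
assert records == {'a': '1', 'b': '2', 'c': '3'}

os.remove(f.name)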
def _load_transcriptions(self, loading_dataset):
    transcriptions_path = os.path.join(loading_dataset.path, TRANSCRIPTION_FILE_NAME)
    transcriptions_raw_path = os.path.join(loading_dataset.path, TRANSCRIPTION_RAW_FILE_NAME)

    # Transcriptions stored under the default text segmentation key
    if os.path.isfile(transcriptions_path):
        for utt_id, transcription in textfile.read_key_value_lines(transcriptions_path).items():
            loading_dataset.add_segmentation(
                utt_id,
                segments=transcription,
                key=data.Segmentation.TEXT_SEGMENTATION)

    # Raw transcriptions stored under the raw-text segmentation key
    if os.path.isfile(transcriptions_raw_path):
        for utt_id, transcription_raw in textfile.read_key_value_lines(transcriptions_raw_path).items():
            loading_dataset.add_segmentation(
                utt_id,
                segments=transcription_raw,
                key=data.Segmentation.RAW_TEXT_SEGMENTATION)
def _load_speakers(self, loading_dataset):
    utt2spk_path = os.path.join(loading_dataset.path, UTT2SPK_FILE_NAME)
    spk_info_path = os.path.join(loading_dataset.path, SPEAKER_INFO_FILE_NAME)

    # Speaker metadata from the JSON speaker-info file
    if os.path.isfile(spk_info_path):
        for spk_id, spk_info in jsonfile.read_json_file(spk_info_path).items():
            spk_obj = loading_dataset.add_speaker(speaker_idx=spk_id)
            spk_obj.load_speaker_info_from_dict(spk_info)

    # Utterance-to-speaker mapping
    if os.path.isfile(utt2spk_path):
        for utt_id, spk_id in textfile.read_key_value_lines(utt2spk_path).items():
            loading_dataset.utterances[utt_id].speaker_idx = spk_id
def create_dummy_reco2file(self, data_folder):
    data_folder = os.path.abspath(data_folder)
    wav_file = os.path.join(data_folder, 'wav.scp')
    wavs = textfile.read_key_value_lines(wav_file, separator=' ')

    out = []

    for rec_id, rec_path in wavs.items():
        filename = os.path.splitext(os.path.basename(rec_path))[0]
        out.append([rec_id, filename, 'A'])

    reco_file = os.path.join(data_folder, 'reco2file_and_channel')
    textfile.write_separated_lines(reco_file, out, separator=' ')
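A standalone illustration, without the textfile dependency, of the mapping the helper performs: each wav.scp entry becomes a row of recording id, file basename without extension, and channel 'A'. The recording ids and paths below are made up.

import os

# Hypothetical wav.scp entries: recording id -> path to the wav file.
wavs = {'rec1': '/data/audio/file1.wav', 'rec2': '/data/audio/file2.wav'}

out = [[rec_id, os.path.splitext(os.path.basename(path))[0], 'A']
       for rec_id, path in wavs.items()]

print(out)  # [['rec1', 'file1', 'A'], ['rec2', 'file2', 'A']] (Python 3.7+ dict order)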
def _load(self, dataset):
    # load wavs
    wav_file_path = os.path.join(dataset.path, WAV_FILE_NAME)

    for file_idx, file_path in textfile.read_key_value_lines(wav_file_path, separator=' ').items():
        dataset.add_file(file_path, file_idx=file_idx)

    # load utterances
    utt2spk_path = os.path.join(dataset.path, UTT2SPK_FILE_NAME)
    utt2spk = {}

    if os.path.isfile(utt2spk_path):
        utt2spk = textfile.read_key_value_lines(utt2spk_path, separator=' ')

    segments_path = os.path.join(dataset.path, SEGMENTS_FILE_NAME)

    if os.path.isfile(segments_path):
        for utt_id, utt_info in textfile.read_separated_lines_with_first_key(
                segments_path, separator=' ', max_columns=4).items():
            start = None
            end = None

            if len(utt_info) > 1:
                start = utt_info[1]

            if len(utt_info) > 2:
                end = utt_info[2]

            speaker_idx = None

            if utt_id in utt2spk.keys():
                speaker_idx = utt2spk[utt_id]

            if speaker_idx not in dataset.speakers.keys():
                dataset.add_speaker(speaker_idx=speaker_idx)

            dataset.add_utterance(utt_info[0],
                                  utterance_idx=utt_id,
                                  speaker_idx=speaker_idx,
                                  start=start,
                                  end=end)
    else:
        for file_idx in dataset.files.keys():
            speaker_idx = None

            if file_idx in utt2spk.keys():
                speaker_idx = utt2spk[file_idx]

            if speaker_idx not in dataset.speakers.keys():
                dataset.add_speaker(speaker_idx=speaker_idx)

            dataset.add_utterance(file_idx,
                                  utterance_idx=file_idx,
                                  speaker_idx=speaker_idx)

    # load transcriptions
    text_path = os.path.join(dataset.path, TRANSCRIPTION_FILE_NAME)

    for utt_id, transcription in textfile.read_key_value_lines(text_path, separator=' ').items():
        dataset.add_segmentation(utt_id, segments=transcription)

    # load genders
    gender_path = os.path.join(dataset.path, SPK2GENDER_FILE_NAME)

    for spk_id, gender in textfile.read_key_value_lines(gender_path, separator=' ').items():
        if spk_id in dataset.speakers.keys():
            spk = dataset.speakers[spk_id]

            if gender == 'm':
                spk.gender = data.Gender.MALE
            elif gender == 'f':
                spk.gender = data.Gender.FEMALE
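A hedged sketch of the Kaldi-style data directory this loader appears to read, assuming the module-level constants resolve to the conventional Kaldi file names (wav.scp, utt2spk, segments, text, spk2gender); every id, path and time below is made up.

import os

def write_dummy_kaldi_dir(path):
    # One minimal entry per file; the formats follow the parsing above:
    # wav.scp:    <recording-id> <wav-path>
    # utt2spk:    <utterance-id> <speaker-id>
    # segments:   <utterance-id> <recording-id> <start> <end>
    # text:       <utterance-id> <transcription>
    # spk2gender: <speaker-id> <m|f>
    files = {
        'wav.scp': 'rec1 /data/audio/rec1.wav\n',
        'utt2spk': 'utt1 spk1\n',
        'segments': 'utt1 rec1 0.0 4.5\n',
        'text': 'utt1 hello world\n',
        'spk2gender': 'spk1 m\n',
    }

    os.makedirs(path, exist_ok=True)

    for name, content in files.items():
        with open(os.path.join(path, name), 'w') as f:
            f.write(content)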
def _load(self, loading_dataset):
    # Read files
    file_path = os.path.join(loading_dataset.path, FILES_FILE_NAME)

    for file_idx, file_path in textfile.read_key_value_lines(file_path, separator=' ').items():
        loading_dataset.add_file(
            os.path.abspath(os.path.join(loading_dataset.path, file_path)),
            file_idx=file_idx,
            copy_file=False)

    # Read speakers
    speaker_path = os.path.join(loading_dataset.path, SPEAKER_INFO_FILE_NAME)

    for speaker_idx, speaker_info in jsonfile.read_json_file(speaker_path).items():
        speaker = loading_dataset.add_speaker(speaker_idx=speaker_idx)
        speaker.load_speaker_info_from_dict(speaker_info)

    # Read utt2spk (default to an empty mapping so the lookup below is safe)
    utt2spk_path = os.path.join(loading_dataset.path, UTT2SPK_FILE_NAME)
    utt2spk = {}

    if os.path.isfile(utt2spk_path):
        utt2spk = textfile.read_key_value_lines(utt2spk_path, separator=' ')

    # Read utterances
    utterance_path = os.path.join(loading_dataset.path, UTTERANCE_FILE_NAME)

    for utterance_idx, utt_info in textfile.read_separated_lines_with_first_key(
            utterance_path, separator=' ', max_columns=4).items():
        start = None
        end = None

        if len(utt_info) > 1:
            start = float(utt_info[1])

        if len(utt_info) > 2:
            end = float(utt_info[2])

        speaker_idx = None

        if utterance_idx in utt2spk.keys():
            speaker_idx = utt2spk[utterance_idx]

        loading_dataset.add_utterance(utt_info[0],
                                      utterance_idx=utterance_idx,
                                      speaker_idx=speaker_idx,
                                      start=start,
                                      end=end)

    # Read segmentations
    for seg_file in glob.glob(os.path.join(loading_dataset.path, 'segmentation_*.txt')):
        file_name = os.path.basename(seg_file)
        key = file_name[len('segmentation_'):len(file_name) - len('.txt')]

        utterance_segments = collections.defaultdict(list)

        for record in textfile.read_separated_lines_generator(seg_file, separator=' ', max_columns=4):
            utterance_segments[record[0]].append(
                data.Token(record[3], float(record[1]), float(record[2])))

        for utterance_idx, segments in utterance_segments.items():
            loading_dataset.add_segmentation(utterance_idx, segments=segments, key=key)

    # Read subviews
    for subview_file in glob.glob(os.path.join(loading_dataset.path, 'subview_*.txt')):
        file_name = os.path.basename(subview_file)
        sv_name = file_name[len('subview_'):len(file_name) - len('.txt')]

        sv = dataset.Subview()

        for key, value in textfile.read_separated_lines_with_first_key(subview_file, separator=' ').items():
            if key == 'filtered_utt_ids':
                sv.filtered_utterance_idxs = set(value)
            elif key == 'filtered_speaker_ids':
                sv.filtered_speaker_idxs = set(value)
            elif key == 'utterance_idx_patterns':
                sv.utterance_idx_patterns = set(value)
            elif key == 'speaker_idx_patterns':
                sv.speaker_idx_patterns = set(value)
            elif key == 'utterance_idx_not_patterns':
                sv.utterance_idx_not_patterns = set(value)
            elif key == 'speaker_idx_not_patterns':
                sv.speaker_idx_not_patterns = set(value)

        loading_dataset.add_subview(sv_name, sv)

    # Read features
    feat_path = os.path.join(loading_dataset.path, FEAT_CONTAINER_FILE_NAME)

    if os.path.isfile(feat_path):
        for container_name, container_path in textfile.read_key_value_lines(feat_path, separator=' ').items():
            loading_dataset.create_feature_container(container_name, container_path)
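A hedged, standalone illustration of the segmentation-file layout parsed above: one "<utterance-id> <start> <end> <token>" record per line, with the part of the file name between 'segmentation_' and '.txt' used as the segmentation key. Plain tuples stand in for data.Token, and the ids, times and tokens are invented.

import collections

# Made-up contents of a hypothetical 'segmentation_text.txt'.
lines = [
    'utt1 0.00 0.52 hello',
    'utt1 0.52 1.10 world',
    'utt2 0.00 0.80 goodbye',
]

utterance_segments = collections.defaultdict(list)

for line in lines:
    # Split into at most four columns, mirroring max_columns=4 above.
    utt_id, start, end, token = line.split(' ', 3)
    utterance_segments[utt_id].append((token, float(start), float(end)))

print(dict(utterance_segments))
# {'utt1': [('hello', 0.0, 0.52), ('world', 0.52, 1.1)], 'utt2': [('goodbye', 0.0, 0.8)]}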