def read(xml, max_time=4, max_words=None):
    """Convert Koemei <segmentation> output into clips and speakers.

    Args:
        xml: An ElementTree.Element whose ``segment`` children each contain
            a ``speaker`` element holding the speaker's name.
        max_time: Passed unchanged to ``words_to_clips`` as ``max_time``.
        max_words: Passed unchanged to ``words_to_clips`` as ``max_words``.

    Returns:
        A dict with two keys: ``clips``, a list of all clips in document
        order, and ``speakers``, a list holding one Speaker object per
        distinct speaker name.  The first clip of every segment has its
        ``begin_para`` attribute set to True.
    """
    speakers = {}
    clips = []
    for segment in xml.findall('segment'):
        speaker_name = segment.find('speaker').text.strip()
        # Reuse one Speaker object per distinct name across all segments.
        if speaker_name not in speakers:
            speakers[speaker_name] = Speaker(name=speaker_name)
        speaker = speakers[speaker_name]

        segment_clips = iter(words_to_clips(segment_to_words(segment),
                                            max_time=max_time,
                                            max_words=max_words,
                                            speaker=speaker))
        # The builtin next() works on Python 2.6+ and 3.x alike; the default
        # keeps a segment that produces no clips from raising StopIteration.
        first_clip = next(segment_clips, None)
        if first_clip is None:
            continue
        # Each segment opens a new paragraph.
        first_clip.begin_para = True
        clips.append(first_clip)
        clips.extend(segment_clips)
    # list() so callers get a real list rather than a dict view on Python 3.
    return dict(clips=clips, speakers=list(speakers.values()))
def read_clips(input, **kwargs):
    """Generate Clip objects from Sphinx4 output.

    `input' is a file object, or any other iterable producing lines of
    text.  All keyword arguments are forwarded untouched to
    words_to_clips.

    The token stream is read from `input', silences are removed, and the
    remainder is grouped into Sphinx segments (delimited by <s> tokens).
    Clips are then produced at two levels:

    (1) every Sphinx segment starts a new clip;
    (2) the words inside a segment are divided further by words_to_clips
        (the original documentation states that this limits each clip to
        a maximum of 4 seconds by default).
    """
    tokens = read_tokens(input)
    spoken_tokens = remove_silences(tokens)
    for sphinx_segment in segments(spoken_tokens):
        segment_clips = words_to_clips(sphinx_segment, **kwargs)
        for piece in segment_clips:
            yield piece