def write_test_file(data_dir, output_file, trial_length):
    """Write speaker-verification trials derived from the annotations in *data_dir*.

    Each line of *output_file* describes one (speaker, trial segment) pair:
        <label> <uri> <start> <end> target <seg_start> <seg_duration>
    when the speaker is active inside the trial segment, or
        <label> <uri> <start> <end> nontarget - -
    when the speaker is absent from it.
    """
    annotations, max_length, speakers = read_annotaitons(data_dir)

    # Cover the whole recording with artificial, non-overlapping,
    # fixed-size trial segments.
    trials = Timeline()
    for index in range(int(max_length) // trial_length):
        trials.add(Segment(start=index * trial_length, end=(index + 1) * trial_length))

    with open(output_file, 'w') as out:
        for label in speakers:
            for annotation in annotations:
                # Do not let trial segments extend beyond the annotated speech;
                # shrink the right edge by 3 s so a 3-second embedding still fits.
                extent = annotation.get_timeline().extent()
                usable = trials.crop(
                    Segment(start=extent.start, end=extent.end - 3.), mode='loose')
                uri = annotation.uri
                speaker_timeline = annotation.label_timeline(label, copy=False)
                for trial in usable:
                    overlap = speaker_timeline.crop(trial, mode='intersection')
                    if overlap:
                        out.write('{0} {1} {2:0>7.2f} {3:0>7.2f} target {4:0>7.2f} {5:0>7.2f}\n'.format(
                            label, uri, trial.start, trial.end,
                            overlap[0].start, overlap[0].duration))
                    else:
                        out.write('{0} {1} {2:0>7.2f} {3:0>7.2f} nontarget - -\n'.format(
                            label, uri, trial.start, trial.end))
def test_crop_mapping():
    """Cropping with returns_mapping=True reports which source segments produced each piece."""
    source = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)])
    cropped, mapping = source.crop(Segment(1, 2), returns_mapping=True)
    assert cropped == Timeline([Segment(1, 2)])
    assert mapping == {Segment(1, 2): [Segment(0, 2), Segment(1, 2)]}
def test_union():
    """Union, crop-by-timeline and co_iter behave consistently on overlapping timelines."""
    tl_a = Timeline([Segment(0, 1), Segment(2, 3), Segment(4, 5)])
    tl_b = Timeline([Segment(1.5, 4.5)])

    expected_union = Timeline(
        [Segment(0, 1), Segment(1.5, 4.5), Segment(2, 3), Segment(4, 5)])
    assert tl_a.union(tl_b) == expected_union

    expected_crop = Timeline([Segment(2, 3), Segment(4, 4.5)])
    assert tl_b.crop(tl_a) == expected_crop

    expected_pairs = [
        (Segment(2, 3), Segment(1.5, 4.5)),
        (Segment(4, 5), Segment(1.5, 4.5)),
    ]
    assert list(tl_a.co_iter(tl_b)) == expected_pairs
def test_union():
    """Check union, timeline-based crop and co_iter on two overlapping timelines."""
    first = Timeline([Segment(0, 1), Segment(2, 3), Segment(4, 5)])
    second = Timeline([Segment(1.5, 4.5)])

    merged = first.union(second)
    assert merged == Timeline([Segment(0, 1), Segment(1.5, 4.5), Segment(2, 3), Segment(4, 5)])

    trimmed = second.crop(first)
    assert trimmed == Timeline([Segment(2, 3), Segment(4, 4.5)])

    pairs = list(first.co_iter(second))
    assert pairs == [(Segment(2, 3), Segment(1.5, 4.5)), (Segment(4, 5), Segment(1.5, 4.5))]
def remove_excluded(self):
    """Trim sampled segments so they no longer overlap any excluded region.

    Rebuilds ``self.segments`` recording by recording, keeping only the parts
    of each sampled segment that fall outside ``self.excluded``.
    """
    if len(self.excluded) == 0:
        return

    from pyannote.core import Segment, Timeline

    kept = []
    for recording, rows in self.segments.groupby("recording_filename"):
        sampled = Timeline(segments=[
            Segment(onset, offset)
            for onset, offset in rows[["segment_onset", "segment_offset"]].values
        ])

        excluded_rows = self.excluded.loc[
            self.excluded["recording_filename"] == recording]
        excluded_tl = Timeline(segments=[
            Segment(onset, offset)
            for onset, offset in excluded_rows[["segment_onset", "segment_offset"]].values
        ])

        # Timeline.extrude() is not released yet, so emulate it: keep only the
        # parts of `sampled` that fall in the gaps of the excluded timeline,
        # evaluated over the overall extent of `sampled`.
        extent_tl = Timeline([sampled.extent()], uri=sampled.uri)
        keep_regions = excluded_tl.gaps(support=extent_tl)
        sampled = sampled.crop(keep_regions, mode="intersection")

        kept.append(
            pd.DataFrame(
                [[recording, s.start, s.end] for s in sampled],
                columns=["recording_filename", "segment_onset", "segment_offset"],
            ))

    self.segments = pd.concat(kept)
def __call__(self, reference, hypothesis):
    """Label segmentation differences between reference and hypothesis.

    Returns an Annotation marking regions as 'shift' (both over- and
    under-segmented), 'over-segmentation' or 'under-segmentation'.
    """
    if isinstance(reference, Annotation):
        reference = reference.get_timeline()
    if isinstance(hypothesis, Annotation):
        hypothesis = hypothesis.get_timeline()

    def minor_intersections(primary, secondary):
        # For every segment of `primary`, intersect it with all overlapping
        # segments of `secondary`, then keep every intersection piece
        # except the longest one.
        collected = Timeline(uri=reference.uri)
        current = primary[0]
        pieces = []
        for p, s in primary.co_iter(secondary):
            if p != current:
                for _, piece in sorted(pieces)[:-1]:
                    collected.add(piece)
                pieces = []
                current = p
            overlap = p & s
            pieces.append((overlap.duration, overlap))
        for _, piece in sorted(pieces)[:-1]:
            collected.add(piece)
        return collected

    # over-segmentation: reference segments split by the hypothesis
    over = minor_intersections(reference, hypothesis)
    # under-segmentation: hypothesis segments split by the reference
    under = minor_intersections(hypothesis, reference)

    extent = reference.extent()

    # correct (neither under- nor over-segmented) — computed but not
    # labelled below (the 'correct' labelling is intentionally disabled)
    correct = under.union(over).gaps(support=extent)

    # frontier error (both under- and over-segmented)
    frontier = under.crop(over)

    # under-segmented only
    not_over = over.gaps(support=extent)
    only_under = under.crop(not_over)

    # over-segmented only
    not_under = under.gaps(support=extent)
    only_over = over.crop(not_under)

    status = Annotation(uri=reference.uri)
    for segment in frontier:
        status[segment, '_'] = 'shift'
    for segment in only_over:
        status[segment, '_'] = 'over-segmentation'
    for segment in only_under:
        status[segment, '_'] = 'under-segmentation'

    return status.support()
def __call__(self, reference, hypothesis):
    """Label segmentation differences between reference and hypothesis.

    Returns an Annotation marking regions as 'correct', 'frontier'
    (both over- and under-segmented), 'over' or 'under'.
    """
    if isinstance(reference, Annotation):
        reference = reference.get_timeline()
    if isinstance(hypothesis, Annotation):
        hypothesis = hypothesis.get_timeline()

    def minor_intersections(primary, secondary):
        # For every segment of `primary`, intersect it with all overlapping
        # segments of `secondary`, then keep every intersection piece
        # except the longest one.
        collected = Timeline(uri=reference.uri)
        current = primary[0]
        pieces = []
        for p, s in primary.co_iter(secondary):
            if p != current:
                for _, piece in sorted(pieces)[:-1]:
                    collected.add(piece)
                pieces = []
                current = p
            overlap = p & s
            pieces.append((overlap.duration, overlap))
        for _, piece in sorted(pieces)[:-1]:
            collected.add(piece)
        return collected

    # over-segmentation: reference segments split by the hypothesis
    over = minor_intersections(reference, hypothesis)
    # under-segmentation: hypothesis segments split by the reference
    under = minor_intersections(hypothesis, reference)

    extent = reference.extent()

    # correct (neither under- nor over-segmented)
    correct = under.union(over).gaps(focus=extent)

    # frontier error (both under- and over-segmented)
    frontier = under.crop(over)

    # under-segmented only
    not_over = over.gaps(focus=extent)
    only_under = under.crop(not_over)

    # over-segmented only
    not_under = under.gaps(focus=extent)
    only_over = over.crop(not_under)

    status = Annotation(uri=reference.uri)
    for segment in correct:
        status[segment, '_'] = 'correct'
    for segment in frontier:
        status[segment, '_'] = 'frontier'
    for segment in only_over:
        status[segment, '_'] = 'over'
    for segment in only_under:
        status[segment, '_'] = 'under'

    return status.smooth()