def reference():
    reference = Annotation()
    reference[Segment(0, 5)] = 'A'
    reference[Segment(6, 10)] = 'B'
    reference[Segment(12, 14)] = 'A'
    reference[Segment(15, 20)] = 'C'
    return reference
def test_load(sample):
    parser = UEMParser()
    timelines = parser.read(sample)
    timeline1 = timelines(uri="uri1")
    assert list(timeline1) == [Segment(1, 3.5),
                               Segment(3, 7.5),
                               Segment(6, 9)]
def vad_construct_pyannote_object_per_file(
    vad_table_filepath: str, groundtruth_RTTM_file: str
) -> Tuple[Annotation, Annotation]:
    """
    Construct Pyannote objects for evaluation.

    Args:
        vad_table_filepath (str): path of VAD RTTM-like table.
        groundtruth_RTTM_file (str): path of ground-truth RTTM file.

    Returns:
        reference (pyannote.Annotation): ground truth
        hypothesis (pyannote.Annotation): prediction
    """
    pred = pd.read_csv(vad_table_filepath, sep=" ", header=None)
    label = pd.read_csv(groundtruth_RTTM_file, sep=" ", header=None)
    label = label.rename(columns={3: "start", 4: "dur", 7: "speaker"})

    # construct reference
    reference = Annotation()
    for index, row in label.iterrows():
        reference[Segment(row['start'], row['start'] + row['dur'])] = row['speaker']

    # construct hypothesis
    hypothesis = Annotation()
    for index, row in pred.iterrows():
        hypothesis[Segment(float(row[0]), float(row[0]) + float(row[1]))] = 'Speech'

    return reference, hypothesis
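# Usage sketch for vad_construct_pyannote_object_per_file above. The file
# paths are hypothetical placeholders; DetectionErrorRate is the standard
# pyannote.metrics detection metric, assumed available here.
from pyannote.metrics.detection import DetectionErrorRate

reference, hypothesis = vad_construct_pyannote_object_per_file(
    "vad_outputs/session1.txt",     # hypothetical VAD table path
    "groundtruth/session1.rttm",    # hypothetical ground-truth RTTM path
)
der = DetectionErrorRate()(reference, hypothesis)
print(f"detection error rate: {der:.3f}")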
def reference_with_overlap():
    reference = Annotation()
    reference[Segment(0, 13)] = 'A'
    reference[Segment(12, 20)] = 'B'
    reference[Segment(24, 27)] = 'A'
    reference[Segment(30, 40)] = 'C'
    return reference
def _xxx_iter(self, subset):
    data = self._load_data(subset)

    AnnotatedGroups = data['annotated'].groupby(by='uri')
    AnnotationGroups = data['annotation'].groupby(by='uri')

    for raw_uri, annotated in AnnotatedGroups:
        uri = f'{raw_uri}.Mix-Headset'

        segments = []
        for segment in annotated.itertuples():
            segments.append(Segment(start=segment.start, end=segment.end))

        annotation = Annotation(uri=uri)
        for t, turn in enumerate(
                AnnotationGroups.get_group(raw_uri).itertuples()):
            segment = Segment(start=turn.start,
                              end=turn.start + turn.duration)
            annotation[segment, t] = turn.speaker

        current_file = {
            'database': 'Test',
            'uri': uri,
            'annotated': Timeline(uri=uri, segments=segments),
            'annotation': annotation,
        }
        yield current_file
def test_crop_strict(annotation):
    expected = Annotation(
        uri='TheBigBangTheory.Season01.Episode01',
        modality='speaker')
    expected[Segment(5.5, 7), '_'] = 'Leonard'
    actual = annotation.crop(Segment(5, 9), mode='strict')
    assert actual == expected, str(actual)
def rttm_to_annotation(input_rttm,
                       collapse_to_speech=False,
                       class_to_keep=None):
    """
    Given a path to an RTTM file, create the corresponding Annotation object
    containing the triplets (t_beg, t_end, activity).

    Parameters
    ----------
    input_rttm
        A path to an RTTM file that must exist.
    collapse_to_speech
        Not used in this implementation.
    class_to_keep
        If set, keep only the segments whose label equals this class.

    Returns
    -------
    An Annotation object.
    """
    anno = Annotation(uri=input_rttm)
    if os.path.isfile(input_rttm):
        with open(input_rttm) as fn:
            for line in fn:
                row = line.split('\t')
                t_beg, t_dur, spkr = float(row[3]), float(row[4]), row[7]
                if spkr == "":
                    raise ValueError("Speaker role is empty in %s"
                                     % os.path.basename(input_rttm))
                if class_to_keep is not None and spkr == class_to_keep:
                    # keep only the class of interest
                    anno[Segment(t_beg, t_beg + t_dur)] = spkr
                elif class_to_keep is None:
                    # keep all classes
                    anno[Segment(t_beg, t_beg + t_dur)] = spkr
    return anno
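# Hedged usage sketch for rttm_to_annotation above (the path is a hypothetical
# placeholder; assumes a tab-separated RTTM file, which is what the parser
# above expects). itertracks(yield_label=True) is the standard pyannote.core
# way to walk an Annotation.
anno = rttm_to_annotation("annotations/session1.rttm", class_to_keep="speech")
for segment, _, label in anno.itertracks(yield_label=True):
    print(segment, label)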
def reference():
    reference = Annotation()
    reference[Segment(0, 10)] = 'A'
    reference[Segment(12, 20)] = 'B'
    reference[Segment(24, 27)] = 'A'
    reference[Segment(30, 40)] = 'C'
    return reference
def tst_iter(self):
    # absolute path to 'data' directory where annotations are stored
    data_dir = Path(__file__).parent / 'data' / 'speaker_diarization'

    annotated = data_dir / 'fullset.uem'
    names = ['uri', 'NA0', 'start', 'end']
    annotated = read_table(annotated, delim_whitespace=True, names=names)
    annotated_segments = {}
    for segment in annotated.itertuples():
        annotated_segments[segment.uri] = Segment(start=segment.start,
                                                  end=segment.end)

    # iterate through the text annotation files
    for filename in os.listdir(data_dir):
        if filename.endswith(".txt"):
            uri, _ = os.path.splitext(os.path.basename(filename))
            annotation = Annotation(uri=uri)

            names = ['start', 'end', 'speaker', 'speakerID']
            parsed_file = read_table(os.path.join(data_dir, filename),
                                     delim_whitespace=True, names=names)
            for t, turn in enumerate(parsed_file.itertuples()):
                segment = Segment(start=turn.start, end=turn.end)
                annotation[segment, t] = turn.speakerID

            current_file = {
                'database': 'Odessa',
                'uri': uri,
                'annotated': Timeline(uri=uri,
                                      segments=[annotated_segments[uri]]),
                'annotation': annotation}
            yield current_file
def vad_metrics(predictions,
                reference_segments,
                sr=22050,
                window_length=int(np.floor(0.032 * 22050)),
                hop_length=int(np.floor(0.016 * 22050))):
    frame_times = librosa.frames_to_time(range(len(predictions)), sr=sr,
                                         hop_length=hop_length,
                                         n_fft=window_length)
    predicted_segments = voice_segments(predictions, frame_times)

    hypothesis = Annotation()
    for seg in predicted_segments:
        hypothesis[Segment(seg[0], seg[1])] = 1

    reference = Annotation()
    for seg in reference_segments:
        reference[Segment(seg[0], seg[1])] = 1

    precision = DetectionPrecision()(reference, hypothesis)
    error = DetectionErrorRate()(reference, hypothesis)
    recall = DetectionRecall()(reference, hypothesis)
    accuracy = DetectionAccuracy()(reference, hypothesis)

    metrics = {
        "precision": precision,
        "error": error,
        "recall": recall,
        "accuracy": accuracy,
    }
    print(metrics)
    return metrics
def clip_to_annotations(clip_number, lena_mappings, human_mappings):
    """ Returns (human_annotation, lena_annotation) """
    df = pd.read_csv(METADATA_PATH, index_col='ClipNumber')
    its_filename = df.loc[clip_number].ProcessingFile
    chat_filename = 'e{}.cha'.format(its_filename.split('.')[0])
    textgrid_filename = 'Clip{}.TextGrid'.format(clip_number)

    lena_dict = lena_chat_to_dict(os.path.join(CHAT_PATH, chat_filename))
    textgrid_dict = textgrid_to_dict(os.path.join(TEXTGRID_PATH,
                                                  textgrid_filename))
    # remap
    lena_dict = remap(lena_dict, lena_mappings)
    textgrid_dict = remap(textgrid_dict, human_mappings)

    # set default (silence) class
    lena_annotation = dict_to_annotation(lena_dict, lena_mappings['SIL'])
    human_annotation = dict_to_annotation(textgrid_dict,
                                          human_mappings['Silence'])

    start_time = df.loc[clip_number].StartTimeS
    end_time = start_time + 300  # 5 minutes

    # The crop doesn't begin at 0, but at start_time, so we need to shift it left.
    lena_cropped = lena_annotation.crop(Segment(start_time, end_time))
    lena_annotation_shifted = Annotation()
    for segment, track, label in lena_cropped.itertracks(yield_label=True):
        shifted_segment = Segment(segment.start - start_time,
                                  segment.end - start_time)
        lena_annotation_shifted[shifted_segment, track] = label

    return human_annotation, lena_annotation_shifted
def __call__(self, sequence=Stream.NoNewData):
    if isinstance(sequence, More):
        sequence = sequence.output

    if sequence in [Stream.EndOfStream, Stream.NoNewData]:
        return sequence

    data = sequence.data
    active = data[0]

    sw = sequence.sliding_window
    start = sw[0].middle

    timeline = Timeline()
    timeline.start = start

    for i, y in enumerate(data):
        if active and not y:
            segment = Segment(start, sw[i].middle)
            timeline.add(segment)
            active = False
        elif not active and y:
            active = True
            start = sw[i].middle

    if active:
        segment = Segment(start, sw[i].middle)
        timeline.add(segment)

    timeline.end = sw[i].middle

    return timeline
def __iter__(self):
    # TODO: running median
    t, y = zip(*self.iter_dfd())
    filtered = scipy.signal.medfilt(y, kernel_size=self._kernel_size)

    # normalized displaced frame difference
    normalized = (y - filtered) / filtered

    # apply threshold on normalized displaced frame difference;
    # in case multiple consecutive values are higher than the threshold,
    # only keep the first one as a shot boundary.
    previous = self.video.start
    _i = 0
    for i in np.where(normalized > self.threshold)[0]:
        if i == _i + 1:
            _i = i
            continue
        yield Segment(previous, t[i])
        previous = t[i]
        _i = i

    yield Segment(previous, self.video.end)
def add_elan(self,
             annotator: Annotator,
             eaf_path: Union[str, Path],
             selected_tiers: Optional[List[str]] = None,
             use_tier_as_annotation: bool = False):
    """
    Add an Elan (.eaf) file's content to the Continuum

    Parameters
    ----------
    annotator: str
        A string id for the annotator who produced that ELAN file.
    eaf_path: `Path` or str
        Path to the .eaf (ELAN) file.
    selected_tiers: optional list of str
        If set, will drop tiers that are not contained in this list.
    use_tier_as_annotation: optional bool
        If True, the annotation for each non-empty interval will be the
        name of its parent Tier.
    """
    from pympi import Eaf
    eaf = Eaf(eaf_path)
    for tier_name in eaf.get_tier_names():
        if selected_tiers is not None and tier_name not in selected_tiers:
            continue
        for start, end, value in eaf.get_annotation_data_for_tier(tier_name):
            if use_tier_as_annotation:
                self.add(annotator, Segment(start, end), tier_name)
            else:
                self.add(annotator, Segment(start, end), value)
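# Minimal usage sketch for add_elan above, assuming the pygamma-agreement
# Continuum API this method belongs to; the annotator id, file path and tier
# name below are hypothetical placeholders.
from pygamma_agreement import Continuum

continuum = Continuum()
continuum.add_elan("annotator_1",                # hypothetical annotator id
                   "recordings/session1.eaf",    # hypothetical .eaf path
                   selected_tiers=["speech"],    # hypothetical tier name
                   use_tier_as_annotation=False)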
def test_set_newlabel(scores):
    segment, track = Segment(0., 2.5), 'track'
    scores[segment, track, 'E'] = 1.

    segment, track = Segment(3., 4.), 'track'
    assert np.isnan(scores[segment, track, 'E'])

    segment, track = Segment(3., 4.), 'other_track'
    assert np.isnan(scores[segment, track, 'E'])
def test_nbest(scores):
    best1 = scores.nbest(1)
    assert list(best1.itervalues()) == [
        (Segment(0, 2.5), 'track', 'C', 0.4),
        (Segment(3, 4), 'other_track', 'C', 0.3),
        (Segment(3, 4), 'track', 'B', 0.5)]
def chunks(duration: float,
           chunk: float = 30,
           shuffle: bool = False) -> Iterator[Segment]:
    """Partition [0, duration] time range into smaller chunks

    Parameters
    ----------
    duration : float
        Total duration, in seconds.
    chunk : float, optional
        Chunk duration, in seconds. Defaults to 30.
    shuffle : bool, optional
        Yield chunks in random order. Defaults to chronological order.

    Yields
    ------
    focus : Segment
    """
    sliding_window = SlidingWindow(start=0.0, step=chunk, duration=chunk)
    whole = Segment(0, duration)

    if shuffle:
        chunks_ = list(chunks(duration, chunk=chunk, shuffle=False))
        random.shuffle(chunks_)
        for chunk_ in chunks_:
            yield chunk_
    else:
        for window in sliding_window(whole):
            yield window
        # yield a final, shorter chunk to cover the remainder
        if window.end < duration:
            yield Segment(window.end, duration)
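# Quick check of chunks() above, assuming pyannote.core's SlidingWindow only
# yields windows fully contained in its support: a 75-second file split into
# 30-second chunks should end with a shorter remainder chunk covering [60, 75].
for focus in chunks(75, chunk=30):
    print(focus)  # expected chunk boundaries: (0, 30), (30, 60), (60, 75)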
def write_test_file(data_dir, output_file, trial_length):
    annotations, max_length, speakers = read_annotaitons(data_dir)

    # create artificial non-overlapping segments, each of trial_length size
    trial_segments = Timeline()
    for i in range(0, int(max_length) // trial_length):
        trial_segments.add(Segment(start=i * trial_length,
                                   end=(i + 1) * trial_length))

    with open(output_file, 'w') as f:
        for label in speakers.keys():
            for annotation in annotations:
                # make sure our trial segments do not extend beyond the
                # total length of the speech data
                support = annotation.get_timeline().extent()
                # consider a slightly smaller extent here to make sure an
                # embedding of 3 seconds can be computed
                adjusted_trial_segments = trial_segments.crop(
                    Segment(start=support.start, end=support.end - 3.),
                    mode='loose')
                uri = annotation.uri
                cur_timeline = annotation.label_timeline(label, copy=False)
                for trial_segment in adjusted_trial_segments:
                    cropped_speaker = cur_timeline.crop(trial_segment,
                                                        mode='intersection')
                    if not cropped_speaker:
                        f.write('{0} {1} {2:0>7.2f} {3:0>7.2f} nontarget - -\n'.format(
                            label, uri,
                            trial_segment.start, trial_segment.end))
                    else:
                        f.write('{0} {1} {2:0>7.2f} {3:0>7.2f} target {4:0>7.2f} {5:0>7.2f}\n'.format(
                            label, uri,
                            trial_segment.start, trial_segment.end,
                            cropped_speaker[0].start,
                            cropped_speaker[0].duration))
def test_other_operation():
    segment = Segment(start=1, end=9)

    other_segment = Segment(10, 30)
    assert segment | other_segment == Segment(1, 30)

    other_segment = Segment(14, 15)
    assert segment ^ other_segment == Segment(9, 14)
def test_inclusion():
    segment = Segment(start=1, end=9)

    other_segment = Segment(5, 9)
    print(other_segment in segment)
    assert other_segment in segment
    assert not segment.overlaps(23)
def hypothesis():
    hypothesis = Annotation()
    hypothesis[Segment(2, 13)] = 'A'
    hypothesis[Segment(13, 14)] = 'D'
    hypothesis[Segment(14, 20)] = 'B'
    hypothesis[Segment(22, 38)] = 'C'
    hypothesis[Segment(38, 40)] = 'D'
    return hypothesis
def hypothesis():
    hypothesis = Annotation()
    hypothesis[Segment(2, 13)] = 'a'
    hypothesis[Segment(13, 14)] = 'd'
    hypothesis[Segment(14, 20)] = 'b'
    hypothesis[Segment(22, 38)] = 'c'
    hypothesis[Segment(38, 40)] = 'd'
    return hypothesis
def test_support(annotation):
    actual = annotation.support(collar=3.5)
    expected = Annotation(uri='TheBigBangTheory.Season01.Episode01',
                          modality='speaker')
    expected[Segment(3, 10), 'B'] = 'Penny'
    expected[Segment(5.5, 7), 'A'] = 'Leonard'
    expected[Segment(8, 10), 'C'] = 'Sheldon'
    assert actual == expected
def test_crop_loose(annotation):
    expected = Annotation(uri='TheBigBangTheory.Season01.Episode01',
                          modality='speaker')
    expected[Segment(5.5, 7), '_'] = 'Leonard'
    expected[Segment(8, 10), '_'] = 'Penny'
    expected[Segment(8, 10), 'anything'] = 'Sheldon'
    actual = annotation.crop(Segment(5, 9), mode='loose')
    assert actual == expected, str(actual)
def hypothesis():
    hypothesis = Annotation()
    hypothesis[Segment(1, 7)] = 'A'
    hypothesis[Segment(7, 9)] = 'D'
    hypothesis[Segment(7, 10)] = 'B'
    hypothesis[Segment(11, 17)] = 'C'
    hypothesis[Segment(18, 20)] = 'D'
    return hypothesis
def test_load(sample):
    parser = MDTMParser()
    annotations = parser.read(sample)
    speech1 = annotations(uri="uri1", modality="speech")
    assert list(speech1.itertracks(label=True)) == [
        (Segment(1, 3.5), 0, 'alice'),
        (Segment(3, 7.5), 1, 'barbara'),
        (Segment(6, 9), 2, 'chris')]
def test_union_extent():
    first_timeline = Timeline([Segment(0, 1),
                               Segment(2, 3),
                               Segment(4, 5)])
    second_timeline = Timeline([Segment(1.5, 6)])
    union_timeline = first_timeline.union(second_timeline)
    assert union_timeline.extent() == Segment(0, 6)
def test_remove_and_extent():
    t = Timeline(uri='MyAudioFile')
    t.add(Segment(6, 8))
    t.add(Segment(7, 9))
    t.add(Segment(6, 9))
    t.remove(Segment(6, 9))
    # the remaining segments (6, 8) and (7, 9) still span (6, 9)
    assert t.extent() == Segment(6, 9)
def iter_segments(self, source):
    """
    Parameters
    ----------
    source : float, Segment, Timeline or Annotation
        If `float`, yield running segments within [0, source).
        If `Segment`, yield running segments within this segment.
        If `Timeline`, yield running segments within this timeline.
        If `Annotation`, yield running segments within its timeline.
    """
    if isinstance(source, Annotation):
        segments = source.get_timeline()
    elif isinstance(source, Timeline):
        segments = source
    elif isinstance(source, Segment):
        segments = [source]
    elif isinstance(source, (int, float)):
        if not self.duration > 0:
            raise ValueError('Duration must be strictly positive.')
        segments = [Segment(0, source)]
    else:
        raise TypeError(
            'source must be float, Segment, Timeline or Annotation')

    for segment in segments:
        # skip segments that are too short
        if segment.duration < self.min_duration:
            continue
        # yield segments shorter than duration
        # when variable length segments are allowed
        elif segment.duration < self.duration:
            if self.variable_length_:
                yield segment
        # yield sliding segments within current track
        else:
            window = SlidingWindow(duration=self.duration,
                                   step=self.step,
                                   start=segment.start,
                                   end=segment.end)
            for s in window:
                # if current window is fully contained by segment
                if s in segment:
                    yield s
                # if it is not but variable length segments are allowed
                elif self.variable_length_:
                    yield Segment(start=segment.end - self.duration,
                                  end=segment.end)
                    break
def extrude(self, uem, reference, collar=0.0, skip_overlap=False):
    """Extrude reference boundary collars from uem

    reference   |----|     |--------------|       |-------------|
    uem       |---------------------|   |-------------------------------|
    extruded  |--| |--|  |---|  |-----|  |-|  |-----|  |-----------|  |-----|

    Parameters
    ----------
    uem : Timeline
        Evaluation map.
    reference : Annotation
        Reference annotation.
    collar : float, optional
        When provided, set the duration of collars centered around
        reference segment boundaries that are extruded from both reference
        and hypothesis. Defaults to 0. (i.e. no collar).
    skip_overlap : bool, optional
        Set to True to not evaluate overlap regions.
        Defaults to False (i.e. keep overlap regions).

    Returns
    -------
    extruded_uem : Timeline
    """
    if collar == 0. and not skip_overlap:
        return uem

    collars, overlap_regions = [], []

    # build list of collars if needed
    if collar > 0.:
        # iterate over all segments in reference
        for segment in reference.itersegments():
            # add collar centered on start time
            t = segment.start
            collars.append(Segment(t - .5 * collar, t + .5 * collar))
            # add collar centered on end time
            t = segment.end
            collars.append(Segment(t - .5 * collar, t + .5 * collar))

    # build list of overlap regions if needed
    if skip_overlap:
        # iterate over pairs of intersecting segments
        for (segment1, track1), (segment2, track2) in reference.co_iter(reference):
            if segment1 == segment2 and track1 == track2:
                continue
            # add their intersection
            overlap_regions.append(segment1 & segment2)

    segments = collars + overlap_regions

    return Timeline(segments=segments).support().gaps(support=uem)
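# Toy illustration of extrude() above. In pyannote.metrics this method lives
# on the UEMSupportMixin base shared by metrics such as DiarizationErrorRate
# (an assumption used here only to reach extrude()): a 1-second collar removes
# 0.5 s on each side of every reference boundary from the evaluation map.
from pyannote.core import Annotation, Segment, Timeline
from pyannote.metrics.diarization import DiarizationErrorRate

reference = Annotation()
reference[Segment(5, 10)] = 'A'
uem = Timeline([Segment(0, 20)])

extruded = DiarizationErrorRate().extrude(uem, reference, collar=1.0)
print(extruded)  # expected: [0, 4.5], [5.5, 9.5] and [10.5, 20] remain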
def test_intersection():
    segment = Segment(start=1, end=9)

    other_segment = Segment(4, 13)
    assert segment.intersects(other_segment)
    assert segment & other_segment == Segment(4, 9)

    other_segment = Segment(13, 20)
    assert not segment.intersects(other_segment)