def to_overlap(reference: Annotation) -> Annotation:
    """Build the overlapped speech annotation of a reference

    Parameters
    ----------
    reference : Annotation
        Reference annotation whose tracks are compared pairwise.

    Returns
    -------
    overlap : `pyannote.core.Annotation`
        Annotation built from the intersections of co-occurring tracks
        that carry different labels.
    """
    regions = Timeline(uri=reference.uri)

    # The reference is paired with itself; pairs sharing the same label
    # (a track paired with itself included) contribute nothing.
    pairs = reference.co_iter(reference)
    for (left, left_track), (right, right_track) in pairs:
        same_label = reference[left, left_track] == reference[right, right_track]
        if not same_label:
            regions.add(left & right)

    # Take the support of the collected intersections, then convert the
    # timeline back to an annotation.
    merged = regions.support()
    return merged.to_annotation()
# Name speech turns from OCR names and record one 'written' evidence per
# newly seen person.
#
# NOTE(review): this is a fragment collapsed onto a single line. `uri`,
# `corpus_id`, `video_id`, `mapping`, `evidences` and the `pyannote_*`
# annotations are defined outside the visible code, while `fp`,
# `duplicates` and `shot_id` are not used within it; presumably the body
# of the `with` block continues past this point -- confirm.
#
# Steps, in statement order:
# * If `pyannote_ocr` is falsy (presumably an empty annotation -- confirm),
#   `named_speakers` is a fresh Annotation for `uri`. Otherwise it is the
#   result of argmax_tagger(pyannote_ocr, pyannote_speakers); that helper
#   is defined elsewhere and presumably gives each speaker track the OCR
#   name it co-occurs with most -- confirm.
# * `named_speakers` is then narrowed with .subset(pyannote_ocr.labels()),
#   presumably keeping only tracks whose label is an OCR name -- confirm.
# * The output path comes from the FUSION template, and its parent
#   directory is created when missing. The exists()/makedirs() pair is not
#   atomic.
# * The output file is opened for writing and `duplicates` starts as an
#   empty dict.
# * The loop walks the pairs yielded by named_speakers.co_iter(
#   pyannote_shots): a named speech turn and a shot track, presumably
#   only when their segments intersect -- confirm.
# * `person_name` is looked up in `mapping` and stored there on first
#   sight. get_valid_person_name() still runs on every iteration, because
#   the default argument of setdefault() is evaluated before the call.
# * On the first occurrence of a person, the first track of the OCR
#   annotation restricted to that name is taken, and the middle of its
#   segment becomes the evidence timestamp. `[0]` raises IndexError when
#   that restricted annotation has no track. The unpacking rebinds `_`
#   from the loop header.
if not pyannote_ocr: named_speakers = Annotation(uri=uri) else: named_speakers = argmax_tagger(pyannote_ocr, pyannote_speakers) named_speakers = named_speakers.subset(pyannote_ocr.labels()) path = FUSION.format(repository=REPOSITORY, uri=uri) directory = os.path.dirname(path) if not os.path.exists(directory): os.makedirs(directory) with open(path, 'w') as fp: duplicates = dict() for (speech_turn, track), (_, shot_id) in named_speakers.co_iter(pyannote_shots): original_person_name = named_speakers[speech_turn, track] person_name = mapping.setdefault( original_person_name, get_valid_person_name(original_person_name)) if person_name not in evidences: segment, _ = list(itertools.islice(pyannote_ocr.subset([original_person_name]).itertracks(), 1))[0] evidences[person_name] = { 'person_name': person_name, 'corpus_id': corpus_id, 'video_id': video_id, 'modality': 'written', 'timestamp': segment.middle }
# Name face tracks from OCR names and record one 'written' evidence per
# newly seen person.
#
# NOTE(review): this is a fragment collapsed onto a single line. `uri`,
# `corpus_id`, `video_id`, `mapping`, `evidences` and the `pyannote_*`
# annotations are defined outside the visible code, while `fp`,
# `duplicates` and `shot_id` are not used within it; presumably the body
# of the `with` block continues past this point -- confirm.
#
# Steps, in statement order:
# * If `pyannote_ocr` is falsy (presumably an empty annotation -- confirm),
#   `named_face` is a fresh Annotation for `uri`. Otherwise it is the
#   result of argmax_tagger(pyannote_ocr, pyannote_face); that helper is
#   defined elsewhere and presumably gives each face track the OCR name it
#   co-occurs with most -- confirm.
# * `named_face` is then narrowed with .subset(pyannote_ocr.labels()),
#   presumably keeping only tracks whose label is an OCR name -- confirm.
# * The output path comes from the FUSION template, and its parent
#   directory is created when missing. The exists()/makedirs() pair is not
#   atomic.
# * The output file is opened for writing and `duplicates` starts as an
#   empty dict.
# * The loop walks the pairs yielded by named_face.co_iter(
#   pyannote_shots): a named face track and a shot track, presumably only
#   when their segments intersect -- confirm.
# * `person_name` is looked up in `mapping` and stored there on first
#   sight. get_valid_person_name() still runs on every iteration, because
#   the default argument of setdefault() is evaluated before the call.
# * On the first occurrence of a person, the first track of the OCR
#   annotation restricted to that name is taken, and the middle of its
#   segment becomes the evidence timestamp. `[0]` raises IndexError when
#   that restricted annotation has no track.
#
# NOTE(review): that unpacking rebinds the loop variables `segment` and
# `_`. Any later use of `segment` in the same iteration sees the OCR
# segment, not the face-track segment -- confirm this is intended.
if not pyannote_ocr: named_face = Annotation(uri=uri) else: named_face = argmax_tagger(pyannote_ocr, pyannote_face) named_face = named_face.subset(pyannote_ocr.labels()) path = FUSION.format(repository=REPOSITORY, uri=uri) directory = os.path.dirname(path) if not os.path.exists(directory): os.makedirs(directory) with open(path, 'w') as fp: duplicates = dict() for (segment, track_id), (_, shot_id) in named_face.co_iter(pyannote_shots): original_person_name = named_face[segment, track_id] person_name = mapping.setdefault( original_person_name, get_valid_person_name(original_person_name)) if person_name not in evidences: segment, _ = list(itertools.islice(pyannote_ocr.subset([original_person_name]).itertracks(), 1))[0] evidences[person_name] = { 'person_name': person_name, 'corpus_id': corpus_id, 'video_id': video_id, 'modality': 'written', 'timestamp': segment.middle }