Exemple #1
0
    def to_overlap(reference: Annotation) -> Annotation:
        """Build the overlapped speech annotation of a reference

        Each pair of co-occurring tracks yielded by
        ``reference.co_iter(reference)`` whose labels differ contributes the
        intersection of its two segments to the result.

        Parameters
        ----------
        reference : Annotation
            Reference annotation. Its ``uri`` is reused for the timeline in
            which the intersections are collected.

        Returns
        -------
        overlap : `pyannote.core.Annotation`
            Annotation obtained from the support of the collected
            intersections.
        """

        regions = Timeline(uri=reference.uri)

        # The annotation is compared against itself: every yielded item is a
        # pair of (segment, track) keys.
        for (seg_a, trk_a), (seg_b, trk_b) in reference.co_iter(reference):
            same_label = reference[seg_a, trk_a] == reference[seg_b, trk_b]

            # Identical labels (which covers a track paired with itself) do
            # not count as overlap; only differing labels add a region.
            if not same_label:
                regions.add(seg_a & seg_b)

        merged = regions.support()
        return merged.to_annotation()
        if not pyannote_ocr:
            named_speakers = Annotation(uri=uri)
        else:
            named_speakers = argmax_tagger(pyannote_ocr, pyannote_speakers)
            named_speakers = named_speakers.subset(pyannote_ocr.labels())

        path = FUSION.format(repository=REPOSITORY, uri=uri)
        directory = os.path.dirname(path)
        if not os.path.exists(directory):
            os.makedirs(directory)

        with open(path, 'w') as fp:

            duplicates = dict()

            for (speech_turn, track), (_, shot_id) in named_speakers.co_iter(pyannote_shots):

                original_person_name = named_speakers[speech_turn, track]

                person_name = mapping.setdefault(
                    original_person_name, get_valid_person_name(original_person_name))

                if person_name not in evidences:
                    segment, _ = list(itertools.islice(pyannote_ocr.subset([original_person_name]).itertracks(), 1))[0]
                    evidences[person_name] = {
                        'person_name': person_name,
                        'corpus_id': corpus_id,
                        'video_id': video_id,
                        'modality': 'written',
                        'timestamp': segment.middle
                    }
        if not pyannote_ocr:
            named_face = Annotation(uri=uri)
        else:
            named_face = argmax_tagger(pyannote_ocr, pyannote_face)
            named_face = named_face.subset(pyannote_ocr.labels())

        path = FUSION.format(repository=REPOSITORY, uri=uri)
        directory = os.path.dirname(path)
        if not os.path.exists(directory):
            os.makedirs(directory)

        with open(path, 'w') as fp:

            duplicates = dict()

            for (segment, track_id), (_, shot_id) in named_face.co_iter(pyannote_shots):

                original_person_name = named_face[segment, track_id]

                person_name = mapping.setdefault(
                    original_person_name, get_valid_person_name(original_person_name))

                if person_name not in evidences:
                    segment, _ = list(itertools.islice(pyannote_ocr.subset([original_person_name]).itertracks(), 1))[0]
                    evidences[person_name] = {
                        'person_name': person_name,
                        'corpus_id': corpus_id,
                        'video_id': video_id,
                        'modality': 'written',
                        'timestamp': segment.middle
                    }