Code example #1
    def _partition(self, timeline, coverage):
        """Partition `timeline` into contiguous segments and crop to `coverage`.

        Parameters
        ----------
        timeline : iterable of `Segment`
            Segments whose start/end times define the partition boundaries.
        coverage : pyannote `Timeline`
            Extent to which the resulting partition is cropped.

        Returns
        -------
        partition : pyannote `Annotation`
            One anonymized track per boundary-to-boundary segment that
            intersects `coverage`; every track carries the dummy label '_'.
        """
        # boundaries (as set of timestamps)
        boundaries = set()
        for segment in timeline:
            boundaries.add(segment.start)
            boundaries.add(segment.end)

        # partition (as annotation with a dummy '_' label)
        partition = Annotation()
        for start, end in pairwise(sorted(boundaries)):
            partition[Segment(start, end)] = '_'

        # crop once -- the original computed this crop twice, discarding the
        # first result in an unused local variable
        return partition.crop(coverage, mode='intersection').anonymize_tracks()
Code example #2
    def _partition(self, timeline, coverage):
        """Build a '_'-labelled partition of `timeline`, cropped to `coverage`.

        Every segment start/end in `timeline` becomes a boundary; consecutive
        boundaries define the partition segments. The result is cropped to
        `coverage` and its tracks are anonymized.
        """
        # collect every start/end timestamp as a partition boundary
        timestamps = {t for segment in timeline
                      for t in (segment.start, segment.end)}

        # one dummy-labelled track per consecutive pair of boundaries
        partition = Annotation()
        for left, right in pairwise(sorted(timestamps)):
            partition[Segment(left, right)] = '_'

        # keep only the parts intersecting `coverage`, then rename tracks
        cropped = partition.crop(coverage, mode='intersection')
        return cropped.anonymize_tracks()
Code example #3
    def _xxx_try_iter(self, subset):
        """Yield one trial dict per line of the `{subset}.trial.txt` file.

        Each yielded dict carries at least 'database', 'uri', 'try_with',
        'model_id' and 'reference'; when `self.diarization` is truthy (the
        default) it additionally carries 'annotation' and 'annotated'.
        """

        # load "who speaks when" reference
        data = self._load_data(subset)

        # diarization mode needs full per-uri annotations; identification
        # mode only needs the target speaker's turns, hence the two groupings
        diarization = getattr(self, 'diarization', True)
        if diarization:
            AnnotationGroups = data['annotation'].groupby(by='uri')
        else:
            AnnotationGroups = data['annotation'].groupby(
                by=['uri', 'speaker'])

        # load trials
        data_dir = Path(__file__).parent / 'data' / 'speaker_spotting'
        trials = data_dir / f'{subset}.trial.txt'
        names = ['model_id', 'uri', 'start', 'end', 'target', 'first', 'total']
        trials = read_table(trials, delim_whitespace=True, names=names)

        for trial in trials.itertuples():

            model_id = trial.model_id

            # strip the trailing "_mNN" suffix to recover the speaker name:
            # FIE038_m1 ==> FIE038
            # FIE038_m42 ==> FIE038
            # Bernard_Pivot_m1 ==> Bernard_Pivot
            speaker = '_'.join(model_id.split('_')[:-1])

            # append Mix-Headset to uri
            raw_uri = trial.uri
            uri = f'{raw_uri}.Mix-Headset'

            # trial session
            try_with = Segment(start=trial.start, end=trial.end)

            if diarization:
                # 'annotation' & 'annotated' are needed when diarization is set
                # therefore, this needs a bit more work than when set to False.

                annotation = Annotation(uri=uri)
                turns = AnnotationGroups.get_group(raw_uri)
                for t, turn in enumerate(turns.itertuples()):
                    segment = Segment(start=turn.start,
                                      end=turn.start + turn.duration)
                    # skip turns that do not intersect the trial session
                    if not (segment & try_with):
                        continue
                    annotation[segment, t] = turn.speaker

                annotation = annotation.crop(try_with)
                # reference = where the target speaker actually speaks
                reference = annotation.label_timeline(speaker)
                annotated = Timeline(uri=uri, segments=[try_with])

                # pack & yield trial
                current_trial = {
                    'database': 'Test',
                    'uri': uri,
                    'try_with': try_with,
                    'model_id': model_id,
                    'reference': reference,
                    'annotation': annotation,
                    'annotated': annotated,
                }

            else:
                # 'annotation' & 'annotated' are not needed when diarization is
                # set to False -- leading to a faster implementation...
                segments = []
                # non-target trials keep an empty reference timeline
                if trial.target == 'target':
                    turns = AnnotationGroups.get_group((raw_uri, speaker))
                    for t, turn in enumerate(turns.itertuples()):
                        segment = Segment(start=turn.start,
                                          end=turn.start + turn.duration)
                        segments.append(segment)
                reference = Timeline(uri=uri, segments=segments).crop(try_with)

                # pack & yield trial
                current_trial = {
                    'database': 'Test',
                    'uri': uri,
                    'try_with': try_with,
                    'model_id': model_id,
                    'reference': reference,
                }

            yield current_trial
Code example #4
                dic_trackID_st_to_speakingFace[int(TrackID_st)] = [int(TrackID_Face), float(proba)]

        trackID_face_to_name = {}
        for s, t, name in NamedSpk.itertracks(label=True):
            if dic_trackID_st_to_speakingFace[t][0] != '': 
                trackID_face_to_name[dic_trackID_st_to_speakingFace[t][0]] = name

        namedFaces = Annotation(uri=videoID)
        for s, t, faceID in faces.itertracks(label=True):
            if t in trackID_face_to_name: 
                namedFaces[s, t] = trackID_face_to_name[t]

        # write person visible and speaking in a shot:
        for sshot, tshot, shot in shots.itertracks(label=True):
            NamedSpkShot = NamedSpk.crop(sshot)
            NamedFaceShot = namedFaces.crop(sshot)
            PersonShot = set(NamedSpkShot.labels()) & set(NamedFaceShot.labels())

            for p in (PersonShot & set(evidences.keys())):
                conf = 0.0
                for sSpk in NamedSpkShot.label_timeline(p):
                    for sON, tON, name in ON.itertracks(label=True):
                        if name == p:
                            sInter = sON & sSpk
                            if sInter : c = 1.0 + sInter.duration
                            else:
                                sDist = sON ^ sSpk
                                if sDist.duration == 0 : c=1.0
                                else: c = 1/sDist.duration
                            if c > conf: conf = c
Code example #5
def gecko_JSON_to_Annotation(gecko_JSON,
                             uri=None,
                             modality='speaker',
                             confidence_threshold=0.0,
                             collar=0.0,
                             expected_min_speech_time=0.0,
                             manual=False):
    """
    Parameters:
    -----------
    gecko_JSON : `dict`
        loaded from a Gecko-compliant JSON as defined in xml_to_GeckoJSON
    uri (uniform resource identifier) : `str`
        which identifies the annotation (e.g. episode number)
        Default : None
    modality : `str`
        modality of the annotation as defined in https://github.com/pyannote/pyannote-core
    confidence_threshold : `float`, Optional.
        The segments with confidence under confidence_threshold won't be added to UEM file.
        Defaults to keep every segment (i.e. 0.0)
    collar: `float`, Optional.
        Merge tracks with same label and separated by less than `collar` seconds.
        Defaults to keep tracks timeline untouched (i.e. 0.0)
    expected_min_speech_time: `float`, Optional.
        Threshold (in seconds) under which the total duration of speech time is suspicious (warns the user).
        Defaults to never suspect anything (i.e. 0.0)
    manual : `bool`
        Whether the json is coming from a manual correction or straight from
        the forced-alignment output.
        In the former case, the regions timing is used. `confidence_threshold`
            and `collar` are thus irrelevant.
        In the latter case (default), the timing of each term is used.

    Returns:
    --------
    annotation: pyannote `Annotation`
        for speaker identification/diarization as defined in https://github.com/pyannote/pyannote-core
    annotated: pyannote `Timeline`
        representing the annotated parts of the gecko_JSON files (depends on confidence_threshold)
    """
    annotation = Annotation(uri, modality)
    not_annotated = Timeline(uri=uri)
    # end time of the last processed monologue/term; the original referenced
    # the loop variables after the loop, which raised NameError whenever
    # "monologues" was empty (or its last entry was falsy/without terms)
    last_end = 0.0
    for monologue in gecko_JSON["monologues"]:
        if not monologue:
            continue
        # '@' defined in https://github.com/hbredin/pyannote-db-plumcot/blob/develop/CONTRIBUTING.md#idepisodetxt
        # '+' defined in https://github.com/gong-io/gecko/blob/master/app/geckoModule/constants.js#L35
        # raw string avoids the invalid "\+" escape (SyntaxWarning on 3.12+)
        speaker_ids = re.split(r"@|\+", monologue["speaker"]["id"])
        if manual:
            # manual correction: trust the region (monologue) timings
            last_end = monologue["end"]
            for speaker_id in speaker_ids:  # most of the time there's only one
                if speaker_id != '':  # happens with "all@"
                    annotation[Segment(monologue["start"], monologue["end"]),
                               speaker_id] = speaker_id
        else:
            # forced alignment: trust the per-term timings
            for term in monologue["terms"]:
                last_end = term["end"]
                for speaker_id in speaker_ids:  # most of the time there's only one
                    if speaker_id != '':  # happens with "all@"
                        annotation[Segment(term["start"], term["end"]),
                                   speaker_id] = speaker_id
                # low-confidence terms are excluded from the annotated parts
                if term["confidence"] <= confidence_threshold:
                    not_annotated.add(Segment(term["start"], term["end"]))

    if manual:
        annotated = Timeline([Segment(0.0, last_end)], uri)
    else:
        annotation = annotation.support(collar)
        annotated = not_annotated.gaps(support=Segment(0.0, last_end))
    total_speech_time = annotation.crop(annotated).get_timeline().duration()
    if total_speech_time < expected_min_speech_time:
        warnings.warn(
            f"total speech time of {uri} is only {total_speech_time})")
    return annotation, annotated
Code example #6
def gecko_JSON_to_UEM(gecko_JSON,
                      uri=None,
                      modality='speaker',
                      confidence_threshold=0.0,
                      collar=0.0,
                      expected_min_speech_time=0.0):
    """
    Parameters:
    -----------
    gecko_JSON : `dict`
        loaded from a Gecko-compliant JSON as defined in xml_to_GeckoJSON
    uri (uniform resource identifier) : `str`
        which identifies the annotation (e.g. episode number)
        Default : None
    modality : `str`
        modality of the annotation as defined in https://github.com/pyannote/pyannote-core
    confidence_threshold : `float`, Optional.
        The segments with confidence under confidence_threshold won't be added to UEM file.
        Defaults to keep every segment (i.e. 0.0)
    collar: `float`, Optional.
        Merge tracks with same label and separated by less than `collar` seconds.
        Defaults to keep tracks timeline untouched (i.e. 0.0)
    expected_min_speech_time: `float`, Optional.
        Threshold (in seconds) under which the total duration of speech time is suspicious (warns the user).
        Defaults to never suspect anything (i.e. 0.0)

    Returns:
    --------
    annotation: pyannote `Annotation`
        for speaker identification/diarization as defined in https://github.com/pyannote/pyannote-core
    annotated: pyannote `Timeline`
        representing the annotated parts of the gecko_JSON files (depends on confidence_threshold)
    """
    annotation = Annotation(uri, modality)
    annotated = Timeline(uri=uri)
    last_confident = 0.0    # start of the last confident term seen so far
    last_unconfident = 0.0  # end of the last unconfident term seen so far
    for monologue in gecko_JSON["monologues"]:
        if not monologue:
            continue
        # '@' defined in https://github.com/hbredin/pyannote-db-plumcot/blob/develop/CONTRIBUTING.md#idepisodetxt
        # '+' defined in https://github.com/gong-io/gecko/blob/master/app/geckoModule/constants.js#L35
        # raw string avoids the invalid "\+" escape (SyntaxWarning on 3.12+)
        speaker_ids = re.split(r"@|\+", monologue["speaker"]["id"])
        for term in monologue["terms"]:
            # coerce to float (JSON may carry these as strings) and default
            # a missing confidence to 0.0
            term["confidence"], term["start"], term["end"] = map(
                float,
                (term.get("confidence", 0.), term["start"], term["end"]))
            unknown = False
            for speaker_id in speaker_ids:  # most of the time there's only one
                if '#unknown#' in speaker_id:
                    unknown = True
                if speaker_id != '':  # happens with "all@"
                    annotation[Segment(term["start"], term["end"]),
                               speaker_id] = speaker_id
            if term["confidence"] <= confidence_threshold:
                last_unconfident = term["end"]
            else:
                # extend `annotated` only over confident, known-speaker spans
                if last_unconfident < last_confident and not unknown:
                    annotated.add(Segment(last_confident, term["end"]))
                last_confident = term["start"]

    # merge tracks with the same label separated by less than `collar` seconds
    annotation = annotation.support(collar)
    total_speech_time = annotation.crop(annotated).get_timeline().duration()
    if total_speech_time < expected_min_speech_time:
        warnings.warn(
            f"total speech time of {uri} is only {total_speech_time})")
    return annotation, annotated.support()
Code example #7
    def tst_try_iter(self):
        """Yield one trial dict per line of `tst.trial.txt`.

        Each yielded dict carries at least 'database', 'uri', 'try_with',
        'model_id' and 'reference'; when `self.diarization` is truthy (the
        default) it additionally carries 'annotation' and 'annotated'.
        """
        def get_turns(uri):
            # one whitespace-delimited .txt reference file per uri
            ref_file_path = Path(__file__).parent / 'data' / 'speaker_diarization' / uri
            ref_file_path = Path(str(ref_file_path) + '.txt')
            gt_names = ['start', 'end', 'speaker', 'speakerID']
            # NOTE: the original wrapped this in
            # os.path.join(data_dir, ref_file_path), joining the unrelated
            # speaker_spotting directory with this already-complete path; it
            # only worked because os.path.join discards its first argument
            # when the second is absolute, and would break with a relative
            # __file__ -- use the path directly instead.
            return read_table(ref_file_path, delim_whitespace=True, names=gt_names)

        diarization = getattr(self, 'diarization', True)

        # load trials
        data_dir = Path(__file__).parent / 'data' / 'speaker_spotting'
        trials = data_dir / 'tst.trial.txt'
        names = ['model_id', 'uri', 'start', 'end', 'target', 'first', 'total']
        trials = read_table(trials, delim_whitespace=True, names=names)

        for trial in trials.itertuples():

            model_id = trial.model_id

            # model ids double as speaker names in this dataset
            speaker = model_id

            uri = trial.uri

            # trial session
            try_with = Segment(start=trial.start, end=trial.end)

            if diarization:
                # 'annotation' & 'annotated' are needed when diarization is set
                # therefore, this needs a bit more work than when set to False.

                annotation = Annotation(uri=uri)
                turns = get_turns(uri)
                for t, turn in enumerate(turns.itertuples()):
                    segment = Segment(start=turn.start,
                                      end=turn.end)
                    # skip turns that do not intersect the trial session
                    if not (segment & try_with):
                        continue
                    annotation[segment, t] = turn.speakerID

                annotation = annotation.crop(try_with)
                # reference = where the target speaker actually speaks
                reference = annotation.label_timeline(speaker)
                annotated = Timeline(uri=uri, segments=[try_with])

                # pack & yield trial
                current_trial = {
                    'database': 'Odessa',
                    'uri': uri,
                    'try_with': try_with,
                    'model_id': model_id,
                    'reference': reference,
                    'annotation': annotation,
                    'annotated': annotated,
                }

            else:
                # 'annotation' & 'annotated' are not needed when diarization is
                # set to False -- leading to a faster implementation...
                segments = []
                # non-target trials keep an empty reference timeline
                if trial.target == 'target':
                    turns = get_turns(uri).groupby(by='speakerID')
                    for t, turn in enumerate(turns.get_group(speaker).itertuples()):
                        segment = Segment(start=turn.start,
                                          end=turn.end)
                        segments.append(segment)
                reference = Timeline(uri=uri, segments=segments).crop(try_with)

                # pack & yield trial
                current_trial = {
                    'database': 'Odessa',
                    'uri': uri,
                    'try_with': try_with,
                    'model_id': model_id,
                    'reference': reference,
                }

            yield current_trial