Code example #1
    def _turn_level(self, current_file: dict,
                    speech_turns: Annotation) -> Annotation:
        """Apply clustering at speech turn level

        Parameters
        ----------
        current_file : `dict`
            File as provided by a pyannote.database protocol.
        speech_turns : `Annotation`
            Speech turns. Should only contain `str` labels.

        Returns
        -------
        hypothesis : `pyannote.core.Annotation`
            Clustering result.
        """

        assert_string_labels(speech_turns, "speech_turns")

        # embeddings for the whole file (sliding-window feature-like object
        # supporting .crop -- see pyannote.core)
        embedding = self._embedding(current_file)

        # pool one embedding vector per label by averaging all embeddings
        # that fall within that label's speech turns
        X, clustered_labels, skipped_labels = [], [], []
        for label in speech_turns.labels():

            timeline = speech_turns.label_timeline(label, copy=False)

            # be more and more permissive until we have
            # at least one embedding for current speech turn
            for mode in ["strict", "center", "loose"]:
                x = embedding.crop(timeline, mode=mode)
                if len(x) > 0:
                    break

            # skip labels so small we don't have any embedding for it
            if len(x) < 1:
                skipped_labels.append(label)
                continue

            clustered_labels.append(label)
            X.append(np.mean(x, axis=0))

        # apply clustering of label embeddings
        # NOTE(review): raises if *every* label was skipped (np.vstack([]))
        # -- presumably cannot happen in practice; confirm upstream.
        clusters = self.clustering(np.vstack(X))

        # map each clustered label to its cluster (between 1 and N_CLUSTERS)
        mapping = dict(zip(clustered_labels, clusters))

        # map each skipped label to its own cluster
        # (between -1 and -N_SKIPPED_LABELS)
        for i, label in enumerate(skipped_labels):
            mapping[label] = -(i + 1)

        # do the actual mapping
        return speech_turns.rename_labels(mapping=mapping)
Code example #2
    def _xxx_try_iter(self, subset):
        """Yield speaker spotting trials for `subset`.

        Each yielded trial is a `dict` with keys 'database', 'uri',
        'try_with', 'model_id' and 'reference'; when `self.diarization`
        is True (the default), 'annotation' and 'annotated' are
        included as well.

        Parameters
        ----------
        subset : `str`
            Subset name; used both to load the reference data and to
            locate the `{subset}.trial.txt` trial file.
        """

        # load "who speaks when" reference
        data = self._load_data(subset)

        # `diarization` defaults to True when not set on the instance
        diarization = getattr(self, 'diarization', True)
        if diarization:
            # group reference turns by file
            AnnotationGroups = data['annotation'].groupby(by='uri')
        else:
            # group reference turns by (file, speaker) pair
            AnnotationGroups = data['annotation'].groupby(
                by=['uri', 'speaker'])

        # load trials
        data_dir = Path(__file__).parent / 'data' / 'speaker_spotting'
        trials = data_dir / f'{subset}.trial.txt'
        names = ['model_id', 'uri', 'start', 'end', 'target', 'first', 'total']
        trials = read_table(trials, delim_whitespace=True, names=names)

        for trial in trials.itertuples():

            model_id = trial.model_id

            # strip the trailing "_mXX" suffix to recover the speaker name:
            # FIE038_m1 ==> FIE038
            # FIE038_m42 ==> FIE038
            # Bernard_Pivot_m1 ==> Bernard_Pivot
            speaker = '_'.join(model_id.split('_')[:-1])

            # append Mix-Headset to uri
            raw_uri = trial.uri
            uri = f'{raw_uri}.Mix-Headset'

            # trial session
            try_with = Segment(start=trial.start, end=trial.end)

            if diarization:
                # 'annotation' & 'annotated' are needed when diarization is set
                # therefore, this needs a bit more work than when set to False.

                # rebuild the full reference annotation restricted to the
                # trial segment; tracks are numbered by turn index `t`
                annotation = Annotation(uri=uri)
                turns = AnnotationGroups.get_group(raw_uri)
                for t, turn in enumerate(turns.itertuples()):
                    segment = Segment(start=turn.start,
                                      end=turn.start + turn.duration)
                    # skip turns that do not intersect the trial segment
                    if not (segment & try_with):
                        continue
                    annotation[segment, t] = turn.speaker

                annotation = annotation.crop(try_with)
                reference = annotation.label_timeline(speaker)
                annotated = Timeline(uri=uri, segments=[try_with])

                # pack & yield trial
                current_trial = {
                    'database': 'Test',
                    'uri': uri,
                    'try_with': try_with,
                    'model_id': model_id,
                    'reference': reference,
                    'annotation': annotation,
                    'annotated': annotated,
                }

            else:
                # 'annotation' & 'annotated' are not needed when diarization is
                # set to False -- leading to a faster implementation...
                segments = []
                if trial.target == 'target':
                    # only target trials have non-empty references
                    turns = AnnotationGroups.get_group((raw_uri, speaker))
                    for t, turn in enumerate(turns.itertuples()):
                        segment = Segment(start=turn.start,
                                          end=turn.start + turn.duration)
                        segments.append(segment)
                reference = Timeline(uri=uri, segments=segments).crop(try_with)

                # pack & yield trial
                current_trial = {
                    'database': 'Test',
                    'uri': uri,
                    'try_with': try_with,
                    'model_id': model_id,
                    'reference': reference,
                }

            yield current_trial
Code example #3
    def __call__(
        self, current_file: dict, speech_turns: Annotation, targets: Annotation
    ) -> Annotation:
        """Assign each speech turn to closest target (if close enough)

        Parameters
        ----------
        current_file : `dict`
            File as provided by a pyannote.database protocol.
        speech_turns : `Annotation`
            Speech turns. Should only contain `int` labels.
        targets : `Annotation`
            Targets. Should only contain `str` labels.

        Returns
        -------
        assigned : `Annotation`
            Assigned speech turns.
        """

        assert_string_labels(targets, "targets")
        assert_int_labels(speech_turns, "speech_turns")

        embedding = self._embedding(current_file)

        def pool(annotation):
            # Average one embedding per label over all its segments.
            # Labels too short to yield any embedding are dropped.
            kept, X = [], []
            for label in annotation.labels():

                timeline = annotation.label_timeline(label, copy=False)

                # be more and more permissive until we have
                # at least one embedding for current label
                for mode in ["center", "loose"]:
                    x = embedding.crop(timeline, mode=mode)
                    if len(x) > 0:
                        break

                # skip labels so small we don't have any embedding for it
                if len(x) < 1:
                    continue

                kept.append(label)
                X.append(np.mean(x, axis=0))

            return kept, X

        # gather targets embedding
        targets_labels, X_targets = pool(targets)

        # gather speech turns embedding
        assigned_labels, X = pool(speech_turns)

        # assign speech turns to closest class; a negative assignment
        # index means "no target close enough", in which case the speech
        # turn keeps its original `int` label (absent from the mapping)
        assignments = self.closest_assignment(np.vstack(X_targets), np.vstack(X))
        mapping = {
            label: targets_labels[k]
            for label, k in zip(assigned_labels, assignments)
            if not k < 0
        }
        return speech_turns.rename_labels(mapping=mapping)
Code example #4
    def tst_try_iter(self):
        """Yield speaker spotting trials for the 'tst' subset.

        Each yielded trial is a `dict` with keys 'database', 'uri',
        'try_with', 'model_id' and 'reference'; when `self.diarization`
        is True (the default), 'annotation' and 'annotated' are
        included as well.
        """

        def get_turns(uri):
            # "who speaks when" reference for this file, one row per turn
            # (whitespace-separated: start, end, speaker, speakerID)
            ref_dir = Path(__file__).parent / 'data' / 'speaker_diarization'
            ref_file_path = Path(str(ref_dir / uri) + '.txt')
            gt_names = ['start', 'end', 'speaker', 'speakerID']
            # FIX: previous code joined this path onto the speaker_spotting
            # data_dir with os.path.join, which only worked by accident when
            # ref_file_path happened to be absolute (join discards the first
            # argument then) and was wrong for relative __file__.
            return read_table(ref_file_path, delim_whitespace=True,
                              names=gt_names)

        # `diarization` defaults to True when not set on the instance
        diarization = getattr(self, 'diarization', True)

        # load trials
        data_dir = Path(__file__).parent / 'data' / 'speaker_spotting'
        trials = data_dir / 'tst.trial.txt'
        names = ['model_id', 'uri', 'start', 'end', 'target', 'first', 'total']
        trials = read_table(trials, delim_whitespace=True, names=names)

        for trial in trials.itertuples():

            model_id = trial.model_id

            # model id doubles as speaker name in this protocol
            speaker = model_id

            uri = trial.uri

            # trial session
            try_with = Segment(start=trial.start, end=trial.end)

            if diarization:
                # 'annotation' & 'annotated' are needed when diarization is set
                # therefore, this needs a bit more work than when set to False.

                # rebuild the reference annotation restricted to the trial
                # segment; tracks are numbered by turn index `t`
                annotation = Annotation(uri=uri)
                turns = get_turns(uri)
                for t, turn in enumerate(turns.itertuples()):
                    segment = Segment(start=turn.start,
                                      end=turn.end)
                    # skip turns that do not intersect the trial segment
                    if not (segment & try_with):
                        continue
                    annotation[segment, t] = turn.speakerID

                annotation = annotation.crop(try_with)
                reference = annotation.label_timeline(speaker)
                annotated = Timeline(uri=uri, segments=[try_with])

                # pack & yield trial
                current_trial = {
                    'database': 'Odessa',
                    'uri': uri,
                    'try_with': try_with,
                    'model_id': model_id,
                    'reference': reference,
                    'annotation': annotation,
                    'annotated': annotated,
                }

            else:
                # 'annotation' & 'annotated' are not needed when diarization is
                # set to False -- leading to a faster implementation...
                segments = []
                if trial.target == 'target':
                    # only target trials have non-empty references
                    turns = get_turns(uri).groupby(by='speakerID')
                    for t, turn in enumerate(turns.get_group(speaker).itertuples()):
                        segment = Segment(start=turn.start,
                                          end=turn.end)
                        segments.append(segment)
                reference = Timeline(uri=uri, segments=segments).crop(try_with)

                # pack & yield trial
                current_trial = {
                    'database': 'Odessa',
                    'uri': uri,
                    'try_with': try_with,
                    'model_id': model_id,
                    'reference': reference,
                }

            yield current_trial