Esempio n. 1
    def __init__(self, uri=None, modality=None):
        """Create an empty annotation

        Parameters
        ----------
        uri : string, optional
            uniform resource identifier of annotated document
        modality : string, optional
            name of annotated modality
        """
        super(Annotation, self).__init__()

        self._uri = uri
        self.modality = modality

        # sorted dictionary
        # keys: annotated segments
        # values: {track: label} dictionary
        self._tracks = SortedDict(key_type=(float, float),
                                  updator=TimelineUpdator)

        # dictionary
        # key: label
        # value: timeline
        self._labels = {}
        # key: label
        # value: True when the label timeline must be rebuilt
        self._labelNeedsUpdate = {}

        # timeline meant to store all annotated segments
        self._timeline = Timeline(uri=uri)
        self._timelineNeedsUpdate = True
Esempio n. 2
    def __init__(self, uri=None, modality=None):
        """Create an empty annotation

        Parameters
        ----------
        uri : string, optional
            uniform resource identifier of annotated document
        modality : string, optional
            name of annotated modality
        """
        super(Annotation, self).__init__()

        self._uri = uri
        self.modality = modality

        # sorted dictionary
        # keys: annotated segments
        # values: {track: label} dictionary
        self._tracks = SortedDict(key_type=(float, float),
                                  updator=TimelineUpdator)

        # dictionary
        # key: label
        # value: timeline
        self._labels = {}
        # key: label
        # value: True when the label timeline must be rebuilt
        self._labelNeedsUpdate = {}

        # timeline meant to store all annotated segments
        self._timeline = Timeline(uri=uri)
        self._timelineNeedsUpdate = True
Esempio n. 3
    def copy(self):
        """Duplicate the annotation

        Returns
        -------
        copied : same class as `self`
            New annotation with its own track dictionary and label timelines.
        """

        # create new empty annotation
        copied = self.__class__(uri=self.uri, modality=self.modality)

        # deep copy internal track dictionary
        _tracks = [(key, dict(value)) for (key, value) in self._tracks.items()]
        copied._tracks = SortedDict(items=_tracks,
                                    key_type=(float, float),
                                    updator=TimelineUpdator)

        # deep copy internal label timelines
        # (.items() works on both Python 2 and Python 3)
        copied._labels = {key: timeline.copy()
                          for (key, timeline) in self._labels.items()}

        # deep copy need-update indicator
        copied._labelNeedsUpdate = dict(self._labelNeedsUpdate)

        # the cached timeline itself is not copied, so the copy must rebuild
        # it on first access even when the original is up to date
        copied._timelineNeedsUpdate = True

        return copied
Esempio n. 4
class Annotation(object):
    """Annotation

    Stores one label per (segment, track) pair of an annotated document.

    Parameters
    ----------
    uri : string, optional
        uniform resource identifier of annotated document
    modality : string, optional
        name of annotated modality

    """

    @classmethod
    def from_df(cls, df, uri=None, modality=None, aggfunc=np.mean):
        """Build an annotation from a DataFrame

        Parameters
        ----------
        df : DataFrame
            Must contain the segment, track and label columns (looked up
            through the module-level SEGMENT, TRACK and LABEL names).
        uri : str, optional
            Resource identifier
        modality : str, optional
            Name of annotated modality
        aggfunc : func
            Value aggregation function in case of duplicate (segment, track,
            label) tuples.
            NOTE(review): accepted for compatibility but not used here;
            a later row simply overwrites an earlier (segment, track) pair.

        Returns
        -------
        annotation : Annotation
        """
        annotation = cls(uri=uri, modality=modality)
        for _, (segment, track, label) in df[[SEGMENT, TRACK, LABEL]].iterrows():
            annotation[segment, track] = label
        return annotation

    def __init__(self, uri=None, modality=None):
        super(Annotation, self).__init__()

        self._uri = uri
        self.modality = modality

        # sorted dictionary
        # keys: annotated segments
        # values: {track: label} dictionary
        self._tracks = SortedDict(key_type=(float, float),
                                  updator=TimelineUpdator)

        # dictionary
        # key: label
        # value: timeline
        self._labels = {}
        # key: label
        # value: True when the label timeline must be rebuilt
        self._labelNeedsUpdate = {}

        # timeline meant to store all annotated segments
        self._timeline = Timeline(uri=uri)
        self._timelineNeedsUpdate = True

    def _get_uri(self):
        return self._uri

    def _set_uri(self, uri):
        # update uri for all internal timelines
        for timeline in self._labels.values():
            timeline.uri = uri
        self._uri = uri

    uri = property(_get_uri, fset=_set_uri, doc="Resource identifier")

    def _updateLabels(self):
        """Rebuild the timeline of every label flagged as modified"""

        # labels whose timeline must be rebuilt
        changed = set(l for l, needsUpdate in self._labelNeedsUpdate.items()
                      if needsUpdate)

        # accumulate segments of changed labels
        segments = {l: [] for l in changed}
        for segment, _, l in self.itertracks(label=True):
            if l in changed:
                segments[l].append(segment)

        # (re-)initialize changed label timelines
        for l in changed:
            self._labels[l] = Timeline(segments=segments[l], uri=self.uri)

        self._labelNeedsUpdate = {l: False for l in self._labels}

        # remove "ghost" labels (i.e. label with empty timeline)
        # iterate over a snapshot since entries are removed on the way
        for l in list(self._labels):
            if not self._labels[l]:
                self._labels.pop(l)
                self._labelNeedsUpdate.pop(l)

    def __len__(self):
        """Number of segments"""
        return self._tracks.length()

    def __bool__(self):
        """False if annotation has no segment"""
        return self._tracks.length() > 0

    # Python 2 name of the truth-value hook
    __nonzero__ = __bool__

    def itersegments(self):
        """Segment iterator"""
        return iter(self._tracks)

    def itertracks(self, label=False):
        """Track iterator

        Parameters
        ----------
        label : bool, optional
            When True, yield (segment, track, label) tuples.
            When False (default), yield (segment, track) tuples.
        """
        for segment, tracks in self._tracks.items():
            for track, lbl in tracks.items():
                if label:
                    yield segment, track, lbl
                else:
                    yield segment, track

    def _updateTimeline(self):
        self._timeline = Timeline(segments=self._tracks, uri=self.uri)
        self._timelineNeedsUpdate = False

    def get_timeline(self):
        """Get timeline made of annotated segments"""
        if self._timelineNeedsUpdate:
            self._updateTimeline()
        return self._timeline

    def __eq__(self, other):
        return self._tracks == other._tracks

    def __ne__(self, other):
        return self._tracks != other._tracks

    def __contains__(self, included):
        """Inclusion

        Use expression 'segment in annotation' or 'timeline in annotation'

        Parameters
        ----------
        included : `Segment` or `Timeline`

        Returns
        -------
        contains : bool
            True if every segment in `included` exists in annotation
            False otherwise
        """
        return included in self.get_timeline()

    def crop(self, other, mode='intersection'):
        """Crop annotation

        Parameters
        ----------
        other : `Segment` or `Timeline`

        mode : {'strict', 'loose', 'intersection'}
            In 'strict' mode, only segments fully included in focus coverage
            are kept. In 'loose' mode, any intersecting segment is kept
            unchanged. In 'intersection' mode, only intersecting segments are
            kept and replaced by their actual intersection with the focus.

        Returns
        -------
        cropped : Annotation

        Raises
        ------
        NotImplementedError
            If `mode` is not one of the supported values.
        TypeError
            If `other` is neither a `Segment` nor a `Timeline`.

        Remarks
        -------
        In 'intersection' mode, the best is done to keep the track names
        unchanged. However, in some cases where two original segments are
        cropped into the same resulting segments, conflicting track names are
        modified to make sure no track is lost.
        """

        if isinstance(other, Segment):
            other = Timeline(segments=[other], uri=self.uri)
            return self.crop(other, mode=mode)

        if not isinstance(other, Timeline):
            # previously fell through to an unbound local variable
            raise TypeError('crop only works with segments and timelines.')

        cropped = self.__class__(uri=self.uri, modality=self.modality)

        if mode == 'loose':
            # TODO
            # update co_iter to yield (segment, tracks), (segment, tracks)
            # instead of segment, segment
            # This would avoid calling ._tracks.get(segment)
            for segment, _ in self.get_timeline().co_iter(other):
                for track, label in self._tracks[segment].items():
                    cropped[segment, track] = label

        elif mode == 'strict':
            # TODO
            # see above
            for segment, other_segment in self.get_timeline().co_iter(other):
                if segment in other_segment:
                    for track, label in self._tracks[segment].items():
                        cropped[segment, track] = label

        elif mode == 'intersection':
            # see above
            for segment, other_segment in self.get_timeline().co_iter(other):
                intersection = segment & other_segment
                for track, label in self._tracks[segment].items():
                    # keep the track name unless it is already taken
                    track = cropped.new_track(intersection,
                                              candidate=track)
                    cropped[intersection, track] = label

        else:
            raise NotImplementedError("unsupported mode: '%s'" % mode)

        return cropped

    def get_tracks(self, segment):
        """Set of tracks for query segment

        Parameters
        ----------
        segment : `Segment`
            Query segment

        Returns
        -------
        tracks : set
            Set of tracks for query segment
        """
        return set(self._tracks.get(segment, {}))

    def has_track(self, segment, track):
        """Check whether a given track exists

        Parameters
        ----------
        segment : `Segment`
            Query segment
        track :
            Query track

        Returns
        -------
        exists : bool
            True if track exists for segment
        """
        return track in self._tracks.get(segment, {})

    def get_track_by_name(self, track):
        """Get all tracks with given name

        Parameters
        ----------
        track : any valid track name
            Requested name track

        Returns
        -------
        tracks : list
            List of (segment, track) tuples

        Raises
        ------
        NotImplementedError
            Always (not implemented yet).
        """
        raise NotImplementedError('')

    def copy(self):
        """Duplicate the annotation

        Returns
        -------
        copied : same class as `self`
            New annotation with its own track dictionary and label timelines.
        """

        # create new empty annotation
        copied = self.__class__(uri=self.uri, modality=self.modality)

        # deep copy internal track dictionary
        _tracks = [(key, dict(value)) for (key, value) in self._tracks.items()]
        copied._tracks = SortedDict(items=_tracks,
                                    key_type=(float, float),
                                    updator=TimelineUpdator)

        # deep copy internal label timelines
        copied._labels = {key: timeline.copy()
                          for (key, timeline) in self._labels.items()}

        # deep copy need-update indicator
        copied._labelNeedsUpdate = dict(self._labelNeedsUpdate)

        # the cached timeline itself is not copied, so the copy must rebuild
        # it on first access even when the original is up to date
        copied._timelineNeedsUpdate = True

        return copied

    def retrack(self):
        """Rename tracks with consecutive integers (in iteration order)

        Returns
        -------
        retracked : same class as `self`
            New annotation with the same segments and labels.
        """
        retracked = self.__class__(uri=self.uri, modality=self.modality)
        for n, (s, _, label) in enumerate(self.itertracks(label=True)):
            retracked[s, n] = label
        return retracked

    def new_track(self, segment, candidate=None, prefix=None):
        """Track name generator

        Parameters
        ----------
        segment : Segment
        prefix : str, optional
        candidate : any valid track name

        Returns
        -------
        track : str
            New track name
        """

        # obtain list of existing tracks for segment
        existing_tracks = set(self._tracks.get(segment, {}))

        # if candidate is provided, check whether it already exists
        # in case it does not, use it
        if (candidate is not None) and (candidate not in existing_tracks):
            return candidate

        # no candidate was provided or the provided candidate already exists
        # we need to create a brand new one

        # by default (if prefix is not provided)
        # use modality as prefix (eg. speaker1, speaker2, ...)
        if prefix is None:
            prefix = '' if self.modality is None else str(self.modality)

        # find first non-existing track name for segment
        # eg. if speaker1 exists, try speaker2, then speaker3, ...
        count = 1
        while ('%s%d' % (prefix, count)) in existing_tracks:
            count += 1

        # return first non-existing track name
        return '%s%d' % (prefix, count)

    def __str__(self):
        """Human-friendly representation"""
        # TODO: use pandas.DataFrame
        return "\n".join(["%s %s %s" % (s, t, l)
                          for s, t, l in self.itertracks(label=True)])

    def __delitem__(self, key):
        """Remove a whole segment or a single track

        Use 'del annotation[segment]' or 'del annotation[segment, track]'.

        Raises
        ------
        KeyError
            If the segment or the track does not exist, or if `key` has an
            unsupported form.
        """

        # del annotation[segment]
        # NOTE(review): tested before the tuple case on purpose; presumably
        # a Segment is itself a 2-tuple -- confirm
        if isinstance(key, Segment):

            # Pop segment out of dictionary
            # and get corresponding tracks
            # Raises KeyError if segment does not exist
            tracks = self._tracks.pop(key)

            # mark timeline as modified
            self._timelineNeedsUpdate = True

            # mark every label in tracks as modified
            for label in tracks.values():
                self._labelNeedsUpdate[label] = True

        # del annotation[segment, track]
        elif isinstance(key, tuple) and len(key) == 2:

            segment, track = key

            # get segment tracks as dictionary
            # Raises KeyError if segment does not exist
            tracks = self._tracks[segment]

            # pop track out of tracks dictionary
            # and get corresponding label
            # Raises KeyError if track does not exist
            label = tracks.pop(track)

            # mark label as modified
            self._labelNeedsUpdate[label] = True

            # if tracks dictionary is now empty,
            # remove segment as well
            if not tracks:
                self._tracks.pop(segment)
                self._timelineNeedsUpdate = True

        else:
            raise KeyError('')

    # label = annotation[segment, track]
    def __getitem__(self, key):
        """Get the label of a track (track defaults to '_' for a bare segment)"""

        if isinstance(key, Segment):
            key = (key, '_')

        return self._tracks[key[0]][key[1]]

    # annotation[segment, track] = label
    def __setitem__(self, key, label):
        """Set the label of a track (track defaults to '_' for a bare segment)"""

        if isinstance(key, Segment):
            key = (key, '_')

        if key[0] not in self._tracks:
            self._tracks[key[0]] = {}
            self._timelineNeedsUpdate = True

        self._tracks[key[0]][key[1]] = label
        self._labelNeedsUpdate[label] = True

    def empty(self):
        """Return a new empty annotation with the same uri and modality"""
        return self.__class__(uri=self.uri, modality=self.modality)

    def labels(self, unknown=True):
        """List of labels

        Parameters
        ----------
        unknown : bool, optional
            When False, do not return Unknown instances
            When True, return any label (even Unknown instances)

        Returns
        -------
        labels : list
            Sorted list of labels

        Remarks
        -------
            Labels are sorted based on their string representation.
        """

        if any(self._labelNeedsUpdate.values()):
            self._updateLabels()

        labels = sorted(self._labels, key=str)

        if not unknown:
            labels = [l for l in labels if not isinstance(l, Unknown)]

        return labels

    def get_labels(self, segment, unknown=True, unique=True):
        """Local set of labels

        Parameters
        ----------
        segment : Segment
            Segments to get label from.
        unknown : bool, optional
            When False, do not return Unknown instances
            When True, return any label (even Unknown instances)
        unique : bool, optional
            When False, return the list of (possibly repeated) labels.
            When True (default), return the set of labels

        Returns
        -------
        labels : set
            Set of labels for `segment` if it exists, empty set otherwise.

        Examples
        --------

            >>> annotation = Annotation()
            >>> segment = Segment(0, 2)
            >>> annotation[segment, 'speaker1'] = 'Bernard'
            >>> annotation[segment, 'speaker2'] = 'John'
            >>> sorted(annotation.get_labels(segment))
            ['Bernard', 'John']
            >>> len(annotation.get_labels(Segment(1, 2)))
            0

        """

        # materialize so that a real list is returned when unique is False
        labels = list(self._tracks.get(segment, {}).values())

        if not unknown:
            labels = [l for l in labels if not isinstance(l, Unknown)]

        if unique:
            labels = set(labels)

        return labels

    def subset(self, labels, invert=False):
        """Annotation subset

        Extract annotation subset based on labels

        Parameters
        ----------
        labels : set
            Set of labels
        invert : bool, optional
            If invert is True, extract all but requested `labels`

        Returns
        -------
        subset : `Annotation`
            Annotation subset.

        Raises
        ------
        TypeError
            If `labels` is not a set.
        """

        if not isinstance(labels, set):
            raise TypeError('labels must be provided as a set of labels.')

        if invert:
            labels = set(self.labels()) - labels
        else:
            labels = labels & set(self.labels())

        sub = self.__class__(uri=self.uri, modality=self.modality)
        for segment, track, label in self.itertracks(label=True):
            if label in labels:
                sub[segment, track] = label

        return sub

    def label_timeline(self, label):
        """Get timeline for a given label

        Parameters
        ----------
        label :

        Returns
        -------
        timeline : :class:`Timeline`
            Timeline made of all segments annotated with `label`
        """

        # self.labels() refreshes label timelines when needed
        if label not in self.labels():
            return Timeline(uri=self.uri)

        if self._labelNeedsUpdate[label]:
            self._updateLabels()

        return self._labels[label]

    def label_coverage(self, label):
        """Get coverage of the timeline of a given label

        Parameters
        ----------
        label :

        Returns
        -------
        coverage : :class:`Timeline`
            Empty timeline when `label` is not used.
        """

        if label not in self.labels():
            return Timeline(uri=self.uri)

        return self.label_timeline(label).coverage()

    def label_duration(self, label):
        """Get duration of the timeline of a given label (0. when unused)"""

        if label not in self.labels():
            return 0.

        return self.label_timeline(label).duration()

    def chart(self, percent=False):
        """Label chart based on their duration

        Parameters
        ----------
        percent : bool, optional
            Return total duration percentage (rather than raw duration)

        Returns
        -------
        chart : (label, duration) iterable
            Sorted from longest to shortest.

        """

        chart = sorted([(label, self.label_duration(label))
                        for label in self.labels()],
                       key=operator.itemgetter(1), reverse=True)

        if percent:
            total = np.sum([duration for _, duration in chart])
            chart = [(label, duration/total) for (label, duration) in chart]

        return chart

    def argmax(self, segment=None, known_first=False):
        """Get most frequent label

        Parameters
        ----------
        segment : Segment, optional
            Section of annotation where to look for the most frequent label.
            Defaults to whole annotation extent.
        known_first: bool, optional
            If True, artificially reduces the duration of intersection of
            `Unknown` labels so that 'known' labels are returned first.

        Returns
        -------
        label : any existing label or None
            Label with longest intersection

        Examples
        --------

            >>> annotation = Annotation(modality='speaker')
            >>> annotation[Segment(0, 10), 'speaker1'] = 'Alice'
            >>> annotation[Segment(8, 20), 'speaker1'] = 'Bob'
            >>> print("%s is such a talker!" % annotation.argmax())
            Bob is such a talker!
            >>> segment = Segment(22, 23)
            >>> if not annotation.argmax(segment):
            ...    print("No label intersecting %s" % segment)
            No label intersecting [22 --> 23]

        """

        # if annotation is empty, obviously there is no most frequent label
        if not self:
            return None

        # if segment is not provided, just look for the overall most frequent
        # label (ie. set segment to the extent of the annotation)
        if segment is None:
            segment = self.get_timeline().extent()

        # compute intersection duration for each label
        durations = {
            lbl: self.label_timeline(lbl).crop(
                segment, mode='intersection').duration()
            for lbl in self.labels()}

        # artifically reduce intersection duration of Unknown labels
        # so that 'known' labels are returned first
        if known_first:
            maxduration = max(durations.values())
            for lbl in list(durations):
                if isinstance(lbl, Unknown):
                    durations[lbl] = durations[lbl] - maxduration

        # find the most frequent label
        label = max(durations.items(), key=operator.itemgetter(1))[0]

        # in case all durations were zero, there is no most frequent label
        return label if durations[label] > 0 else None

    def __rshift__(self, timeline):
        """Tag a timeline

        Use expression 'tagged = annotation >> timeline'

        Shortcut for :
            >>> tagger = DirectTagger()
            >>> tagged = tagger(annotation, timeline)

        Parameters
        ----------
        timeline : :class:`pyannote.base.timeline.Timeline`

        Returns
        -------
        tagged : :class:`pyannote.base.annotation.Annotation`
            Tagged timeline - one track per intersecting label.

        Raises
        ------
        TypeError
            If `timeline` is not a `Timeline`.
        """
        from pyannote.algorithm.tagging import DirectTagger
        if not isinstance(timeline, Timeline):
            raise TypeError('direct tagging (>>) only works with timelines.')
        return DirectTagger()(self, timeline)

    def translate(self, translation):
        """Translate labels

        Parameters
        ----------
        translation: dict
            Label translation.
            Labels with no associated translation are kept unchanged.

        Returns
        -------
        translated : :class:`Annotation`
            New annotation with translated labels.
        """

        assert isinstance(translation, dict)

        # create an empty copy
        translated = self.empty()

        for segment, track, label in self.itertracks(label=True):
            # only transform labels that have an actual translation
            # in the provided dictionary, keep the others as they are.
            translated[segment, track] = translation.get(label, label)

        return translated

    def __mod__(self, translation):
        """Shortcut for translate: 'translated = annotation % translation'"""
        return self.translate(translation)

    def anonymize_labels(self):
        """Anonymize labels

        Create a new annotation where labels are anonymized, ie. each label
        is replaced by a unique `Unknown` instance.

        Returns
        -------
        anonymized : :class:`Annotation`
            New annotation with anonymized labels.
        """
        translation = {label: Unknown() for label in self.labels()}
        return self % translation

    def anonymize_tracks(self):
        """Anonymize tracks

        Create a new annotation where each track is anonymized, i.e. the label
        of each track is set to a unique `Unknown` instance

        Returns
        -------
        anonymized : `Annotation`
            Anonymized annotation
        """
        anonymized = self.empty()
        for s, t in self.itertracks():
            anonymized[s, t] = Unknown()
        return anonymized

    def smooth(self):
        """Smooth annotation

        Create new annotation where contiguous tracks with same label are
        merged into one longer track.

        Returns
        -------
        annotation : Annotation
            New annotation where contiguous tracks with same label are merged
            into one long track.

        Remarks
        -------
            Track names are lost in the process.

        """

        smoothed = self.empty()

        n = 0
        for label in self.labels():
            coverage = self.label_coverage(label)
            for segment in coverage:
                smoothed[segment, n] = label
                n = n+1

        return smoothed

    def co_iter(self, other):
        """Iterate over pairs of tracks of intersecting segments

        Parameters
        ----------
        other : Annotation

        Yields
        ------
        (segment, track), (other_segment, other_track)
        """
        for s, S in self.get_timeline().co_iter(other.get_timeline()):
            tracks = self.get_tracks(s)
            other_tracks = other.get_tracks(S)
            for t, T in itertools.product(tracks, other_tracks):
                yield (s, t), (S, T)

    def for_json(self):
        """Serialize to a JSON-compatible dictionary (see `from_json`)"""
        data = {
            # same layout as the one read back by from_json
            PYANNOTE_JSON_ANNOTATION: [
                [s.for_json(), t, l] for s, t, l in self.itertracks(label=True)
            ]
        }
        if self.uri:
            data[PYANNOTE_URI] = self.uri
        if self.modality:
            data[PYANNOTE_MODALITY] = self.modality
        return data

    @classmethod
    def from_json(cls, data):
        """Build an annotation from a dictionary produced by `for_json`"""
        uri = data.get(PYANNOTE_URI, None)
        modality = data.get(PYANNOTE_MODALITY, None)
        annotation = cls(uri=uri, modality=modality)
        for s, track, label in data[PYANNOTE_JSON_ANNOTATION]:
            segment = Segment.from_json(s)
            annotation[segment, track] = label
        return annotation

    def _repr_png_(self):
        from pyannote.core.notebook import repr_annotation
        return repr_annotation(self)
Esempio n. 5
class Annotation(object):

    """Annotation

    Parameters
    ----------
    uri : string, optional
        uniform resource identifier of annotated document
    modality : string, optional
        name of annotated modality

    """

    @classmethod
    def from_df(cls, df, uri=None, modality=None):
        """Build an annotation from a DataFrame

        Parameters
        ----------
        df : DataFrame
            Must contain the following columns: 'segment', 'track' and 'label'
        uri : str, optional
            Resource identifier
        modality : str, optional
            Name of annotated modality

        Returns
        -------
        annotation : Annotation
        """

        annotation = cls(uri=uri, modality=modality)

        annotation._tracks = SortedDict(key_type=(float, float), updator=TimelineUpdator)

        # select columns by name so that neither their position in the frame
        # nor extra columns matter
        rows = df[["segment", "track", "label"]].itertuples(index=False)
        for segment, track, label in rows:
            if segment in annotation._tracks:
                annotation._tracks[segment][track] = label
            else:
                annotation._tracks[segment] = {track: label}

        # label timelines are placeholders (None) until the next update
        annotation._labels = {label: None for label in df["label"].unique()}
        annotation._labelNeedsUpdate = {label: True for label in annotation._labels}

        annotation._timeline = None
        annotation._timelineNeedsUpdate = True

        return annotation

    def __init__(self, uri=None, modality=None):
        """Create an empty annotation

        Parameters
        ----------
        uri : string, optional
            uniform resource identifier of annotated document
        modality : string, optional
            name of annotated modality
        """
        super(Annotation, self).__init__()

        # identity of the annotated document
        self.modality = modality
        self._uri = uri

        # per-label timelines ({label: timeline}) and their "stale" flags
        self._labelNeedsUpdate = {}
        self._labels = {}

        # sorted dictionary mapping annotated segments
        # to their {track: label} dictionary
        self._tracks = SortedDict(key_type=(float, float), updator=TimelineUpdator)

        # no timeline of annotated segments is stored yet:
        # it is flagged as needing an update
        self._timelineNeedsUpdate = True
        self._timeline = None

    def _get_uri(self):
        return self._uri

    def _set_uri(self, uri):
        # update uri for all internal timelines
        for _, timeline in six.iteritems(self._labels):
            timeline.uri = uri
        self._uri = uri

    uri = property(_get_uri, fset=_set_uri, doc="Resource identifier")

    def _updateLabels(self):
        """Rebuild the timeline of every label flagged as needing an update"""

        # list of labels that needs to be updated
        stale = set(label for label, flag in self._labelNeedsUpdate.items() if flag)

        # accumulate segments for updated labels
        _segments = {label: [] for label in stale}
        for segment, track, label in self.itertracks(label=True):
            if label in stale:
                _segments[label].append(segment)

        # create timeline with accumulated segments for updated labels
        for label in stale:
            if _segments[label]:
                self._labels[label] = Timeline(segments=_segments[label], uri=self.uri)
                self._labelNeedsUpdate[label] = False
            else:
                # "ghost" label (no segment left): forget it, otherwise it
                # stays flagged forever and keeps an outdated timeline
                self._labels.pop(label, None)
                self._labelNeedsUpdate.pop(label, None)

    def __len__(self):
        """Number of segments"""
        segment_count = self._tracks.length()
        return segment_count

    def __bool__(self):
        """True when at least one segment is annotated"""
        segment_count = self._tracks.length()
        return segment_count > 0

    def __nonzero__(self):
        """Python 2 truth-value hook; delegates to `__bool__`"""
        truth = self.__bool__()
        return truth

    def itersegments(self):
        """Iterate over annotated segments (keys of the track dictionary)"""
        segments = self._tracks
        return iter(segments)

    def itertracks(self, label=False):
        """Track iterator

        Parameters
        ----------
        label : bool, optional
            When True, yield (segment, track, label) tuples.
            When False (default), yield (segment, track) tuples.

        Within a segment, tracks are yielded sorted by the string
        representation of (track, label).
        """
        for segment, tracks in self._tracks.items():
            for track, lbl in sorted(tracks.items(), key=lambda tl: (str(tl[0]), str(tl[1]))):
                if label:
                    yield segment, track, lbl
                else:
                    yield segment, track

    def _updateTimeline(self):
        """Rebuild the stored timeline from the annotated segments"""
        rebuilt = Timeline(segments=self._tracks, uri=self.uri)
        self._timeline = rebuilt
        # the stored timeline is now in sync with the track dictionary
        self._timelineNeedsUpdate = False

    def get_timeline(self, copy=True):
        """Get timeline made of annotated segments

        Parameters
        ----------
        copy : bool, optional
            When True (default), return a copy of the internal timeline.
            When False, return the internal timeline itself.
        """
        if self._timelineNeedsUpdate:
            self._updateTimeline()
        if copy:
            return self._timeline.copy()
        return self._timeline

    def __eq__(self, other):
        """True when both annotations yield the same (segment, track, label) tuples"""
        mine = self.itertracks(label=True)
        theirs = other.itertracks(label=True)
        # zip_longest pads the shorter side with None, which never matches
        for left, right in six.moves.zip_longest(mine, theirs):
            if not left == right:
                return False
        return True

    def __ne__(self, other):
        """True when the annotations differ in at least one (segment, track, label) tuple"""
        mine = self.itertracks(label=True)
        theirs = other.itertracks(label=True)
        # zip_longest pads the shorter side with None, which always differs
        for left, right in six.moves.zip_longest(mine, theirs):
            if left != right:
                return True
        return False

    def __contains__(self, included):
        """Inclusion

        Use expression 'segment in annotation' or 'timeline in annotation'

        Parameters
        ----------
        included : `Segment` or `Timeline`

        Returns
        -------
        contains : bool
            True if every segment in `included` exists in annotation
            False otherwise
        """
        # the internal timeline is only read, no need to copy it
        return included in self.get_timeline(copy=False)

    def crop(self, other, mode="intersection"):
        """Crop annotation

        Parameters
        ----------
        other : `Segment` or `Timeline`

        mode : {'strict', 'loose', 'intersection'}
            In 'strict' mode, only segments fully included in focus coverage
            are kept. In 'loose' mode, any intersecting segment is kept
            unchanged. In 'intersection' mode, only intersecting segments are
            kept and replaced by their actual intersection with the focus.

        Returns
        -------
        cropped : Annotation

        Raises
        ------
        NotImplementedError
            If `mode` is not one of the supported values.
        TypeError
            If `other` is neither a `Segment` nor a `Timeline`.

        Remarks
        -------
        In 'intersection' mode, the best is done to keep the track names
        unchanged. However, in some cases where two original segments are
        cropped into the same resulting segments, conflicting track names are
        modified to make sure no track is lost.
        """

        if isinstance(other, Segment):
            other = Timeline(segments=[other], uri=self.uri)
            return self.crop(other, mode=mode)

        if not isinstance(other, Timeline):
            # previously fell through to an unbound local variable
            raise TypeError("crop only works with segments and timelines.")

        cropped = self.__class__(uri=self.uri, modality=self.modality)

        if mode == "loose":
            # TODO
            # update co_iter to yield (segment, tracks), (segment, tracks)
            # instead of segment, segment
            # This would avoid calling ._tracks.get(segment)
            for segment, _ in self.get_timeline(copy=False).co_iter(other):
                for track, label in six.iteritems(self._tracks[segment]):
                    cropped[segment, track] = label

        elif mode == "strict":
            # TODO
            # see above
            for segment, other_segment in self.get_timeline(copy=False).co_iter(other):

                if segment in other_segment:
                    for track, label in six.iteritems(self._tracks[segment]):
                        cropped[segment, track] = label

        elif mode == "intersection":
            # see above
            for segment, other_segment in self.get_timeline(copy=False).co_iter(other):

                intersection = segment & other_segment
                for track, label in six.iteritems(self._tracks[segment]):
                    # keep the track name unless it is already taken
                    track = cropped.new_track(intersection, candidate=track)
                    cropped[intersection, track] = label

        else:
            raise NotImplementedError("unsupported mode: '%s'" % mode)

        return cropped

    def get_tracks(self, segment):
        """Set of tracks for query segment

        Parameters
        ----------
        segment : `Segment`
            Query segment

        Returns
        -------
        tracks : set
            Set of tracks for query segment
            (empty when the segment is not annotated)
        """
        return set(self._tracks.get(segment, {}))

    def has_track(self, segment, track):
        """Check whether a given track exists

        Parameters
        ----------
        segment : `Segment`
            Query segment
        track :
            Query track

        Returns
        -------
        exists : bool
            True if track exists for segment
        """
        return track in self._tracks.get(segment, {})

    def get_track_by_name(self, track):
        """Get all tracks with given name

        Parameters
        ----------
        track : any valid track name
            Requested name track

        Returns
        -------
        tracks : list
            List of (segment, track) tuples

        Raises
        ------
        NotImplementedError
            Always (not implemented yet).
        """
        raise NotImplementedError("")

    def copy(self):
        """Duplicate the annotation

        Returns
        -------
        copied : same class as `self`
            New annotation with its own track dictionary and label timelines.
        """

        # create new empty annotation
        copied = self.__class__(uri=self.uri, modality=self.modality)

        # deep copy internal track dictionary
        _tracks = [(key, dict(value)) for (key, value) in self._tracks.items()]
        copied._tracks = SortedDict(items=_tracks, key_type=(float, float), updator=TimelineUpdator)

        # deep copy internal label timelines
        # (from_df stores None placeholders until labels are updated;
        # those are carried over as they are)
        copied._labels = {
            key: (None if timeline is None else timeline.copy())
            for (key, timeline) in six.iteritems(self._labels)
        }

        # deep copy need-update indicator
        copied._labelNeedsUpdate = dict(self._labelNeedsUpdate)

        # the timeline is not copied: it gets rebuilt on first access
        copied._timelineNeedsUpdate = True

        return copied

    def retrack(self):
        """Rename tracks with consecutive integers (in iteration order)

        Returns
        -------
        retracked : same class as `self`
            New annotation with the same segments and labels.
        """
        renumbered = self.__class__(uri=self.uri, modality=self.modality)
        index = 0
        for segment, _, label in self.itertracks(label=True):
            renumbered[segment, index] = label
            index += 1
        return renumbered

    def new_track(self, segment, candidate=None, prefix=None):
        """Track name generator

        Parameters
        ----------
        segment : Segment
        prefix : str, optional
        candidate : any valid track name

        Returns
        -------
        track : str
            New track name
            (`candidate` itself when it is provided and not already used)
        """

        # obtain list of existing tracks for segment
        existing_tracks = set(self._tracks.get(segment, {}))

        # if candidate is provided, check whether it already exists
        # in case it does not, use it
        if (candidate is not None) and (candidate not in existing_tracks):
            return candidate

        # no candidate was provided or the provided candidate already exists
        # we need to create a brand new one

        # by default (if prefix is not provided), use ''
        if prefix is None:
            prefix = ""

        # find first non-existing track name for segment
        # eg. if '0' exists, try '1', then '2', ...
        count = 0
        while ("%s%d" % (prefix, count)) in existing_tracks:
            count += 1

        # return first non-existing track name
        return "%s%d" % (prefix, count)

    def __str__(self):
        """Human-friendly representation"""
        # TODO: use pandas.DataFrame
        return "\n".join(["%s %s %s" % (s, t, l) for s, t, l in self.itertracks(label=True)])

    def __delitem__(self, key):
        """Remove a whole segment or a single track.

        Supports ``del annotation[segment]`` and
        ``del annotation[segment, track]``.

        Raises
        ------
        KeyError
            If the segment or track does not exist, or if `key` is neither a
            `Segment` nor a 2-tuple.
        """

        # del annotation[segment]
        if isinstance(key, Segment):

            # Pop segment out of dictionary
            # and get corresponding tracks
            # Raises KeyError if segment does not exist
            tracks = self._tracks.pop(key)

            # mark timeline as modified
            self._timelineNeedsUpdate = True

            # mark every label in tracks as modified
            for _track, label in six.iteritems(tracks):
                self._labelNeedsUpdate[label] = True

        # del annotation[segment, track]
        elif isinstance(key, tuple) and len(key) == 2:

            segment, track = key

            # get segment tracks as dictionary
            # Raises KeyError if segment does not exist
            tracks = self._tracks[segment]

            # pop track out of tracks dictionary
            # and get corresponding label
            # Raises KeyError if track does not exist
            label = tracks.pop(track)

            # mark label as modified
            self._labelNeedsUpdate[label] = True

            # if tracks dictionary is now empty,
            # remove segment as well
            if not tracks:
                self._tracks.pop(segment)
                self._timelineNeedsUpdate = True

        # any other kind of key is not supported
        else:
            raise KeyError("")

    # label = annotation[segment, track]
    def __getitem__(self, key):
        """Return the label of a track: ``annotation[segment, track]``.

        ``annotation[segment]`` is a shorthand for the track named "_".
        """
        if not isinstance(key, Segment):
            tracks = self._tracks[key[0]]
            return tracks[key[1]]

        return self._tracks[key]["_"]

    # annotation[segment, track] = label
    def __setitem__(self, key, label):
        """Add a track or change its label: ``annotation[segment, track] = label``.

        ``annotation[segment] = label`` is a shorthand for the track named "_".
        Falsy (empty) segments are silently ignored.
        """

        if isinstance(key, Segment):
            key = (key, "_")

        segment, track = key

        # do not add empty track
        if not segment:
            return

        # in case we create a new segment
        # mark timeline as modified
        if segment not in self._tracks:
            self._tracks[segment] = {}
            self._timelineNeedsUpdate = True

        tracks = self._tracks[segment]

        # in case we modify an existing track
        # mark old label as modified
        if track in tracks:
            old_label = tracks[track]
            self._labelNeedsUpdate[old_label] = True

        # mark new label as modified
        tracks[track] = label
        self._labelNeedsUpdate[label] = True

    def empty(self):
        """Return a new instance of the same class with the same uri and modality."""
        cls = self.__class__
        return cls(uri=self.uri, modality=self.modality)

    def labels(self):
        """List of labels

        Returns
        -------
        labels : list
            Sorted list of labels (sorted by their string representation)
        """
        # refresh the label timelines first if any label was modified
        # NOTE(review): the body of this `if` was missing; `_updateLabels` is
        # the refresh helper this class is expected to provide -- confirm.
        if any(self._labelNeedsUpdate.values()):
            self._updateLabels()
        return sorted(self._labels, key=str)

    def get_labels(self, segment, unique=True):
        """Local set of labels

        Parameters
        ----------
        segment : Segment
            Segment to get labels from.
        unique : bool, optional
            When False, return the list of (possibly repeated) labels.
            When True (default), return the set of labels

        Returns
        -------
        labels : set or list
            Set (or list, when `unique` is False) of labels for `segment` if
            it exists, empty otherwise.

        Examples
        --------

            >>> annotation = Annotation()
            >>> segment = Segment(0, 2)
            >>> annotation[segment, 'speaker1'] = 'Bernard'
            >>> annotation[segment, 'speaker2'] = 'John'
            >>> sorted(annotation.get_labels(segment))
            ['Bernard', 'John']
            >>> annotation.get_labels(Segment(1, 2))
            set()

        """

        labels = self._tracks.get(segment, {}).values()

        if unique:
            return set(labels)

        # return an actual list (not a dictionary view), as documented
        return list(labels)

    def subset(self, labels, invert=False):
        """Annotation subset

        Extract annotation subset based on labels

        Parameters
        ----------
        labels : iterable
            Label iterable.
        invert : bool, optional
            If invert is True, extract all but requested `labels`

        Returns
        -------
        subset : `Annotation`
            Annotation subset.
        """

        labels = set(labels)
        known = set(self.labels())

        if invert:
            labels = known - labels
        else:
            labels = labels & known

        sub = self.__class__(uri=self.uri, modality=self.modality)
        for segment, track, label in self.itertracks(label=True):
            if label in labels:
                sub[segment, track] = label

        return sub

    def update(self, annotation, copy=False):
        """Update existing annotations or create new ones

        Parameters
        ----------
        annotation : Annotation
            Updated (or new) annotations
        copy : bool, optional
            Create a copy before updating. Defaults to False, in which case
            this annotation is modified in place.

        Returns
        -------
        updated : Annotation
            Updated annotations (`self` unless `copy` is True).
        """

        result = self.copy() if copy else self
        for segment, track, label in annotation.itertracks(label=True):
            result[segment, track] = label
        return result

    def label_timeline(self, label, copy=True):
        """Get timeline for a given label

        Parameters
        ----------
        label :
            Requested label.
        copy : bool, optional
            When True (default), return a copy of the internal timeline.
            When False, return the internal timeline itself.

        Returns
        -------
        timeline : :class:`Timeline`
            Timeline made of all segments annotated with `label`
            (a new empty timeline if `label` is not used).
        """

        if label not in self.labels():
            return Timeline(uri=self.uri)

        # NOTE(review): the body of this `if` was missing; refreshing through
        # `_updateLabels` is assumed -- confirm.
        if self._labelNeedsUpdate[label]:
            self._updateLabels()

        if copy:
            return self._labels[label].copy()

        return self._labels[label]

    def label_coverage(self, label):
        """Return label coverage (or support)

        Parameters
        ----------
        label : any valid label

        Returns
        -------
        coverage : Timeline
            Result of `coverage()` on the timeline of `label`.
        """

        return self.label_timeline(label, copy=False).coverage()

    def label_duration(self, label):
        """Return `duration()` of the timeline of `label`."""
        timeline = self.label_timeline(label, copy=False)
        return timeline.duration()

    def chart(self, percent=False):
        """Label chart based on their duration

        Parameters
        ----------
        percent : bool, optional
            Return total duration percentage (rather than raw duration)

        Returns
        -------
        chart : (label, duration) iterable
            Sorted from longest to shortest.
        """

        chart = sorted(
            [(label, self.label_duration(label)) for label in self.labels()],
            key=lambda x: x[1],
            reverse=True,
        )

        if percent:
            # each duration becomes its share of the summed label durations
            total = np.sum([duration for _, duration in chart])
            chart = [(label, duration / total) for (label, duration) in chart]

        return chart

    def argmax(self, segment=None):
        """Get most frequent label

        Parameters
        ----------
        segment : Segment, optional
            Section of annotation where to look for the most frequent label.
            Defaults to whole annotation extent.

        Returns
        -------
        label : any existing label or None
            Label with longest intersection, or None when the annotation is
            empty or no label intersects `segment`.

        Examples
        --------

            >>> annotation = Annotation(modality='speaker')
            >>> annotation[Segment(0, 10), 'speaker1'] = 'Alice'
            >>> annotation[Segment(8, 20), 'speaker1'] = 'Bob'
            >>> print("%s is such a talker!" % annotation.argmax())
            Bob is such a talker!
            >>> segment = Segment(22, 23)
            >>> if not annotation.argmax(segment):
            ...    print("No label intersecting %s" % segment)
            No label intersecting [22 --> 23]

        """

        # if annotation is empty, obviously there is no most frequent label
        if not self:
            return None

        # if segment is not provided, just look for the overall most frequent
        # label (ie. set segment to the extent of the annotation)
        if segment is None:
            segment = self.get_timeline(copy=False).extent()

        # compute intersection duration for each label
        durations = {
            lbl: self.label_timeline(lbl, copy=False).crop(segment, mode="intersection").duration()
            for lbl in self.labels()
        }

        # find the most frequent label
        label = max(durations, key=durations.get)

        # in case all durations were zero, there is no most frequent label
        return label if durations[label] > 0 else None

    def translate(self, translation):
        """Translate labels

        Parameters
        ----------
        translation: dict
            Label translation.
            Labels with no associated translation are kept unchanged.

        Returns
        -------
        translated : :class:`Annotation`
            New annotation with translated labels.
        """

        assert isinstance(translation, dict)

        # create an empty copy
        translated = self.empty()

        for segment, track, label in self.itertracks(label=True):
            # only transform labels that have an actual translation
            # in the provided dictionary, keep the others as they are.
            translated[segment, track] = translation.get(label, label)

        return translated

    def __mod__(self, translation):
        """Operator form of `translate`: ``annotation % translation``."""
        return self.translate(translation)

    def anonymize_labels(self, generator="string"):
        """Anonymize labels

        Create a new annotation where labels are anonymized.

        Parameters
        ----------
        generator : {'string', 'int', iterator}, optional
            Source of new labels: 'string' uses `string_generator()`, 'int'
            uses `int_generator()`, anything else is used as an iterator.
            Defaults to 'string'.

        Returns
        -------
        anonymized : :class:`Annotation`
            New annotation with anonymized labels.
        """

        if generator == "string":
            generator = string_generator()
        elif generator == "int":
            generator = int_generator()

        # one new label per existing label, in the order given by `labels()`
        mapping = {label: next(generator) for label in self.labels()}
        return self.translate(mapping)

    def anonymize_tracks(self, generator="string"):
        """Anonymize tracks

        Create a new annotation where each track has a unique label.

        Parameters
        ----------
        generator : {'string', 'int', iterator}, optional
            Source of new labels: 'string' uses `string_generator()`, 'int'
            uses `int_generator()`, anything else is used as an iterator.
            Default to 'string'.

        Returns
        -------
        anonymized : `Annotation`
            New annotation with anonymized tracks.
        """

        if generator == "string":
            generator = string_generator()
        elif generator == "int":
            generator = int_generator()

        # segments and track names are kept; every track gets the next label
        anonymized = self.empty()
        for s, t, _ in self.itertracks(label=True):
            anonymized[s, t] = next(generator)

        return anonymized

    def smooth(self, collar=0.0):
        """Smooth annotation

        Create new annotation where contiguous tracks with same label are
        merged into one longer track.

        Parameters
        ----------
        collar : float
            If collar is positive, also merge tracks separated by less than
            collar duration.

        Returns
        -------
        annotation : Annotation
            New annotation where contiguous tracks with same label are merged
            into one long track.

        Remarks
        -------
            Track names are lost in the process.
        """

        # initialize an empty annotation
        # with same uri and modality as original
        smoothed = self.empty()
        for label in self.labels():

            # get timeline for current label
            timeline = self.label_timeline(label, copy=True)

            # fill the gaps shorter than collar
            if collar > 0.0:
                gaps = timeline.gaps()
                for gap in gaps:
                    if gap.duration < collar:
                        # NOTE(review): this statement was missing; assumes
                        # `Timeline.add(segment)` inserts in place -- confirm
                        timeline.add(gap)

            # reconstruct annotation with merged tracks
            for segment in timeline.coverage():
                track = smoothed.new_track(segment)
                smoothed[segment, track] = label

        # return
        return smoothed

    def co_iter(self, other):
        """Iterate over pairs of tracks from both annotations

        Segment pairs come from `co_iter` on the two segment timelines; for
        each pair, every combination of their tracks is yielded.

        Parameters
        ----------
        other : Annotation

        Yields
        ------
        (segment, track), (other_segment, other_track)
        """
        timeline = self.get_timeline(copy=False)
        other_timeline = other.get_timeline(copy=False)
        for s, S in timeline.co_iter(other_timeline):
            # tracks are sorted by string representation for a stable order
            tracks = sorted(self.get_tracks(s), key=str)
            other_tracks = sorted(other.get_tracks(S), key=str)
            for t, T in itertools.product(tracks, other_tracks):
                yield (s, t), (S, T)

    def __mul__(self, other):
        """Compute cooccurrence matrix

        Rows are the labels of this annotation, columns the labels of `other`;
        each cell sums the duration of the intersections of co-occurring
        tracks carrying that pair of labels.
        """
        rows = self.labels()
        cols = other.labels()
        zeros = np.zeros((len(rows), len(cols)))
        cooccurrence = DataArray(zeros, coords=[("i", rows), ("j", cols)])

        for (seg, trk), (other_seg, other_trk) in self.co_iter(other):
            row_label = self[seg, trk]
            col_label = other[other_seg, other_trk]
            overlap = seg & other_seg
            cooccurrence.loc[row_label, col_label] += overlap.duration

        return cooccurrence

    def for_json(self):
        """Return a dictionary describing this annotation.

        The dictionary holds the class name, one entry per track (segment,
        track name and label) and, when set, the uri and modality.
        """

        data = {PYANNOTE_JSON: self.__class__.__name__}
        content = [
            {PYANNOTE_SEGMENT: s.for_json(), PYANNOTE_TRACK: t, PYANNOTE_LABEL: l}
            for s, t, l in self.itertracks(label=True)
        ]
        data[PYANNOTE_JSON_CONTENT] = content

        # uri and modality are only stored when they are set (truthy)
        if self.uri:
            data[PYANNOTE_URI] = self.uri

        if self.modality:
            data[PYANNOTE_MODALITY] = self.modality

        return data

    @classmethod
    def from_json(cls, data):
        """Build an annotation from a dictionary such as the one `for_json` returns.

        Missing uri or modality entries default to None.
        """

        uri = data.get(PYANNOTE_URI, None)
        modality = data.get(PYANNOTE_MODALITY, None)
        annotation = cls(uri=uri, modality=modality)
        for one in data[PYANNOTE_JSON_CONTENT]:
            segment = Segment.from_json(one[PYANNOTE_SEGMENT])
            track = one[PYANNOTE_TRACK]
            label = one[PYANNOTE_LABEL]
            annotation[segment, track] = label

        return annotation

    def _repr_png_(self):
        """Delegate to `repr_annotation` from the sibling `notebook` module.

        NOTE(review): presumably the rich-display hook used by IPython /
        Jupyter to render the annotation as a PNG -- confirm.
        """
        # imported inside the method, so `notebook` is only loaded on demand
        from .notebook import repr_annotation

        return repr_annotation(self)
Esempio n. 6
class Annotation(object):

    uri : string, optional
        uniform resource identifier of annotated document
    modality : string, optional
        name of annotated modality


    @classmethod
    def from_df(cls, df, uri=None, modality=None):
        """Build an annotation from a DataFrame

        Parameters
        ----------
        df : DataFrame
            Must contain the following columns: 'segment', 'track' and 'label'
        uri : str, optional
            Resource identifier
        modality : str, optional
            Name of annotated modality

        Returns
        -------
        annotation : Annotation
            One track per row of `df`.
        """

        annotation = cls(uri=uri, modality=modality)
        # NOTE(review): the loop header was truncated after `df[`; the column
        # selection below follows the documented columns -- confirm.
        for _, (segment, track, label) in df[['segment', 'track', 'label']].iterrows():
            annotation[segment, track] = label

        return annotation

    def __init__(self, uri=None, modality=None):
        """Create an empty annotation.

        Parameters
        ----------
        uri : string, optional
            uniform resource identifier of annotated document
        modality : string, optional
            name of annotated modality
        """
        super(Annotation, self).__init__()

        self._uri = uri
        self.modality = modality

        # sorted dictionary
        # keys: annotated segments
        # values: {track: label} dictionary
        # NOTE(review): this call was truncated; `updator=TimelineUpdator`
        # is the form used where `copy` rebuilds `_tracks` -- confirm.
        self._tracks = SortedDict(key_type=(float, float),
                                  updator=TimelineUpdator)

        # dictionary
        # key: label
        # value: timeline
        self._labels = {}
        self._labelNeedsUpdate = {}

        # timeline meant to store all annotated segments
        self._timeline = Timeline(uri=uri)
        self._timelineNeedsUpdate = True

    def _get_uri(self):
        return self._uri

    def _set_uri(self, uri):
        # update uri for all internal timelines
        for _, timeline in self._labels.iteritems():
            timeline.uri = uri
        self._uri = uri

    uri = property(_get_uri, fset=_set_uri, doc="Resource identifier")

    def _updateLabels(self):
        """Rebuild the timeline of every label marked as needing an update."""

        # (re-)initialize changed label timeline
        for l, needsUpdate in self._labelNeedsUpdate.items():
            if needsUpdate:
                self._labels[l] = Timeline(uri=self.uri)

        # fill changed label timeline
        for segment, track, l in self.itertracks(label=True):
            if self._labelNeedsUpdate[l]:
                # NOTE(review): this statement was missing; assumes
                # `Timeline.add(segment)` inserts in place -- confirm
                self._labels[l].add(segment)

        self._labelNeedsUpdate = {l: False for l in self._labels}

        # remove "ghost" labels (i.e. label with empty timeline)
        # iterate over a snapshot since entries are removed on the way
        for l in list(self._labels):
            if not self._labels[l]:
                # NOTE(review): removal statements were missing -- confirm
                self._labels.pop(l)
                self._labelNeedsUpdate.pop(l, None)

    def __len__(self):
        """Number of segments"""
        return self._tracks.length()

    def __nonzero__(self):
        return self._tracks.length() > 0

    def itersegments(self):
        """Segment iterator"""
        return iter(self._tracks)

    def itertracks(self, label=False):
        """Track iterator

        Parameters
        ----------
        label : bool, optional
            When True, yield (segment, track, label) tuples.
            When False (default), yield (segment, track) tuples.
        """
        for segment, tracks in self._tracks.items():
            for track, lbl in tracks.items():
                if label:
                    yield segment, track, lbl
                else:
                    yield segment, track

    def _updateTimeline(self):
        self._timeline = Timeline(segments=self._tracks, uri=self.uri)
        self._timelineNeedsUpdate = False

    def get_timeline(self):
        """Get timeline made of annotated segments

        The internal timeline is rebuilt first when it is marked as stale;
        the internal object itself (not a copy) is returned.
        """
        if self._timelineNeedsUpdate:
            self._updateTimeline()
        return self._timeline

    def __eq__(self, other):
        return self._tracks == other._tracks

    def __ne__(self, other):
        return self._tracks != other._tracks

    def __contains__(self, included):

        Use expression 'segment in annotation' or 'timeline in annotation'

        included : `Segment` or `Timeline`

        contains : bool
            True if every segment in `included` exists in annotation
            False otherwise

        return included in self.get_timeline()

    def crop(self, other, mode='intersection'):
        """Crop annotation

        Parameters
        ----------
        other : `Segment` or `Timeline`
            Focus of the crop.
        mode : {'strict', 'loose', 'intersection'}
            In 'strict' mode, only segments fully included in focus coverage
            are kept. In 'loose' mode, any intersecting segment is kept
            unchanged. In 'intersection' mode, only intersecting segments are
            kept and replaced by their actual intersection with the focus.

        Returns
        -------
        cropped : Annotation

        Raises
        ------
        TypeError
            If `other` is neither a `Segment` nor a `Timeline`.
        NotImplementedError
            If `mode` is not one of the supported modes.

        Remarks
        -------
        In 'intersection' mode, the best is done to keep the track names
        unchanged. However, in some cases where two original segments are
        cropped into the same resulting segments, conflicting track names are
        modified to make sure no track is lost.
        """

        # a single segment is handled as a one-segment timeline
        if isinstance(other, Segment):
            other = Timeline(segments=[other], uri=self.uri)
            return self.crop(other, mode=mode)

        if not isinstance(other, Timeline):
            raise TypeError("other must be a Segment or a Timeline.")

        cropped = self.__class__(uri=self.uri, modality=self.modality)

        if mode == 'loose':
            # TODO
            # update co_iter to yield (segment, tracks), (segment, tracks)
            # instead of segment, segment
            # This would avoid calling ._tracks.get(segment)
            for segment, _ in self.get_timeline().co_iter(other):
                for track, label in self._tracks[segment].items():
                    cropped[segment, track] = label

        elif mode == 'strict':
            # TODO
            # see above
            for segment, other_segment in self.get_timeline().co_iter(other):
                if segment in other_segment:
                    for track, label in self._tracks[segment].items():
                        cropped[segment, track] = label

        elif mode == 'intersection':
            # see above
            for segment, other_segment in self.get_timeline().co_iter(other):
                intersection = segment & other_segment
                for track, label in self._tracks[segment].items():
                    # keep the track name unless it is already taken
                    track = cropped.new_track(intersection, candidate=track)
                    cropped[intersection, track] = label

        else:
            raise NotImplementedError("unsupported mode: '%s'" % mode)

        return cropped

    def get_tracks(self, segment):
        """Set of tracks for query segment

        segment : `Segment`
            Query segment

        tracks : set
            Set of tracks for query segment
        return set(self._tracks.get(segment, {}))

    def has_track(self, segment, track):
        """Check whether a given track exists

        segment : `Segment`
            Query segment
        track :
            Query track

        exists : bool
            True if track exists for segment
        return track in self._tracks.get(segment, {})

    def get_track_by_name(self, track):
        """Get all tracks with given name

        track : any valid track name
            Requested name track

        tracks : list
            List of (segment, track) tuples
        raise NotImplementedError('')

    def copy(self):
        """Return a copy of this annotation.

        The per-segment track dictionaries and the per-label timelines are
        duplicated, so modifying the copy does not affect the original.
        """

        # create new empty annotation
        copied = self.__class__(uri=self.uri, modality=self.modality)

        # deep copy internal track dictionary
        # NOTE(review): this call was truncated; `updator=TimelineUpdator`
        # is assumed to be the lost argument -- confirm.
        _tracks = [(key, dict(value)) for (key, value) in self._tracks.items()]
        copied._tracks = SortedDict(items=_tracks,
                                    key_type=(float, float),
                                    updator=TimelineUpdator)

        # deep copy internal label timelines
        _labels = {key: timeline.copy()
                   for (key, timeline) in self._labels.items()}
        copied._labels = _labels

        # deep copy need-update indicator
        copied._labelNeedsUpdate = dict(self._labelNeedsUpdate)

        # `_timeline` is not copied (the copy keeps the empty one created by
        # the constructor), so it must always be rebuilt on first access
        copied._timelineNeedsUpdate = True

        return copied

    def retrack(self):
        retracked = self.__class__(uri=self.uri, modality=self.modality)
        for n, (s, _, label) in enumerate(self.itertracks(label=True)):
            retracked[s, n] = label
        return retracked

    def new_track(self, segment, candidate=None, prefix=None):
        """Track name generator

        segment : Segment
        prefix : str, optional
        candidate : any valid track name

        track : str
            New track name

        # obtain list of existing tracks for segment
        existing_tracks = set(self._tracks.get(segment, {}))

        # if candidate is provided, check whether it already exists
        # in case it does not, use it
        if (candidate is not None) and (candidate not in existing_tracks):
            return candidate

        # no candidate was provided or the provided candidate already exists
        # we need to create a brand new one

        # by default (if prefix is not provided), use ''
        if prefix is None:
            prefix = ''

        # find first non-existing track name for segment
        # eg. if '0' exists, try '1', then '2', ...
        count = 0
        while ('%s%d' % (prefix, count)) in existing_tracks:
            count += 1

        # return first non-existing track name
        return '%s%d' % (prefix, count)

    def __str__(self):
        """Human-friendly representation"""
        # TODO: use pandas.DataFrame
        return "\n".join(["%s %s %s" % (s, t, l)
                          for s, t, l in self.itertracks(label=True)])

    def __delitem__(self, key):

        # del annotation[segment]
        if isinstance(key, Segment):

            # Pop segment out of dictionary
            # and get corresponding tracks
            # Raises KeyError if segment does not exist
            tracks = self._tracks.pop(key)

            # mark timeline as modified
            self._timelineNeedsUpdate = True

            # mark every label in tracks as modified
            for track, label in tracks.iteritems():
                self._labelNeedsUpdate[label] = True

        # del annotation[segment, track]
        elif isinstance(key, tuple) and len(key) == 2:

            # get segment tracks as dictionary
            # if segment does not exist, get empty dictionary
            # Raises KeyError if segment does not exist
            tracks = self._tracks[key[0]]

            # pop track out of tracks dictionary
            # and get corresponding label
            # Raises KeyError if track does not exist
            label = tracks.pop(key[1])

            # mark label as modified
            self._labelNeedsUpdate[label] = True

            # if tracks dictionary is now empty,
            # remove segment as well
            if not tracks:
                self._timelineNeedsUpdate = True

            raise KeyError('')

    # label = annotation[segment, track]
    def __getitem__(self, key):

        if isinstance(key, Segment):
            key = (key, '_')

        return self._tracks[key[0]][key[1]]

    # annotation[segment, track] = label
    def __setitem__(self, key, label):

        if isinstance(key, Segment):
            key = (key, '_')

        if key[0] not in self._tracks:
            self._tracks[key[0]] = {}
            self._timelineNeedsUpdate = True

        self._tracks[key[0]][key[1]] = label
        self._labelNeedsUpdate[label] = True

    def empty(self):
        return self.__class__(uri=self.uri, modality=self.modality)

    @staticmethod
    def _cmp_labels(label1, label2):
        """Three-way comparison of two labels, `Unknown` instances last.

        Returns a negative, zero or positive number when `label1` sorts
        before, with or after `label2`.
        """
        # unknown > not_unknown
        # otherwise, just use regular comparison
        u1 = isinstance(label1, Unknown)
        u2 = isinstance(label2, Unknown)
        if u1 == u2:
            # same result as the Python 2 builtin cmp(label1, label2)
            return (label1 > label2) - (label1 < label2)
        return u1 - u2

    def labels(self, unknown=True):
        """List of labels

        Parameters
        ----------
        unknown : bool, optional
            When False, do not return Unknown instances
            When True, return any label (even Unknown instances)

        Returns
        -------
        labels : list
            Sorted list of labels
        """
        import functools

        # refresh the label timelines first if any label was modified
        # NOTE(review): the body of this `if` was missing; calling
        # `_updateLabels` is assumed -- confirm.
        if any(self._labelNeedsUpdate.values()):
            self._updateLabels()

        # cmp_to_key works on both Python 2.7 and 3, unlike sorted(cmp=...)
        labels = sorted(self._labels, key=functools.cmp_to_key(self._cmp_labels))

        if not unknown:
            labels = [l for l in labels if not isinstance(l, Unknown)]

        return labels

    def get_labels(self, segment, unknown=True, unique=True):
        """Local set of labels

        segment : Segment
            Segments to get label from.
        unknown : bool, optional
            When False, do not return Unknown instances
            When True, return any label (even Unknown instances)
        unique : bool, optional
            When False, return the list of (possibly repeated) labels.
            When True (default), return the set of labels
        labels : set
            Set of labels for `segment` if it exists, empty set otherwise.


            >>> annotation = Annotation()
            >>> segment = Segment(0, 2)
            >>> annotation[segment, 'speaker1'] = 'Bernard'
            >>> annotation[segment, 'speaker2'] = 'John'
            >>> print sorted(annotation.get_labels(segment))
            set(['Bernard', 'John'])
            >>> print annotation.get_labels(Segment(1, 2))


        labels = self._tracks.get(segment, {}).values()

        if not unknown:
            labels = [l for l in labels if not isinstance(l, Unknown)]

        if unique:
            labels = set(labels)

        return labels

    def subset(self, labels, invert=False):
        """Annotation subset

        Extract annotation subset based on labels

        labels : set
            Set of labels
        invert : bool, optional
            If invert is True, extract all but requested `labels`

        subset : `Annotation`
            Annotation subset.

        if not isinstance(labels, set):
            raise TypeError('labels must be provided as a set of labels.')

        if invert:
            labels = set(self.labels()) - labels
            labels = labels & set(self.labels())

        sub = self.__class__(uri=self.uri, modality=self.modality)
        for segment, track, label in self.itertracks(label=True):
            if label in labels:
                sub[segment, track] = label

        return sub

    def label_timeline(self, label):
        """Get timeline for a given label

        label :

        timeline : :class:`Timeline`
            Timeline made of all segments annotated with `label`

        if label not in self.labels():
            return Timeline(uri=self.uri)

        if self._labelNeedsUpdate[label]:

            for l, hasChanged in self._labelNeedsUpdate.iteritems():
                if hasChanged:
                    self._labels[l] = Timeline(uri=self.uri)

            for segment, track, l in self.itertracks(label=True):
                if self._labelNeedsUpdate[l]:

            self._labelNeedsUpdate = {l: False for l in self._labels}

        return self._labels[label].copy()

    def label_coverage(self, label):

        label :


        if label not in self.labels():
            return Timeline(uri=self.uri)

        return self.label_timeline(label).coverage()

    def label_duration(self, label):

        if label not in self.labels():
            return 0.

        return self.label_timeline(label).duration()

    def chart(self, percent=False):
        Label chart based on their duration

        percent : bool, optional
            Return total duration percentage (rather than raw duration)

        chart : (label, duration) iterable
            Sorted from longest to shortest.


        chart = sorted([(label, self.label_duration(label))
                        for label in self.labels()],
                       key=lambda x: x[1], reverse=True)

        if percent:
            total = np.sum([duration for _, duration in chart])
            chart = [(label, duration/total) for (label, duration) in chart]

        return chart

    def argmax(self, segment=None, known_first=False):
        """Get most frequent label

        segment : Segment, optional
            Section of annotation where to look for the most frequent label.
            Defaults to whole annotation extent.
        known_first: bool, optional
            If True, artificially reduces the duration of intersection of
            `Unknown` labels so that 'known' labels are returned first.

        label : any existing label or None
            Label with longest intersection


            >>> annotation = Annotation(modality='speaker')
            >>> annotation[Segment(0, 10), 'speaker1'] = 'Alice'
            >>> annotation[Segment(8, 20), 'speaker1'] = 'Bob'
            >>> print "%s is such a talker!" % annotation.argmax()
            Bob is such a talker!
            >>> segment = Segment(22, 23)
            >>> if not annotation.argmax(segment):
            ...    print "No label intersecting %s" % segment
            No label intersection [22 --> 23]


        # if annotation is empty, obviously there is no most frequent label
        if not self:
            return None

        # if segment is not provided, just look for the overall most frequent
        # label (ie. set segment to the extent of the annotation)
        if segment is None:
            segment = self.get_timeline().extent()

        # compute intersection duration for each label
        durations = {lbl: self.label_timeline(lbl).crop(segment, mode='intersection').duration()
                     for lbl in self.labels()}

        # artifically reduce intersection duration of Unknown labels
        # so that 'known' labels are returned first
        if known_first:
            maxduration = max(durations.values())
            for lbl in durations.keys():
                if isinstance(lbl, Unknown):
                    durations[lbl] = durations[lbl] - maxduration

        # find the most frequent label
        label = max(durations.iteritems(), key=operator.itemgetter(1))[0]

        # in case all durations were zero, there is no most frequent label
        return label if durations[label] > 0 else None

    def translate(self, translation):
        """Translate labels

        Build a new annotation holding the same (segment, track) pairs as
        this one, with each label replaced by its entry in `translation`.

        Parameters
        ----------
        translation: dict
            Label translation.
            Labels with no associated translation are kept unchanged.

        Returns
        -------
        translated : :class:`Annotation`
            New annotation with translated labels.

        Raises
        ------
        AssertionError
            If `translation` is not a dict.
        """

        assert isinstance(translation, dict)

        # create an empty copy
        translated = self.empty()

        for segment, track, label in self.itertracks(label=True):
            # only transform labels that have an actual translation
            # in the provided dictionary, keep the others as they are.
            translated[segment, track] = translation.get(label, label)

        return translated

    def __mod__(self, translation):
        """Operator form of `translate`: ``annotation % translation``.

        Delegates to ``self.translate(translation)`` and returns its result
        unchanged.
        """
        return self.translate(translation)

    def anonymize_labels(self):
        """Anonymize labels

        Create a new annotation where labels are anonymized, ie. each label
        is replaced by a unique `Unknown` instance.

        Returns
        -------
        anonymized : :class:`Annotation`
            New annotation with anonymized labels.
        """

        # one fresh `Unknown` per distinct label: tracks that shared a label
        # in the original still share the same `Unknown` in the result
        translation = {label: Unknown() for label in self.labels()}
        return self % translation

    def anonymize_tracks(self):
        """Anonymize tracks

        Create a new annotation where each track is anonymized, i.e. the label
        of each track is set to a unique `Unknown` instance

        Returns
        -------
        anonymized : `Annotation`
            Anonymized annotation
        """

        anonymized = self.empty()
        # a new `Unknown` is created for every (segment, track) pair, so two
        # tracks that shared a label in the original do not share one here
        for segment, track, _ in self.itertracks(label=True):
            anonymized[segment, track] = Unknown()
        return anonymized

    def smooth(self, collar=0.):
        """Smooth annotation

        Create new annotation where contiguous tracks with same label are
        merged into one longer track.

        Parameters
        ----------
        collar : float
            If collar is positive, also merge tracks separated by less than
            collar duration.

        Returns
        -------
        annotation : Annotation
            New annotation where contiguous tracks with same label are merged
            into one long track.

        Remarks
        -------
            Track names are lost in the process.
        """

        # initialize an empty annotation
        # with same uri and modality as original
        smoothed = self.empty()

        for label in self.labels():

            # get timeline for current label
            timeline = self.label_timeline(label)

            # fill the gaps shorter than collar
            if collar > 0.:
                # work on a copy: `label_timeline` may hand back a timeline
                # that this annotation keeps around, and it must not end up
                # containing the artificial gap segments
                timeline = timeline.copy()
                for gap in timeline.gaps():
                    if gap.duration < collar:
                        timeline.add(gap)

            # reconstruct annotation with merged tracks
            for segment in timeline.coverage():
                track = smoothed.new_track(segment)
                smoothed[segment, track] = label

        return smoothed

    def co_iter(self, other):
        """Iterate over pairs of tracks from this annotation and `other`

        Segment pairs come from ``co_iter`` on the two annotations'
        timelines; for each such pair, every combination of a track from
        this annotation and a track from `other` is generated.

        Parameters
        ----------
        other : Annotation

        Generates
        ---------
        (segment, track), (other_segment, other_track)
        """

        timeline = self.get_timeline()
        other_timeline = other.get_timeline()

        for segment, other_segment in timeline.co_iter(other_timeline):
            tracks = self.get_tracks(segment)
            other_tracks = other.get_tracks(other_segment)
            # cartesian product: every track on one side against every
            # track on the other side
            for track, other_track in itertools.product(tracks, other_tracks):
                yield (segment, track), (other_segment, other_track)

    def for_json(self):
        """Build a dictionary representation of this annotation

        Returns
        -------
        data : dict
            Maps `PYANNOTE_JSON_ANNOTATION` to a list of
            ``[segment.for_json(), track, label]`` entries, one per track.
            `PYANNOTE_URI` and `PYANNOTE_MODALITY` entries are added only
            when `uri` / `modality` are truthy.
        """

        # NOTE(review): the key is the one `from_json` reads its tracks
        # from -- confirm against the module-level constants
        data = {
            PYANNOTE_JSON_ANNOTATION: [
                [segment.for_json(), track, label]
                for segment, track, label in self.itertracks(label=True)
            ]
        }

        if self.uri:
            data[PYANNOTE_URI] = self.uri

        if self.modality:
            data[PYANNOTE_MODALITY] = self.modality

        return data

    @classmethod
    def from_json(cls, data):
        """Alternate constructor: build an annotation from a dictionary

        Parameters
        ----------
        data : dict
            Must contain `PYANNOTE_JSON_ANNOTATION`, an iterable of
            (segment, track, label) triples. `PYANNOTE_URI` and
            `PYANNOTE_MODALITY` are optional and default to None.

        Returns
        -------
        annotation : instance of `cls`
            New annotation filled with the tracks found in `data`.

        Raises
        ------
        KeyError
            If `data` has no `PYANNOTE_JSON_ANNOTATION` entry.
        """

        uri = data.get(PYANNOTE_URI, None)
        modality = data.get(PYANNOTE_MODALITY, None)
        annotation = cls(uri=uri, modality=modality)

        # NOTE(review): segments are used as-is; presumably they were already
        # turned into Segment objects by the JSON decoding layer -- confirm
        for segment, track, label in data[PYANNOTE_JSON_ANNOTATION]:
            annotation[segment, track] = label

        return annotation

    def _repr_png_(self):
        """Return the result of `pyannote.core.notebook.repr_annotation`

        NOTE(review): the name matches the IPython rich-display PNG hook,
        so this is presumably what notebooks call to render the annotation
        -- confirm.
        """
        # imported at call time rather than at module level; presumably to
        # keep the notebook/plotting dependency optional -- confirm
        from pyannote.core.notebook import repr_annotation
        return repr_annotation(self)