Example #1
    def preprocess(self, current_file, identifier=None):
        """Pre-compute file-wise X and y"""

        # extract features for the whole file
        # (if it has not been done already)
        current_file = self.periodic_preprocess(current_file,
                                                identifier=identifier)

        # if labels have already been extracted, do nothing
        if identifier in self.preprocessed_.setdefault('y', {}):
            return current_file

        # get features as pyannote.core.SlidingWindowFeature instance
        X = self.preprocessed_['X'][identifier]
        sw = X.sliding_window
        n_samples = X.getNumber()

        # pad with a few extra samples so that mode='loose' cropping below
        # can safely index slightly past the last sample
        y = np.zeros((n_samples + 4, 1), dtype=np.int8) - 1
        # -1 ==> unknown / 0 ==> non-change region / 1 ==> change region

        annotated = get_annotated(current_file)
        annotation = current_file['annotation']

        segments = []
        for segment, _ in annotation.itertracks():
            segments.append(
                Segment(segment.start - self.balance,
                        segment.start + self.balance))
            segments.append(
                Segment(segment.end - self.balance,
                        segment.end + self.balance))
        change_part = Timeline(segments).support().crop(annotated,
                                                        mode='intersection')

        # iterate over non-change regions
        for non_changes in change_part.gaps(annotated):
            indices = sw.crop(non_changes, mode='loose')
            y[indices, 0] = 0

        # iterate over change regions
        for changes in change_part:
            indices = sw.crop(changes, mode='loose')
            y[indices, 0] = 1

        y = SlidingWindowFeature(y[:-1], sw)
        self.preprocessed_['y'][identifier] = y

        return current_file
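
Stripped of the model bookkeeping, the Timeline geometry above is a dilate-merge-crop-gaps pattern. A minimal, self-contained sketch with invented boundary times and an invented `balance` value:

from pyannote.core import Segment, Timeline

balance = 0.5                  # hypothetical dilation, in seconds
annotated = Timeline([Segment(0.0, 10.0)])
boundaries = [2.0, 6.0]        # hypothetical speaker-change times

change_part = Timeline(
    [Segment(t - balance, t + balance) for t in boundaries]
).support().crop(annotated, mode='intersection')

non_change = change_part.gaps(annotated)
# change_part covers [1.5, 2.5] and [5.5, 6.5];
# non_change covers [0, 1.5], [2.5, 5.5] and [6.5, 10]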
Example #2
def main():
    usage = "%prog [options] vadRTTM overlapRTTM outputRTTM"
    desc = "Convert the text file from diarization, of the form: \
            ID t_in t_out \
            into a Kaldi-format file for the spkdet task"

    version = "%prog 0.1"
    parser = OptionParser(usage=usage, description=desc, version=version)
    (opt, args) = parser.parse_args()

    if len(args) != 3:
        parser.error("Incorrect number of arguments")
    vadrttm, overlaprttm, outputrttm = args

    # Read the RTTM files and load them into memory
    vad = pyannote.database.util.load_rttm(vadrttm)
    ovl = pyannote.database.util.load_rttm(overlaprttm)

    fw = open(outputrttm, 'wt')
    for name in vad:

        # Examples
        # speech = vad['EN2002a.Mix-Headset-0000000-0006000'].get_timeline()
        # duration = vad['EN2002a.Mix-Headset-0000000-0006000'].get_timeline()[-1][1]
        # overlap = ovl['EN2002a.Mix-Headset-0000000-0006000'].get_timeline()
        speech = vad[name].get_timeline()
        duration = vad[name].get_timeline()[-1][1]
        if name in ovl.keys():
            overlap = ovl[name].get_timeline()

            # just get the intersections of the VAD and overlap
            intersection = Timeline()
            for speech_segment, overlap_segment in speech.co_iter(overlap):
                intersection.add(speech_segment & overlap_segment)

            keep = intersection.gaps(support=Segment(0, duration))

            vad_without_overlap = speech.crop(keep)
        else:
            vad_without_overlap = speech

        # Write RTTM (write_rttm is a helper assumed to be defined elsewhere)
        write_rttm(fw, vad_without_overlap, label='speech')
    fw.close()
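
The overlap-removal logic is the reusable part: intersect the two timelines with co_iter, take the gaps of the intersection, and crop. A minimal sketch with made-up segments:

from pyannote.core import Segment, Timeline

speech = Timeline([Segment(0.0, 4.0), Segment(5.0, 9.0)])
overlap = Timeline([Segment(3.0, 6.0)])
duration = 10.0

intersection = Timeline()
for speech_segment, overlap_segment in speech.co_iter(overlap):
    intersection.add(speech_segment & overlap_segment)

keep = intersection.gaps(support=Segment(0.0, duration))
vad_without_overlap = speech.crop(keep)
# keeps [0, 3] and [6, 9]: speech minus the overlapped parts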
Example #3
    def run(self):

        # wav file duration
        wav = self.in_wav().path
        with contextlib.closing(wave.open(wav, 'r')) as f:
            frames = f.getnframes()
            rate = f.getframerate()
        duration = frames / rate
        extent = Segment(0., duration)

        with self.in_speaker().open('r') as fp:
            speaker = pyannote.core.json.load(fp)

        timeline = Timeline()
        for segment, _ in speaker.itertracks():
            timeline.add(segment)

        # fill gaps
        for gap in timeline.gaps(extent):
            if gap.duration < self.fill_gaps:
                timeline.add(gap)

        # note: coverage() was renamed support() in later pyannote.core releases
        timeline = timeline.coverage()

        # dump as annotation...
        if self.to_annotation:

            annotation = Annotation()
            for s, segment in enumerate(timeline):
                annotation[segment] = s
            annotation = annotation.anonymize_labels(generator='string')

            with self.out_put().open('w') as fp:
                pyannote.core.json.dump(annotation, fp)

        # ... or as timeline
        else:

            with self.out_put().open('w') as fp:
                pyannote.core.json.dump(timeline, fp)
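
The gap-filling idiom generalizes to any timeline. A minimal sketch with an invented threshold, using support(), the current name for coverage():

from pyannote.core import Segment, Timeline

fill_gaps = 1.0                # hypothetical threshold, in seconds
extent = Segment(0.0, 10.0)
timeline = Timeline([Segment(0.0, 3.0), Segment(3.5, 6.0), Segment(8.0, 10.0)])

for gap in timeline.gaps(extent):
    if gap.duration < fill_gaps:
        timeline.add(gap)
timeline = timeline.support()  # merge the now-touching segments
# yields [0, 6] and [8, 10]: the 0.5 s gap is filled, the 2 s gap is kept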
Example #4
    def remove_excluded(self):
        if len(self.excluded) == 0:
            return

        from pyannote.core import Segment, Timeline

        segments = []
        for recording, _segments in self.segments.groupby(
                "recording_filename"):
            sampled = Timeline(segments=[
                Segment(segment_onset, segment_offset)
                for segment_onset, segment_offset in _segments[
                    ["segment_onset", "segment_offset"]].values
            ])

            excl_segments = self.excluded.loc[
                self.excluded["recording_filename"] == recording]
            excl = Timeline(segments=[
                Segment(segment_onset, segment_offset)
                for segment_onset, segment_offset in excl_segments[
                    ["segment_onset", "segment_offset"]].values
            ])

            # sampled = sampled.extrude(excl)  # not released yet
            extent_tl = Timeline([sampled.extent()], uri=sampled.uri)
            truncating_support = excl.gaps(support=extent_tl)
            sampled = sampled.crop(truncating_support, mode="intersection")

            segments.append(
                pd.DataFrame(
                    [[recording, s.start, s.end] for s in sampled],
                    columns=[
                        "recording_filename", "segment_onset", "segment_offset"
                    ],
                ))

        self.segments = pd.concat(segments)
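
The commented-out line hints at Timeline.extrude, which has since shipped in pyannote.core; on a release that provides it, the gaps()-and-crop() workaround collapses to one call. A sketch under that assumption:

from pyannote.core import Segment, Timeline

sampled = Timeline([Segment(0.0, 10.0)])
excl = Timeline([Segment(2.0, 3.0), Segment(7.0, 8.0)])

# assumes a pyannote.core release that ships Timeline.extrude
sampled = sampled.extrude(excl)
# yields [0, 2], [3, 7] and [8, 10]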
Example #5
def serial_speaker_to_Annotation(serial_speaker, uri=None, modality='speaker'):
    """
    Parameters
    ----------
    serial_speaker : `dict`
        loaded from a serial speaker JSON as defined
        in https://figshare.com/articles/TV_Series_Corpus/3471839
    uri (uniform resource identifier) : `str`, optional
        which identifies the annotation (e.g. episode number)
        Default : None
    modality : `str`, optional
        modality of the annotation as defined in https://github.com/pyannote/pyannote-core

    Returns
    -------
    annotation: pyannote `Annotation`
        for speaker identification/diarization as defined
        in https://github.com/pyannote/pyannote-core
    annotated: pyannote `Timeline`
        representing the annotated parts of the serial_speaker file
        Unknown speakers are not considered as annotated
    """

    annotation = Annotation(uri, modality)
    not_annotated = Timeline(uri=uri)

    for segment in serial_speaker["data"]["speech_segments"]:
        time = Segment(segment["start"], segment["end"])
        speaker_id = segment['speaker'].replace(" ", "_")
        annotation[time, speaker_id] = speaker_id
        if speaker_id == 'unknown':
            not_annotated.add(time)

    end = serial_speaker.get("duration", segment["end"])
    annotated = not_annotated.gaps(support=Segment(0.0, end))
    return annotation, annotated
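
A hypothetical input (structure inferred from the loop above, values invented) and the resulting `annotated` timeline:

serial_speaker = {
    "duration": 10.0,
    "data": {"speech_segments": [
        {"start": 1.0, "end": 4.0, "speaker": "Leonard Hofstadter"},
        {"start": 5.0, "end": 7.0, "speaker": "unknown"},
    ]},
}

annotation, annotated = serial_speaker_to_Annotation(serial_speaker, uri='S01E01')
# annotated covers [0, 5] and [7, 10]: the 'unknown' span is excluded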
Example #6
    def apply(self, predictions, dimension=0):
        """
        Parameters
        ----------
        predictions : SlidingWindowFeature
            Must be mono-dimensional
        dimension : int, optional
            Which dimension to process
        """

        if len(predictions.data.shape) == 1:
            data = predictions.data
        elif predictions.data.shape[1] == 1:
            data = predictions.data[:, 0]
        else:
            data = predictions.data[:, dimension]

        if self.log_scale:
            data = np.exp(data)

        n_samples = predictions.getNumber()
        window = predictions.sliding_window
        timestamps = [window[i].middle for i in range(n_samples)]

        # initial state
        start = timestamps[0]
        label = data[0] > self.onset

        if self.scale == 'absolute':
            mini = 0
            maxi = 1

        elif self.scale == 'relative':
            mini = np.nanmin(data)
            maxi = np.nanmax(data)

        elif self.scale == 'percentile':
            mini = np.nanpercentile(data, 1)
            maxi = np.nanpercentile(data, 99)

        onset = mini + self.onset * (maxi - mini)
        offset = mini + self.offset * (maxi - mini)

        # timeline meant to store 'active' segments
        active = Timeline()

        for t, y in zip(timestamps[1:], data[1:]):

            # currently active
            if label:
                # switching from active to inactive
                if y < offset:
                    segment = Segment(start - self.pad_onset,
                                      t + self.pad_offset)
                    active.add(segment)
                    start = t
                    label = False

            # currently inactive
            else:
                # switching from inactive to active
                if y > onset:
                    start = t
                    label = True

        # if active at the end, add final segment
        if label:
            segment = Segment(start - self.pad_onset, t + self.pad_offset)
            active.add(segment)

        # because of padding, some 'active' segments might be overlapping
        # therefore, we merge those overlapping segments
        active = active.support()

        # remove short 'active' segments
        active = Timeline(
            [s for s in active if s.duration > self.min_duration_on])

        # fill short 'inactive' segments
        inactive = active.gaps()
        for s in inactive:
            if s.duration < self.min_duration_off:
                active.add(s)
        active = active.support()

        return active
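
The heart of the method is onset/offset hysteresis: a region turns active when the score rises above `onset` and stays active until it drops below `offset`, both thresholds being rescaled into `[mini, maxi]`. A toy illustration of that rescaling (values invented):

import numpy as np

data = np.array([0.1, 0.2, 0.9, 0.8, 0.3, 0.1])
onset_ratio, offset_ratio = 0.7, 0.4     # hypothetical settings

# 'percentile' scale, as in the method above
mini = np.nanpercentile(data, 1)
maxi = np.nanpercentile(data, 99)
onset = mini + onset_ratio * (maxi - mini)    # ~0.66
offset = mini + offset_ratio * (maxi - mini)  # ~0.42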
Example #7
def gecko_JSON_to_Annotation(gecko_JSON,
                             uri=None,
                             modality='speaker',
                             confidence_threshold=0.0,
                             collar=0.0,
                             expected_min_speech_time=0.0,
                             manual=False):
    """
    Parameters
    ----------
    gecko_JSON : `dict`
        loaded from a Gecko-compliant JSON as defined in xml_to_GeckoJSON
    uri (uniform resource identifier) : `str`
        which identifies the annotation (e.g. episode number)
        Default : None
    modality : `str`
        modality of the annotation as defined in https://github.com/pyannote/pyannote-core
    confidence_threshold : `float`, Optional.
        The segments with confidence under confidence_threshold won't be added to UEM file.
        Defaults to keep every segment (i.e. 0.0)
    collar: `float`, Optional.
        Merge tracks with same label and separated by less than `collar` seconds.
        Defaults to keep tracks timeline untouched (i.e. 0.0)
    expected_min_speech_time: `float`, Optional.
        Threshold (in seconds) under which the total duration of speech time is suspicious (warns the user).
        Defaults to never suspect anything (i.e. 0.0)
    manual : `bool`
        Whether the JSON comes from a manual correction or straight from
        the forced-alignment output.
        In the former case, the region timings are used, and
        `confidence_threshold` and `collar` are thus irrelevant.
        In the latter case (default), the timing of each term is used.

    Returns
    -------
    annotation: pyannote `Annotation`
        for speaker identification/diarization as defined in https://github.com/pyannote/pyannote-core
    annotated: pyannote `Timeline`
        representing the annotated parts of the gecko_JSON files (depends on confidence_threshold)
    """
    annotation = Annotation(uri, modality)
    not_annotated = Timeline(uri=uri)
    for monologue in gecko_JSON["monologues"]:
        if not monologue:
            continue
        # '@' defined in https://github.com/hbredin/pyannote-db-plumcot/blob/develop/CONTRIBUTING.md#idepisodetxt
        # '+' defined in https://github.com/gong-io/gecko/blob/master/app/geckoModule/constants.js#L35
        speaker_ids = re.split(r"@|\+", monologue["speaker"]["id"])
        if manual:
            for speaker_id in speaker_ids:  # most of the time there's only one
                if speaker_id != '':  # happens with "all@"
                    annotation[Segment(monologue["start"], monologue["end"]),
                               speaker_id] = speaker_id
        else:
            for i, term in enumerate(monologue["terms"]):
                for speaker_id in speaker_ids:  # most of the time there's only one
                    if speaker_id != '':  # happens with "all@"
                        annotation[Segment(term["start"], term["end"]),
                                   speaker_id] = speaker_id
                if term["confidence"] <= confidence_threshold:
                    not_annotated.add(Segment(term["start"], term["end"]))

    if manual:
        annotated = Timeline([Segment(0.0, monologue["end"])], uri)
    else:
        annotation = annotation.support(collar)
        annotated = not_annotated.gaps(support=Segment(0.0, term["end"]))
    total_speech_time = annotation.crop(annotated).get_timeline().duration()
    if total_speech_time < expected_min_speech_time:
        warnings.warn(
            f"total speech time of {uri} is only {total_speech_time}")
    return annotation, annotated
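
A hypothetical Gecko-style payload (values invented) exercising the default, non-manual path, where per-term timings and confidences drive the result:

gecko_JSON = {"monologues": [{
    "speaker": {"id": "alice@bob"},
    "start": 0.0, "end": 2.0,
    "terms": [
        {"start": 0.0, "end": 1.0, "confidence": 0.9},
        {"start": 1.0, "end": 2.0, "confidence": 0.2},
    ],
}]}

annotation, annotated = gecko_JSON_to_Annotation(
    gecko_JSON, uri='S01E01', confidence_threshold=0.5)
# both 'alice' and 'bob' are labeled on both terms;
# annotated reduces to [0, 1], the high-confidence span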
Example #8
    def __call__(self, reference, hypothesis):

        if isinstance(reference, Annotation):
            reference = reference.get_timeline()

        if isinstance(hypothesis, Annotation):
            hypothesis = hypothesis.get_timeline()

        # over-segmentation
        over = Timeline(uri=reference.uri)
        prev_r = reference[0]
        intersection = []
        for r, h in reference.co_iter(hypothesis):

            if r != prev_r:
                intersection = sorted(intersection)
                for _, segment in intersection[:-1]:
                    over.add(segment)
                intersection = []
                prev_r = r

            segment = r & h
            intersection.append((segment.duration, segment))

        intersection = sorted(intersection)
        for _, segment in intersection[:-1]:
            over.add(segment)

        # under-segmentation
        under = Timeline(uri=reference.uri)
        prev_h = hypothesis[0]
        intersection = []
        for h, r in hypothesis.co_iter(reference):

            if h != prev_h:
                intersection = sorted(intersection)
                for _, segment in intersection[:-1]:
                    under.add(segment)
                intersection = []
                prev_h = h

            segment = h & r
            intersection.append((segment.duration, segment))

        intersection = sorted(intersection)
        for _, segment in intersection[:-1]:
            under.add(segment)

        # extent
        extent = reference.extent()

        # correct (neither under- nor over-segmented)
        correct = under.union(over).gaps(support=extent)

        # frontier error (both under- and over-segmented)
        frontier = under.crop(over)

        # under-segmented
        not_over = over.gaps(support=extent)
        only_under = under.crop(not_over)

        # over-segmented
        not_under = under.gaps(support=extent)
        only_over = over.crop(not_under)

        status = Annotation(uri=reference.uri)
        # for segment in correct:
        #     status[segment, '_'] = 'correct'
        for segment in frontier:
            status[segment, '_'] = 'shift'
        for segment in only_over:
            status[segment, '_'] = 'over-segmentation'
        for segment in only_under:
            status[segment, '_'] = 'under-segmentation'

        return status.support()
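
On a toy pair of timelines, the method classifies each mis-segmented region (assuming `metric` is an instance of this class; values invented):

from pyannote.core import Segment, Timeline

reference = Timeline([Segment(0.0, 4.0), Segment(4.0, 8.0)])
hypothesis = Timeline([Segment(0.0, 2.0), Segment(2.0, 8.0)])

# status = metric(reference, hypothesis) would label:
#   [0, 2] as 'over-segmentation'  (reference [0, 4] is split at t=2)
#   [2, 4] as 'under-segmentation' (hypothesis [2, 8] straddles t=4)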
Example #9
    def apply(self, predictions, dimension=0):
        """
        Parameters
        ----------
        predictions : SlidingWindowFeature
            Must be mono-dimensional
        dimension : int, optional
            Which dimension to process
        """

        if len(predictions.data.shape) == 1:
            data = predictions.data
        elif predictions.data.shape[1] == 1:
            data = predictions.data[:, 0]
        else:
            data = predictions.data[:, dimension]

        n_samples = predictions.getNumber()
        window = predictions.sliding_window
        timestamps = [window[i].middle for i in range(n_samples)]

        # initial state
        start = timestamps[0]
        label = data[0] > self.onset

        # timeline meant to store 'active' segments
        active = Timeline()

        for t, y in zip(timestamps[1:], data[1:]):

            # currently active
            if label:
                # switching from active to inactive
                if y < self.offset:
                    segment = Segment(start - self.pad_onset,
                                      t + self.pad_offset)
                    active.add(segment)
                    start = t
                    label = False

            # currently inactive
            else:
                # switching from inactive to active
                if y > self.onset:
                    start = t
                    label = True

        # if active at the end, add final segment
        if label:
            segment = Segment(start - self.pad_onset, t + self.pad_offset)
            active.add(segment)

        # because of padding, some 'active' segments might be overlapping
        # therefore, we merge those overlapping segments
        # note: coverage() was renamed support() in later pyannote.core releases
        active = active.coverage()

        # remove short 'active' segments
        active = Timeline(
            [s for s in active if s.duration > self.min_duration[1]])

        # fill short 'inactive' segments
        inactive = active.gaps()
        for s in inactive:
            if s.duration < self.min_duration[0]:
                active.add(s)
        active = active.coverage()

        return active
Example #10
    def __call__(self, reference, hypothesis):

        if isinstance(reference, Annotation):
            reference = reference.get_timeline()

        if isinstance(hypothesis, Annotation):
            hypothesis = hypothesis.get_timeline()

        # over-segmentation
        over = Timeline(uri=reference.uri)
        prev_r = reference[0]
        intersection = []
        for r, h in reference.co_iter(hypothesis):

            if r != prev_r:
                intersection = sorted(intersection)
                for _, segment in intersection[:-1]:
                    over.add(segment)
                intersection = []
                prev_r = r

            segment = r & h
            intersection.append((segment.duration, segment))

        intersection = sorted(intersection)
        for _, segment in intersection[:-1]:
            over.add(segment)

        # under-segmentation
        under = Timeline(uri=reference.uri)
        prev_h = hypothesis[0]
        intersection = []
        for h, r in hypothesis.co_iter(reference):

            if h != prev_h:
                intersection = sorted(intersection)
                for _, segment in intersection[:-1]:
                    under.add(segment)
                intersection = []
                prev_h = h

            segment = h & r
            intersection.append((segment.duration, segment))

        intersection = sorted(intersection)
        for _, segment in intersection[:-1]:
            under.add(segment)

        # extent
        extent = reference.extent()

        # correct (neither under- nor over-segmented)
        # note: the `focus` keyword of gaps() was renamed `support`
        # in later pyannote.core releases
        correct = under.union(over).gaps(focus=extent)

        # frontier error (both under- and over-segmented)
        frontier = under.crop(over)

        # under-segmented
        not_over = over.gaps(focus=extent)
        only_under = under.crop(not_over)

        # over-segmented
        not_under = under.gaps(focus=extent)
        only_over = over.crop(not_under)

        status = Annotation(uri=reference.uri)
        for segment in correct:
            status[segment, '_'] = 'correct'
        for segment in frontier:
            status[segment, '_'] = 'frontier'
        for segment in only_over:
            status[segment, '_'] = 'over'
        for segment in only_under:
            status[segment, '_'] = 'under'

        # note: Annotation.smooth() was renamed support() in later releases
        return status.smooth()
Example #11
def test_empty_gaps():
    empty_timeline = Timeline(uri='MyEmptyGaps')
    assert list(empty_timeline.gaps()) == []
    Segment.set_precision(3)
    assert list(empty_timeline.gaps()) == []
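
A companion check in the same pytest style, showing the non-empty case for contrast (test name invented):

def test_simple_gaps():
    timeline = Timeline([Segment(0, 1), Segment(2, 3)], uri='MyGaps')
    # with no support given, gaps() works within the extent [0, 3]
    assert list(timeline.gaps()) == [Segment(1, 2)]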