Example #1
    def __call__(self, sequence=Stream.NoNewData):

        if isinstance(sequence, More):
            sequence = sequence.output

        if sequence in [Stream.EndOfStream, Stream.NoNewData]:
            return sequence

        data = sequence.data
        active = data[0]

        sw = sequence.sliding_window
        start = sw[0].middle

        timeline = Timeline()
        timeline.start = start

        for i, y in enumerate(data):
            if active and not y:
                segment = Segment(start, sw[i].middle)
                timeline.add(segment)
                active = False
            elif not active and y:
                active = True
                start = sw[i].middle

        if active:
            segment = Segment(start, sw[i].middle)
            timeline.add(segment)

        timeline.end = sw[i].middle

        return timeline
Example #2
    def __call__(self, sequence=Stream.NoNewData):

        if isinstance(sequence, More):
            sequence = sequence.output

        if sequence in [Stream.EndOfStream, Stream.NoNewData]:
            return sequence

        data = sequence.data
        active = data[0]

        sw = sequence.sliding_window
        start = sw[0].middle

        timeline = Timeline()
        timeline.start = start

        for i, y in enumerate(data):
            if active and not y:
                segment = Segment(start, sw[i].middle)
                timeline.add(segment)
                active = False
            elif not active and y:
                active = True
                start = sw[i].middle

        if active:
            segment = Segment(start, sw[i].middle)
            timeline.add(segment)

        timeline.end = sw[i].middle

        return timeline
Example #3
    def apply(self, features, segmentation=None):
        """
        Parameters
        ----------
        features : Features
        segmentation : Timeline, optional
        """

        if segmentation is None:
            segmentation = Timeline(segments=[features.getExtent()])

        sliding_window = features.sliding_window
        min_samples = sliding_window.durationToSamples(self.min_duration)
        precision = sliding_window.durationToSamples(self.precision)

        segmenter = SKLearnBICSegmentation(
            penalty_coef=self.penalty_coef,
            covariance_type=self.covariance_type,
            min_samples=min_samples,
            precision=precision)

        result = Timeline()

        for long_segment in segmentation:

            X = features.crop(long_segment)
            boundaries = segmenter.apply(X)
            for t, T in pairwise(boundaries):
                segment = sliding_window.rangeToSegment(t, T - t)
                shifted_segment = Segment(long_segment.start + segment.start,
                                          long_segment.start + segment.end)
                result.add(shifted_segment)

        return result
Example #4
    def apply(self, features, segmentation=None):
        """
        Parameters
        ----------
        features : Features
        segmentation : Timeline, optional
        """

        if segmentation is None:
            segmentation = Timeline(segments=[features.getExtent()])

        sliding_window = features.sliding_window
        min_samples = sliding_window.durationToSamples(self.min_duration)
        precision = sliding_window.durationToSamples(self.precision)

        segmenter = SKLearnBICSegmentation(
            penalty_coef=self.penalty_coef,
            covariance_type=self.covariance_type,
            min_samples=min_samples,
            precision=precision)

        result = Timeline()

        for long_segment in segmentation:

            X = features.crop(long_segment)
            boundaries = segmenter.apply(X)
            for t, T in pairwise(boundaries):
                segment = sliding_window.rangeToSegment(t, T - t)
                shifted_segment = Segment(long_segment.start + segment.start,
                                          long_segment.start + segment.end)
                result.add(shifted_segment)

        return result
Example #5
def write_test_file(data_dir, output_file, trial_length):
    annotations, max_length, speakers = read_annotaitons(data_dir)
    # create an artificial non-overlapping segments each of the trial_length size
    trial_segments = Timeline()
    for i in range(0, int(max_length) // trial_length):
        trial_segments.add(Segment(start=i*trial_length, end=(i+1)*trial_length))

    with open(output_file, 'w') as f:
        for label in speakers.keys():
            for annotation in annotations:
                # make sure our trial segments are not extending beyond the total length of the speech data
                support = annotation.get_timeline().extent()
                # we consider smaller segment here to make sure an embedding of 3 seconds can be computed
                adjusted_trial_segments = trial_segments.crop(Segment(start=support.start, end=support.end - 3.),
                                                              mode='loose')
                uri = annotation.uri
                cur_timeline = annotation.label_timeline(label, copy=False)
                for trial_segment in adjusted_trial_segments:
                    cropped_speaker = cur_timeline.crop(trial_segment, mode='intersection')
                    if not cropped_speaker:
                        f.write('{0} {1} {2:0>7.2f} {3:0>7.2f} nontarget - -\n'.format(
                            label,
                            uri,
                            trial_segment.start,
                            trial_segment.end))
                    else:
                        f.write('{0} {1} {2:0>7.2f} {3:0>7.2f} target {4:0>7.2f} {5:0>7.2f}\n'.format(
                            label,
                            uri,
                            trial_segment.start,
                            trial_segment.end,
                            cropped_speaker[0].start,
                            cropped_speaker[0].duration))
Example #6
    def _preprocess(self, reference, hypothesis):

        if not isinstance(reference, Annotation):
            raise TypeError('reference must be an instance of `Annotation`')

        if isinstance(hypothesis, Annotation):
            hypothesis = hypothesis.get_timeline()

        # reference where short intra-label gaps are removed
        filled = Timeline()
        for label in reference.labels():
            label_timeline = reference.label_timeline(label)
            for gap in label_timeline.gaps():
                if gap.duration < self.tolerance:
                    label_timeline.add(gap)

            for segment in label_timeline.coverage():
                filled.add(segment)

        # reference coverage after filling gaps
        coverage = filled.coverage()

        reference_partition = self._partition(filled, coverage)
        hypothesis_partition = self._partition(hypothesis, coverage)

        return reference_partition, hypothesis_partition
Example #7
    def _preprocess(self, reference, hypothesis):

        if not isinstance(reference, Annotation):
            raise TypeError('reference must be an instance of `Annotation`')

        if isinstance(hypothesis, Annotation):
            hypothesis = hypothesis.get_timeline()

        # reference where short intra-label gaps are removed
        filled = Timeline()
        for label in reference.labels():
            label_timeline = reference.label_timeline(label)
            for gap in label_timeline.gaps():
                if gap.duration < self.tolerance:
                    label_timeline.add(gap)

            for segment in label_timeline.support():
                filled.add(segment)

        # reference coverage after filling gaps
        coverage = filled.support()

        reference_partition = self._partition(filled, coverage)
        hypothesis_partition = self._partition(hypothesis, coverage)

        return reference_partition, hypothesis_partition
Example #8
def test_remove_and_extent():
    t = Timeline(uri='MyAudioFile')
    t.add(Segment(6, 8))
    t.add(Segment(7, 9))
    t.add(Segment(6, 9))

    t.remove(Segment(6, 9))
    assert t.extent() == Segment(6, 9)
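
The assertion above can look surprising at first: `remove` only deletes the exact segment (6, 9), and the two remaining segments still span it. A minimal check, assuming pyannote.core is installed:

from pyannote.core import Segment, Timeline

t = Timeline([Segment(6, 8), Segment(7, 9), Segment(6, 9)], uri='MyAudioFile')
t.remove(Segment(6, 9))

assert list(t) == [Segment(6, 8), Segment(7, 9)]   # only the exact match is removed
assert t.extent() == Segment(6, 9)                 # still spanned by what remains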
Example #9
    def apply(self, predictions, dimension=0):
        """Peak detection

        Parameters
        ----------
        predictions : SlidingWindowFeature
            Predictions returned by segmentation approaches.

        Returns
        -------
        segmentation : Timeline
            Partition.
        """

        if len(predictions.data.shape) == 1:
            y = predictions.data
        elif predictions.data.shape[1] == 1:
            y = predictions.data[:, 0]
        else:
            y = predictions.data[:, dimension]

        if self.log_scale:
            y = np.exp(y)

        sw = predictions.sliding_window

        precision = sw.step
        order = max(1, int(np.rint(self.min_duration / precision)))
        indices = scipy.signal.argrelmax(y, order=order)[0]

        if self.scale == 'absolute':
            mini = 0
            maxi = 1

        elif self.scale == 'relative':
            mini = np.nanmin(y)
            maxi = np.nanmax(y)

        elif self.scale == 'percentile':
            mini = np.nanpercentile(y, 1)
            maxi = np.nanpercentile(y, 99)

        threshold = mini + self.alpha * (maxi - mini)

        peak_time = np.array(
            [sw[i].middle for i in indices if y[i] > threshold])

        n_windows = len(y)
        start_time = sw[0].start
        end_time = sw[n_windows].end

        boundaries = np.hstack([[start_time], peak_time, [end_time]])
        segmentation = Timeline()
        for i, (start, end) in enumerate(pairwise(boundaries)):
            segment = Segment(start, end)
            segmentation.add(segment)

        return segmentation
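
The final loop above turns a sorted list of boundaries into a gap-free partition. A self-contained sketch of that step; the `pairwise` helper is assumed to be the usual itertools recipe, as imported in these examples:

from itertools import tee

from pyannote.core import Segment, Timeline

def pairwise(iterable):
    """s -> (s0, s1), (s1, s2), (s2, s3), ..."""
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)

boundaries = [0.0, 1.4, 3.2, 7.5]
partition = Timeline(Segment(start, end) for start, end in pairwise(boundaries))
# -> [0.0, 1.4], [1.4, 3.2], [3.2, 7.5]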
Example #10
    def apply(self, predictions, dimension=0):
        """Peak detection

        Parameters
        ----------
        predictions : SlidingWindowFeature
            Predictions returned by segmentation approaches.

        Returns
        -------
        segmentation : Timeline
            Partition.
        """

        if len(predictions.data.shape) == 1:
            y = predictions.data
        elif predictions.data.shape[1] == 1:
            y = predictions.data[:, 0]
        else:
            y = predictions.data[:, dimension]

        if self.log_scale:
            y = np.exp(y)

        sw = predictions.sliding_window

        precision = sw.step
        order = max(1, int(np.rint(self.min_duration / precision)))
        indices = scipy.signal.argrelmax(y, order=order)[0]

        if self.scale == 'absolute':
            mini = 0
            maxi = 1

        elif self.scale == 'relative':
            mini = np.nanmin(y)
            maxi = np.nanmax(y)

        elif self.scale == 'percentile':
            mini = np.nanpercentile(y, 1)
            maxi = np.nanpercentile(y, 99)

        threshold = mini + self.alpha * (maxi - mini)

        peak_time = np.array([sw[i].middle for i in indices if y[i] > threshold])

        n_windows = len(y)
        start_time = sw[0].start
        end_time = sw[n_windows].end

        boundaries = np.hstack([[start_time], peak_time, [end_time]])
        segmentation = Timeline()
        for i, (start, end) in enumerate(pairwise(boundaries)):
            segment = Segment(start, end)
            segmentation.add(segment)

        return segmentation
Example #11
def timeline():
    t = Timeline(uri='MyAudioFile')
    t.add(Segment(6, 8))
    t.add(Segment(0.5, 3))
    t.add(Segment(8.5, 10))
    t.add(Segment(1, 4))
    t.add(Segment(5, 7))
    t.add(Segment(7, 8))

    return t
Example #12
def timeline():
    t = Timeline(uri='MyAudioFile')
    t.add(Segment(6, 8))
    t.add(Segment(0.5, 3))
    t.add(Segment(8.5, 10))
    t.add(Segment(1, 4))
    t.add(Segment(5, 7))
    t.add(Segment(7, 8))

    return t
Example #13
    def run(self):
        with self.in_subtitles().open('r') as fp:
            transcription = pyannote.core.json.load(fp)
        timeline = Timeline()
        for start, end, edge in transcription.ordered_edges_iter(data=True):
            if 'subtitle' not in edge:
                continue
            segment = Segment(start, end)
            timeline.add(segment)

        with self.out_put().open('w') as fp:
            pyannote.core.json.dump(timeline, fp)
Example #14
def test_added_empty_segments():
  # The first timeline includes empty segments.
  first_timeline = Timeline()
  first_timeline.add(Segment(1, 5))
  first_timeline.add(Segment(6, 6))
  first_timeline.add(Segment(7, 7))
  first_timeline.add(Segment(8, 10))

  # The second has no empty segments.
  second_timeline = Timeline()
  second_timeline.add(Segment(1, 5))
  second_timeline.add(Segment(8, 10))

  assert first_timeline == second_timeline
Example #15
def overlap_timeline(uri, annotation):
    timeline = annotation.get_timeline()
    segmentation = timeline.segmentation()
    l_segments = [{'seg': segment, 'count': 0} for segment in segmentation]
    #print(l_segments)
    for seg in timeline:
        for curr in l_segments:
            if curr['seg'] in seg:
                curr['count'] += 1
    overlap_timeline = Timeline(uri=uri)
    for curr in l_segments:
        if curr['count'] > 1:
            overlap_timeline.add(curr['seg'])
    return overlap_timeline
Example #16
def uem_timeline_from_file(uem_file, uniq_name=''):
    """
    outputs pyannote timeline segments for uem file

     <UEM> file format
     UNIQ_SPEAKER_ID CHANNEL START_TIME END_TIME
    """
    timeline = Timeline(uri=uniq_name)
    with open(uem_file, 'r') as f:
        lines = f.readlines()
        for line in lines:
            line = line.strip()
            speaker_id, channel, start_time, end_time = line.split()
            timeline.add(Segment(float(start_time), float(end_time)))

    return timeline
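
A hedged usage sketch for the helper above; the file name and its two UEM lines are made up for illustration:

# hypothetical content of meeting_01.uem (UNIQ_SPEAKER_ID CHANNEL START_TIME END_TIME):
#   meeting_01 1 0.00 12.35
#   meeting_01 1 15.80 42.10
timeline = uem_timeline_from_file('meeting_01.uem', uniq_name='meeting_01')
print(timeline.duration())  # total annotated duration: 38.65 seconds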
Example #17
def main():
    usage = "%prog [options] RTTMone RTTMtwo"
    desc = "Convert the txtfile from diarization of the form: \
            ID t_in t_out \
            into a kaldi format file for spkdet task"

    version = "%prog 0.1"
    parser = OptionParser(usage=usage, description=desc, version=version)
    (opt, args) = parser.parse_args()

    if (len(args) != 3):
        parser.error("Incorrect number of arguments")
    vadrttm, overlaprttm, outputrttm = args

    # Read document and loaded in memory
    vad = pyannote.database.util.load_rttm(vadrttm)
    ovl = pyannote.database.util.load_rttm(overlaprttm)

    fw = open(outputrttm, 'wt')
    for name in vad:

        # Examples
        # speech = vad['EN2002a.Mix-Headset-0000000-0006000'].get_timeline()
        # duration = vad['EN2002a.Mix-Headset-0000000-0006000'].get_timeline()[-1][1]
        # overlap = ovl['EN2002a.Mix-Headset-0000000-0006000'].get_timeline()
        speech = vad[name].get_timeline()
        duration = vad[name].get_timeline()[-1][1]
        if name in ovl.keys():
            overlap = ovl[name].get_timeline()

            # just get the intersections of the VAD and overlap
            intersection = Timeline()
            for speech_segment, overlap_segment in speech.co_iter(overlap):
                intersection.add(speech_segment & overlap_segment)

            keep = intersection.gaps(support=Segment(0, duration))

            vad_without_overlap = speech.crop(keep)
        else:
            vad_without_overlap = speech

        # Write RTTM
        write_rttm(fw, vad_without_overlap, label='speech')
    fw.close()
Example #18
def test_timeline_overlaps():
    overlapped_tl = Timeline(uri="La menuiserie mec")
    overlapped_tl.add(Segment(0, 10))
    overlapped_tl.add(Segment(5, 10))
    overlapped_tl.add(Segment(15, 20))
    overlapped_tl.add(Segment(18, 23))

    expected_overlap = Timeline()
    expected_overlap.add(Segment(5, 10))
    expected_overlap.add(Segment(18, 20))

    assert expected_overlap == overlapped_tl.get_overlap()
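
The expected result above can also be computed by hand with `co_iter`, the same pattern used in the VAD/overlap examples elsewhere in this list (a sketch, assuming pyannote.core):

from pyannote.core import Segment, Timeline

tl = Timeline([Segment(0, 10), Segment(5, 10), Segment(15, 20), Segment(18, 23)])

manual = Timeline()
for s1, s2 in tl.co_iter(tl):
    if s1 == s2:
        continue                    # skip each segment paired with itself
    manual.add(s1 & s2)             # keep only the intersection

assert manual.support() == tl.get_overlap()   # [5, 10] and [18, 20]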
Example #19
def test_crop(timeline):
    selection = Segment(3, 7)

    expected_answer = Timeline(uri='MyAudioFile')
    expected_answer.add(Segment(3, 4))
    expected_answer.add(Segment(5, 7))
    expected_answer.add(Segment(6, 7))

    assert timeline.crop(selection, mode='intersection') == expected_answer

    expected_answer = Timeline(uri='MyAudioFile')
    expected_answer.add(Segment(5, 7))
    assert timeline.crop(selection, mode='strict') == expected_answer

    expected_answer = Timeline(uri="pouet")
    expected_answer.add(Segment(1, 4))
    expected_answer.add(Segment(5, 7))
    expected_answer.add(Segment(6, 8))

    assert timeline.crop(selection, mode='loose') == expected_answer
Example #20
    def _get_collar(self, reference, duration):

        # initialize empty timeline
        collar = Timeline(uri=reference.uri)

        if duration == 0.:
            return collar

        # iterate over all segments in reference
        for segment in reference.itersegments():

            # add collar centered on start time
            t = segment.start
            collar.add(Segment(t - .5 * duration, t + .5 * duration))

            # add collar centered on end time
            t = segment.end
            collar.add(Segment(t - .5 * duration, t + .5 * duration))

        # merge overlapping collars and return
        return collar.coverage()
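
A toy sketch of the collar construction above, with made-up boundary times, showing why the final merge is needed (assuming pyannote.core; `coverage()` is the historical name of `support()`):

from pyannote.core import Segment, Timeline

duration = 0.5
collar = Timeline()
for boundary in [1.0, 1.2, 5.0]:
    collar.add(Segment(boundary - .5 * duration, boundary + .5 * duration))

# the collars around 1.0 and 1.2 overlap, so merging leaves two segments
merged = collar.support()
assert len(merged) == 2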
Example #21
    def run(self):

        # wav file duration
        wav = self.in_wav().path
        with contextlib.closing(wave.open(wav, 'r')) as f:
            frames = f.getnframes()
            rate = f.getframerate()
        duration = frames / rate
        extent = Segment(0., duration)

        with self.in_speaker().open('r') as fp:
            speaker = pyannote.core.json.load(fp)

        timeline = Timeline()
        for segment, _ in speaker.itertracks():
            timeline.add(segment)

        # fill gaps
        for gap in timeline.gaps(extent):
            if gap.duration < self.fill_gaps:
                timeline.add(gap)

        timeline = timeline.coverage()

        # dump as annotation...
        if self.to_annotation:

            annotation = Annotation()
            for s, segment in enumerate(timeline):
                annotation[segment] = s
            annotation = annotation.anonymize_labels(generator='string')

            with self.out_put().open('w') as fp:
                pyannote.core.json.dump(annotation, fp)

        # ... or as timeline
        else:

            with self.out_put().open('w') as fp:
                pyannote.core.json.dump(timeline, fp)
Example #22
    def to_overlap(reference: Annotation) -> Annotation:
        """Get overlapped speech reference annotation

        Parameters
        ----------
        reference : Annotation
            File yielded by pyannote.database protocols.

        Returns
        -------
        overlap : `pyannote.core.Annotation`
            Overlapped speech reference.
        """

        overlap = Timeline(uri=reference.uri)
        for (s1, t1), (s2, t2) in reference.co_iter(reference):
            l1 = reference[s1, t1]
            l2 = reference[s2, t2]
            if l1 == l2:
                continue
            overlap.add(s1 & s2)
        return overlap.support().to_annotation()
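
A possible usage sketch for `to_overlap` on a toy two-speaker annotation; the helper above looks like a staticmethod and is called here as a plain function, and the names and times are invented:

from pyannote.core import Annotation, Segment

reference = Annotation(uri='toy')
reference[Segment(0, 10), 'A'] = 'alice'
reference[Segment(5, 15), 'B'] = 'bob'

overlap = to_overlap(reference)
# 'alice' and 'bob' both speak between 5 and 10
assert overlap.get_timeline().extent() == Segment(5, 10)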
Example #23
def test_crop(timeline):

    selection = Segment(3,7)

    expected_answer = Timeline(uri='MyAudioFile')
    expected_answer.add(Segment(3, 4))
    expected_answer.add(Segment(5, 7))
    expected_answer.add(Segment(6, 7))

    assert timeline.crop(selection, mode='intersection') == expected_answer

    expected_answer = Timeline(uri='MyAudioFile')
    expected_answer.add(Segment(5, 7))
    assert timeline.crop(selection, mode='strict') == expected_answer


    expected_answer = Timeline(uri="pouet")
    expected_answer.add(Segment(1, 4))
    expected_answer.add(Segment(5, 7))
    expected_answer.add(Segment(6, 8))

    assert timeline.crop(selection, mode='loose') == expected_answer
Example #24
def test_extrude():
    removed = Segment(2, 5)

    timeline = Timeline(uri='KINGJU')
    timeline.add(Segment(0, 3))
    timeline.add(Segment(2, 5))
    timeline.add(Segment(6, 7))

    expected_answer = Timeline()
    expected_answer.add(Segment(0, 2))
    expected_answer.add(Segment(6, 7))

    assert timeline.extrude(removed, mode='intersection') == expected_answer

    expected_answer = Timeline(uri="MCSALO")
    expected_answer.add(Segment(0, 3))
    expected_answer.add(Segment(6, 7))
    assert timeline.extrude(removed, mode='strict') == expected_answer

    expected_answer = Timeline(uri="CADILLAC")
    expected_answer.add(Segment(6, 7))

    assert timeline.extrude(removed, mode='loose') == expected_answer
Example #25
    def apply(self, predictions):
        """Peak detection

        Parameters
        ----------
        predictions : SlidingWindowFeature
            Predictions returned by segmentation approaches.

        Returns
        -------
        segmentation : Timeline
            Partition.
        """
        y = predictions.data
        sw = predictions.sliding_window

        precision = sw.step
        order = int(np.rint(self.min_duration / precision))
        indices = scipy.signal.argrelmax(y, order=order)[0]

        mini = np.nanpercentile(y, 5)
        maxi = np.nanpercentile(y, 95)
        threshold = mini + self.alpha * (maxi - mini)

        peak_time = np.array([sw[i].middle for i in indices if y[i] > threshold])

        n_windows = len(y)
        start_time = sw[0].start
        end_time = sw[n_windows].end

        boundaries = np.hstack([[start_time], peak_time, [end_time]])
        segmentation = Timeline()
        for i, (start, end) in enumerate(pairwise(boundaries)):
            segment = Segment(start, end)
            segmentation.add(segment)

        return segmentation
Example #26
File: convert.py Project: PaulLerner/Prune
def serial_speaker_to_Annotation(serial_speaker, uri=None, modality='speaker'):
    """
    Parameters:
    -----------
    serial_speaker : `dict`
        loaded from a serial speaker JSON as defined
        in https://figshare.com/articles/TV_Series_Corpus/3471839
    uri (uniform resource identifier) : `str`, optional
        which identifies the annotation (e.g. episode number)
        Default : None
    modality : `str`, optional
        modality of the annotation as defined in https://github.com/pyannote/pyannote-core

    Returns:
    --------
    annotation: pyannote `Annotation`
        for speaker identification/diarization as defined
        in https://github.com/pyannote/pyannote-core
    annotated: pyannote `Timeline`
        representing the annotated parts of the serial_speaker file
        Unknown speakers are not considered as annotated
    """

    annotation = Annotation(uri, modality)
    not_annotated = Timeline(uri=uri)

    for segment in serial_speaker["data"]["speech_segments"]:
        time = Segment(segment["start"], segment["end"])
        speaker_id = segment['speaker'].replace(" ", "_")
        annotation[time, speaker_id] = speaker_id
        if speaker_id == 'unknown':
            not_annotated.add(time)

    end = serial_speaker.get("duration", segment["end"])
    annotated = not_annotated.gaps(support=Segment(0.0, end))
    return annotation, annotated
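
A minimal sketch of the `gaps(support=...)` call that turns the 'unknown speaker' regions into the annotated timeline (toy values, assuming pyannote.core):

from pyannote.core import Segment, Timeline

not_annotated = Timeline([Segment(2, 3), Segment(7, 8)])
annotated = not_annotated.gaps(support=Segment(0, 10))
assert list(annotated) == [Segment(0, 2), Segment(3, 7), Segment(8, 10)]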
Example #27
def test_consistent_timelines_with_empty_segments():
  # The first timeline is initialized with Segments, some empty.
  first_timeline = Timeline([Segment(1, 5), Segment(6, 6), Segment(7, 7), Segment(8, 10)])

  # The second timeline adds one Segment at a time, including empty ones.
  second_timeline = Timeline()
  second_timeline.add(Segment(1, 5))
  second_timeline.add(Segment(6, 6))
  second_timeline.add(Segment(7, 7))
  second_timeline.add(Segment(8, 10))

  assert first_timeline == second_timeline
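
Both of the empty-segment tests in this list rely on the same behaviour: zero-duration segments are falsy and are silently dropped when building a Timeline. A quick check (assuming pyannote.core):

from pyannote.core import Segment, Timeline

empty = Segment(6, 6)
assert not empty                        # zero-duration segments are falsy

t = Timeline([Segment(1, 5), empty])
assert len(t) == 1                      # the empty segment was not kept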
Example #28
    def apply(self, predictions, dimension=0):
        """
        Parameters
        ----------
        predictions : SlidingWindowFeature
            Must be mono-dimensional
        dimension : int, optional
            Which dimension to process
        """

        if len(predictions.data.shape) == 1:
            data = predictions.data
        elif predictions.data.shape[1] == 1:
            data = predictions.data[:, 0]
        else:
            data = predictions.data[:, dimension]

        n_samples = predictions.getNumber()
        window = predictions.sliding_window
        timestamps = [window[i].middle for i in range(n_samples)]

        # initial state
        start = timestamps[0]
        label = data[0] > self.onset

        # timeline meant to store 'active' segments
        active = Timeline()

        for t, y in zip(timestamps[1:], data[1:]):

            # currently active
            if label:
                # switching from active to inactive
                if y < self.offset:
                    segment = Segment(start - self.pad_onset,
                                      t + self.pad_offset)
                    active.add(segment)
                    start = t
                    label = False

            # currently inactive
            else:
                # switching from inactive to active
                if y > self.onset:
                    start = t
                    label = True

        # if active at the end, add final segment
        if label:
            segment = Segment(start - self.pad_onset, t + self.pad_offset)
            active.add(segment)

        # because of padding, some 'active' segments might be overlapping
        # therefore, we merge those overlapping segments
        active = active.coverage()

        # remove short 'active' segments
        active = Timeline(
            [s for s in active if s.duration > self.min_duration[1]])

        # fill short 'inactive' segments
        inactive = active.gaps()
        for s in inactive:
            if s.duration < self.min_duration[0]:
                active.add(s)
        active = active.coverage()

        return active
Example #29
    def apply(self, predictions, dimension=0):
        """
        Parameters
        ----------
        predictions : SlidingWindowFeature
            Must be mono-dimensional
        dimension : int, optional
            Which dimension to process
        """

        if len(predictions.data.shape) == 1:
            data = predictions.data
        elif predictions.data.shape[1] == 1:
            data = predictions.data[:, 0]
        else:
            data = predictions.data[:, dimension]

        if self.log_scale:
            data = np.exp(data)

        n_samples = predictions.getNumber()
        window = predictions.sliding_window
        timestamps = [window[i].middle for i in range(n_samples)]

        # initial state
        start = timestamps[0]
        label = data[0] > self.onset

        if self.scale == 'absolute':
            mini = 0
            maxi = 1

        elif self.scale == 'relative':
            mini = np.nanmin(data)
            maxi = np.nanmax(data)

        elif self.scale == 'percentile':
            mini = np.nanpercentile(data, 1)
            maxi = np.nanpercentile(data, 99)

        onset = mini + self.onset * (maxi - mini)
        offset = mini + self.offset * (maxi - mini)

        # timeline meant to store 'active' segments
        active = Timeline()

        for t, y in zip(timestamps[1:], data[1:]):

            # currently active
            if label:
                # switching from active to inactive
                if y < offset:
                    segment = Segment(start - self.pad_onset,
                                      t + self.pad_offset)
                    active.add(segment)
                    start = t
                    label = False

            # currently inactive
            else:
                # switching from inactive to active
                if y > onset:
                    start = t
                    label = True

        # if active at the end, add final segment
        if label:
            segment = Segment(start - self.pad_onset, t + self.pad_offset)
            active.add(segment)

        # because of padding, some 'active' segments might be overlapping
        # therefore, we merge those overlapping segments
        active = active.support()

        # remove short 'active' segments
        active = Timeline(
            [s for s in active if s.duration > self.min_duration_on])

        # fill short 'inactive' segments
        inactive = active.gaps()
        for s in inactive:
            if s.duration < self.min_duration_off:
                active.add(s)
        active = active.support()

        return active
Example #30
    def validate_epoch(self, epoch, protocol_name, subset='development',
                       validation_data=None):

        target_precision = self.precision

        # load model for current epoch
        model = self.load_model(epoch).to(self.device)
        model.eval()

        if isinstance(self.feature_extraction_, Precomputed):
            self.feature_extraction_.use_memmap = False

        duration = self.task_.duration
        step = .25 * duration
        sequence_labeling = SequenceLabeling(
            model, self.feature_extraction_, duration=duration,
            step=.25 * duration, batch_size=self.batch_size,
            source='audio', device=self.device)

        protocol = get_protocol(protocol_name, progress=False,
                                preprocessors=self.preprocessors_)

        predictions = {}
        references = {}

        file_generator = getattr(protocol, subset)()
        for current_file in file_generator:
            uri = get_unique_identifier(current_file)

            # build overlap reference
            reference = Timeline(uri=uri)
            annotation = current_file['annotation']
            for track1, track2 in annotation.co_iter(annotation):
                if track1 == track2:
                    continue
                reference.add(track1[0] & track2[0])
            references[uri] = reference.to_annotation()

            # extract overlap scores
            scores = sequence_labeling.apply(current_file)

            if model.logsoftmax:
                scores = SlidingWindowFeature(
                    np.exp(scores.data[:, 2]), scores.sliding_window)
            else:
                scores = SlidingWindowFeature(
                    scores.data[:, 2], scores.sliding_window)

            predictions[uri] = scores

        # dichotomic search to find threshold that maximizes recall
        # while having at least `target_precision`

        lower_alpha = 0.
        upper_alpha = 1.
        best_alpha = .5 * (lower_alpha + upper_alpha)
        best_recall = 0.

        for _ in range(10):
            current_alpha = .5 * (lower_alpha + upper_alpha)
            binarizer = Binarize(onset=current_alpha,
                                 offset=current_alpha,
                                 log_scale=False)

            precision = DetectionPrecision()
            recall = DetectionRecall()

            for current_file in getattr(protocol, subset)():
                uri = get_unique_identifier(current_file)
                reference = references[uri]
                hypothesis = binarizer.apply(predictions[uri], dimension=0)
                hypothesis = hypothesis.to_annotation()
                uem = get_annotated(current_file)
                _ = precision(reference, hypothesis, uem=uem)
                _ = recall(reference, hypothesis, uem=uem)

            if abs(precision) < target_precision:
                # precision is not high enough: try higher thresholds
                lower_alpha = current_alpha
            else:
                upper_alpha = current_alpha
                r = abs(recall)
                if r > best_recall:
                    best_recall = r
                    best_alpha = current_alpha

        task = 'overlap_speech_detection'
        metric_name = f'{task}/recall@{target_precision:.2f}precision'
        return {
            metric_name: {'minimize': False, 'value': best_recall},
            f'{task}/threshold': {'minimize': 'NA', 'value': best_alpha}}
Example #31
def gecko_JSON_to_Annotation(gecko_JSON,
                             uri=None,
                             modality='speaker',
                             confidence_threshold=0.0,
                             collar=0.0,
                             expected_min_speech_time=0.0,
                             manual=False):
    """
    Parameters:
    -----------
    gecko_JSON : `dict`
        loaded from a Gecko-compliant JSON as defined in xml_to_GeckoJSON
    uri (uniform resource identifier) : `str`
        which identifies the annotation (e.g. episode number)
        Default : None
    modality : `str`
        modality of the annotation as defined in https://github.com/pyannote/pyannote-core
    confidence_threshold : `float`, Optional.
        The segments with confidence under confidence_threshold won't be added to UEM file.
        Defaults to keep every segment (i.e. 0.0)
    collar: `float`, Optional.
        Merge tracks with same label and separated by less than `collar` seconds.
        Defaults to keep tracks timeline untouched (i.e. 0.0)
    expected_min_speech_time: `float`, Optional.
        Threshold (in seconds) under which the total duration of speech time is suspicious (warns the user).
        Defaults to never suspect anything (i.e. 0.0)
    manual : `bool`
        Whether the json is coming from a manual correction or straight from
        the forced-alignment output.
        In the former case, the regions timing is used. `confidence_threshold`
            and `collar` are thus irrelevant.
        In the latter case (default), the timing of each term is used.

    Returns:
    --------
    annotation: pyannote `Annotation`
        for speaker identification/diarization as defined in https://github.com/pyannote/pyannote-core
    annotated: pyannote `Timeline`
        representing the annotated parts of the gecko_JSON files (depends on confidence_threshold)
    """
    annotation = Annotation(uri, modality)
    not_annotated = Timeline(uri=uri)
    for monologue in gecko_JSON["monologues"]:
        if not monologue:
            continue
        # '@' defined in https://github.com/hbredin/pyannote-db-plumcot/blob/develop/CONTRIBUTING.md#idepisodetxt
        # '+' defined in https://github.com/gong-io/gecko/blob/master/app/geckoModule/constants.js#L35
        speaker_ids = re.split(r"@|\+", monologue["speaker"]["id"])
        if manual:
            for speaker_id in speaker_ids:  # most of the time there's only one
                if speaker_id != '':  # happens with "all@"
                    annotation[Segment(monologue["start"], monologue["end"]),
                               speaker_id] = speaker_id
        else:
            for i, term in enumerate(monologue["terms"]):
                for speaker_id in speaker_ids:  # most of the time there's only one
                    if speaker_id != '':  # happens with "all@"
                        annotation[Segment(term["start"], term["end"]),
                                   speaker_id] = speaker_id
                if term["confidence"] <= confidence_threshold:
                    not_annotated.add(Segment(term["start"], term["end"]))

    if manual:
        annotated = Timeline([Segment(0.0, monologue["end"])], uri)
    else:
        annotation = annotation.support(collar)
        annotated = not_annotated.gaps(support=Segment(0.0, term["end"]))
    total_speech_time = annotation.crop(annotated).get_timeline().duration()
    if total_speech_time < expected_min_speech_time:
        warnings.warn(
            f"total speech time of {uri} is only {total_speech_time})")
    return annotation, annotated
Example #32
def gecko_JSON_to_UEM(gecko_JSON,
                      uri=None,
                      modality='speaker',
                      confidence_threshold=0.0,
                      collar=0.0,
                      expected_min_speech_time=0.0):
    """
    Parameters:
    -----------
    gecko_JSON : `dict`
        loaded from a Gecko-compliant JSON as defined in xml_to_GeckoJSON
    uri (uniform resource identifier) : `str`
        which identifies the annotation (e.g. episode number)
        Default : None
    modality : `str`
        modality of the annotation as defined in https://github.com/pyannote/pyannote-core
    confidence_threshold : `float`, Optional.
        The segments with confidence under confidence_threshold won't be added to UEM file.
        Defaults to keep every segment (i.e. 0.0)
    collar: `float`, Optional.
        Merge tracks with same label and separated by less than `collar` seconds.
        Defaults to keep tracks timeline untouched (i.e. 0.0)
    expected_min_speech_time: `float`, Optional.
        Threshold (in seconds) under which the total duration of speech time is suspicious (warns the user).
        Defaults to never suspect anything (i.e. 0.0)

    Returns:
    --------
    annotation: pyannote `Annotation`
        for speaker identification/diarization as defined in https://github.com/pyannote/pyannote-core
    annotated: pyannote `Timeline`
        representing the annotated parts of the gecko_JSON files (depends on confidence_threshold)
    """
    annotation = Annotation(uri, modality)
    annotated = Timeline(uri=uri)
    last_confident = 0.0
    last_unconfident = 0.0
    for monologue in gecko_JSON["monologues"]:
        if not monologue:
            continue
        # '@' defined in https://github.com/hbredin/pyannote-db-plumcot/blob/develop/CONTRIBUTING.md#idepisodetxt
        # '+' defined in https://github.com/gong-io/gecko/blob/master/app/geckoModule/constants.js#L35
        speaker_ids = re.split(r"@|\+", monologue["speaker"]["id"])
        for i, term in enumerate(monologue["terms"]):
            term["confidence"], term["start"], term["end"] = map(
                float,
                (term.get("confidence", 0.), term["start"], term["end"]))
            unknown = False
            for speaker_id in speaker_ids:  # most of the time there's only one
                if '#unknown#' in speaker_id:
                    unknown = True
                if speaker_id != '':  # happens with "all@"
                    annotation[Segment(term["start"], term["end"]),
                               speaker_id] = speaker_id
            if term["confidence"] <= confidence_threshold:
                last_unconfident = term["end"]
            else:
                if last_unconfident < last_confident and not unknown:
                    annotated.add(Segment(last_confident, term["end"]))
                last_confident = term["start"]

    annotation = annotation.support(collar)
    total_speech_time = annotation.crop(annotated).get_timeline().duration()
    if total_speech_time < expected_min_speech_time:
        warnings.warn(
            f"total speech time of {uri} is only {total_speech_time})")
    return annotation, annotated.support()
Example #33
    def apply(self, predictions, dimension=0):
        """
        Parameters
        ----------
        predictions : SlidingWindowFeature
            Must be mono-dimensional
        dimension : int, optional
            Which dimension to process
        """

        if len(predictions.data.shape) == 1:
            data = predictions.data
        elif predictions.data.shape[1] == 1:
            data = predictions.data[:, 0]
        else:
            data = predictions.data[:, dimension]

        if self.log_scale:
            data = np.exp(data)

        n_samples = predictions.getNumber()
        window = predictions.sliding_window
        timestamps = [window[i].middle for i in range(n_samples)]

        # initial state
        start = timestamps[0]
        label = data[0] > self.onset

        if self.scale == 'absolute':
            mini = 0
            maxi = 1

        elif self.scale == 'relative':
            mini = np.nanmin(data)
            maxi = np.nanmax(data)

        elif self.scale == 'percentile':
            mini = np.nanpercentile(data, 1)
            maxi = np.nanpercentile(data, 99)

        onset = mini + self.onset * (maxi - mini)
        offset = mini + self.offset * (maxi - mini)

        # timeline meant to store 'active' segments
        active = Timeline()

        for t, y in zip(timestamps[1:], data[1:]):

            # currently active
            if label:
                # switching from active to inactive
                if y < offset:
                    segment = Segment(start - self.pad_onset,
                                      t + self.pad_offset)
                    active.add(segment)
                    start = t
                    label = False

            # currently inactive
            else:
                # switching from inactive to active
                if y > onset:
                    start = t
                    label = True

        # if active at the end, add final segment
        if label:
            segment = Segment(start - self.pad_onset, t + self.pad_offset)
            active.add(segment)

        # because of padding, some 'active' segments might be overlapping
        # therefore, we merge those overlapping segments
        active = active.support()

        # remove short 'active' segments
        active = Timeline(
            [s for s in active if s.duration > self.min_duration_on])

        # fill short 'inactive' segments
        inactive = active.gaps()
        for s in inactive:
            if s.duration < self.min_duration_off:
                active.add(s)
        active = active.support()

        return active
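
A toy sketch of the two clean-up passes at the end of `apply` (dropping short 'active' segments, then filling short gaps); the durations and thresholds below are invented:

from pyannote.core import Segment, Timeline

active = Timeline([Segment(0, 5), Segment(5.05, 9), Segment(12, 12.1)])
min_duration_on, min_duration_off = 0.5, 0.1

# remove 'active' segments shorter than min_duration_on
active = Timeline([s for s in active if s.duration > min_duration_on])

# fill 'inactive' gaps shorter than min_duration_off
for gap in active.gaps():
    if gap.duration < min_duration_off:
        active.add(gap)
active = active.support()

assert len(active) == 1       # everything merged into a single [0, 9] segment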
Example #34
    def __call__(self, reference, hypothesis):

        if isinstance(reference, Annotation):
            reference = reference.get_timeline()

        if isinstance(hypothesis, Annotation):
            hypothesis = hypothesis.get_timeline()

        # over-segmentation
        over = Timeline(uri=reference.uri)
        prev_r = reference[0]
        intersection = []
        for r, h in reference.co_iter(hypothesis):

            if r != prev_r:
                intersection = sorted(intersection)
                for _, segment in intersection[:-1]:
                    over.add(segment)
                intersection = []
                prev_r = r

            segment = r & h
            intersection.append((segment.duration, segment))

        intersection = sorted(intersection)
        for _, segment in intersection[:-1]:
            over.add(segment)

        # under-segmentation
        under = Timeline(uri=reference.uri)
        prev_h = hypothesis[0]
        intersection = []
        for h, r in hypothesis.co_iter(reference):

            if h != prev_h:
                intersection = sorted(intersection)
                for _, segment in intersection[:-1]:
                    under.add(segment)
                intersection = []
                prev_h = h

            segment = h & r
            intersection.append((segment.duration, segment))

        intersection = sorted(intersection)
        for _, segment in intersection[:-1]:
            under.add(segment)

        # extent
        extent = reference.extent()

        # correct (neither under- nor over-segmented)
        correct = under.union(over).gaps(focus=extent)

        # frontier error (both under- and over-segmented)
        frontier = under.crop(over)

        # under-segmented
        not_over = over.gaps(focus=extent)
        only_under = under.crop(not_over)

        # over-segmented
        not_under = under.gaps(focus=extent)
        only_over = over.crop(not_under)

        status = Annotation(uri=reference.uri)
        for segment in correct:
            status[segment, '_'] = 'correct'
        for segment in frontier:
            status[segment, '_'] = 'frontier'
        for segment in only_over:
            status[segment, '_'] = 'over'
        for segment in only_under:
            status[segment, '_'] = 'under'

        return status.smooth()
Example #35
    def _sliding_samples(self):

        uris = list(self.data_)
        durations = np.array([self.data_[uri]["duration"] for uri in uris])
        probabilities = durations / np.sum(durations)
        sliding_segments = SlidingWindow(
            duration=self.duration, step=self.step * self.duration
        )

        while True:

            np.random.shuffle(uris)

            # loop on all files
            for uri in uris:

                datum = self.data_[uri]

                # make a copy of current file
                current_file = dict(datum["current_file"])

                # compute features for the whole file
                features = self.feature_extraction(current_file)

                # randomly shift 'annotated' segments start time so that
                # we avoid generating exactly the same subsequence twice
                annotated = Timeline()
                for segment in get_annotated(current_file):
                    shifted_segment = Segment(
                        segment.start + np.random.random() * self.duration, segment.end
                    )
                    if shifted_segment:
                        annotated.add(shifted_segment)

                samples = []
                for sequence in sliding_segments(annotated):

                    X = features.crop(sequence, mode="center", fixed=self.duration)
                    y = self.crop_y(datum["y"], sequence)
                    sample = {"X": X, "y": y}

                    if self.mask is not None:

                        # extract mask for current sub-segment
                        mask = current_file[self.mask].crop(
                            sequence, mode="center", fixed=self.duration
                        )

                        # it might happen that "mask" and "y" use different
                        # sliding windows. therefore, we simply resample "mask"
                        # to match "y"
                        if len(mask) != len(y):
                            mask = scipy.signal.resample(mask, len(y), axis=0)
                        sample["mask"] = mask

                    for key, classes in self.file_labels_.items():
                        sample[key] = classes.index(current_file[key])

                    samples.append(sample)

                np.random.shuffle(samples)
                for sample in samples:
                    yield sample
Example #36
    def __call__(self, reference, hypothesis):

        if isinstance(reference, Annotation):
            reference = reference.get_timeline()

        if isinstance(hypothesis, Annotation):
            hypothesis = hypothesis.get_timeline()

        # over-segmentation
        over = Timeline(uri=reference.uri)
        prev_r = reference[0]
        intersection = []
        for r, h in reference.co_iter(hypothesis):

            if r != prev_r:
                intersection = sorted(intersection)
                for _, segment in intersection[:-1]:
                    over.add(segment)
                intersection = []
                prev_r = r

            segment = r & h
            intersection.append((segment.duration, segment))

        intersection = sorted(intersection)
        for _, segment in intersection[:-1]:
            over.add(segment)

        # under-segmentation
        under = Timeline(uri=reference.uri)
        prev_h = hypothesis[0]
        intersection = []
        for h, r in hypothesis.co_iter(reference):

            if h != prev_h:
                intersection = sorted(intersection)
                for _, segment in intersection[:-1]:
                    under.add(segment)
                intersection = []
                prev_h = h

            segment = h & r
            intersection.append((segment.duration, segment))

        intersection = sorted(intersection)
        for _, segment in intersection[:-1]:
            under.add(segment)

        # extent
        extent = reference.extent()

        # correct (neither under- nor over-segmented)
        correct = under.union(over).gaps(support=extent)

        # frontier error (both under- and over-segmented)
        frontier = under.crop(over)

        # under-segmented
        not_over = over.gaps(support=extent)
        only_under = under.crop(not_over)

        # over-segmented
        not_under = under.gaps(support=extent)
        only_over = over.crop(not_under)

        status = Annotation(uri=reference.uri)
        # for segment in correct:
        #     status[segment, '_'] = 'correct'
        for segment in frontier:
            status[segment, '_'] = 'shift'
        for segment in only_over:
            status[segment, '_'] = 'over-segmentation'
        for segment in only_under:
            status[segment, '_'] = 'under-segmentation'

        return status.support()