Code example #1
def write_test_file(data_dir, output_file, trial_length):
    annotations, max_length, speakers = read_annotaitons(data_dir)
    # create artificial non-overlapping trial segments, each of length trial_length
    trial_segments = Timeline()
    for i in range(0, int(max_length) // trial_length):
        trial_segments.add(Segment(start=i*trial_length, end=(i+1)*trial_length))

    with open(output_file, 'w') as f:
        for label in speakers.keys():
            for annotation in annotations:
                # make sure our trial segments do not extend beyond the total length of the speech data
                support = annotation.get_timeline().extent()
                # use a slightly smaller extent so that an embedding of 3 seconds can still be computed
                adjusted_trial_segments = trial_segments.crop(Segment(start=support.start, end=support.end - 3.),
                                                              mode='loose')
                uri = annotation.uri
                cur_timeline = annotation.label_timeline(label, copy=False)
                for trial_segment in adjusted_trial_segments:
                    cropped_speaker = cur_timeline.crop(trial_segment, mode='intersection')
                    if not cropped_speaker:
                        f.write('{0} {1} {2:0>7.2f} {3:0>7.2f} nontarget - -\n'.format(
                            label,
                            uri,
                            trial_segment.start,
                            trial_segment.end))
                    else:
                        f.write('{0} {1} {2:0>7.2f} {3:0>7.2f} target {4:0>7.2f} {5:0>7.2f}\n'.format(
                            label,
                            uri,
                            trial_segment.start,
                            trial_segment.end,
                            cropped_speaker[0].start,
                            cropped_speaker[0].duration))
Code example #2
File: bic.py Project: pyannote/pyannote-algorithms
    def apply(self, features, segmentation=None):
        """
        Parameters
        ----------
        features : Features
        segmentation : Timeline, optional
        """

        if segmentation is None:
            segmentation = Timeline(segments=[features.getExtent()])

        sliding_window = features.sliding_window
        min_samples = sliding_window.durationToSamples(self.min_duration)
        precision = sliding_window.durationToSamples(self.precision)

        segmenter = SKLearnBICSegmentation(
            penalty_coef=self.penalty_coef,
            covariance_type=self.covariance_type,
            min_samples=min_samples,
            precision=precision)

        result = Timeline()

        for long_segment in segmentation:

            X = features.crop(long_segment)
            boundaries = segmenter.apply(X)
            for t, T in pairwise(boundaries):
                segment = sliding_window.rangeToSegment(t, T - t)
                shifted_segment = Segment(long_segment.start + segment.start,
                                          long_segment.start + segment.end)
                result.add(shifted_segment)

        return result
Code example #4
    def _preprocess(self, reference, hypothesis):

        if not isinstance(reference, Annotation):
            raise TypeError('reference must be an instance of `Annotation`')

        if isinstance(hypothesis, Annotation):
            hypothesis = hypothesis.get_timeline()

        # reference where short intra-label gaps are removed
        filled = Timeline()
        for label in reference.labels():
            label_timeline = reference.label_timeline(label)
            for gap in label_timeline.gaps():
                if gap.duration < self.tolerance:
                    label_timeline.add(gap)

            for segment in label_timeline.coverage():
                filled.add(segment)

        # reference coverage after filling gaps
        coverage = filled.coverage()

        reference_partition = self._partition(filled, coverage)
        hypothesis_partition = self._partition(hypothesis, coverage)

        return reference_partition, hypothesis_partition
Code example #5
def test_get_overlap():
    annotation = Annotation()
    annotation[Segment(0, 5)] = "A"
    annotation[Segment(10, 15)] = "A"
    annotation[Segment(20, 25)] = "A"

    annotation[Segment(0, 10)] = "B"
    annotation[Segment(15, 25)] = "B"

    annotation[Segment(5, 10)] = "C"
    annotation[Segment(20, 30)] = "C"

    assert (annotation.get_overlap()
            ==
            Timeline([Segment(0, 10), Segment(20, 25)]))

    assert (annotation.get_overlap(["A", "B"])
            ==
            Timeline([Segment(0, 5), Segment(20, 25)]))

    assert (annotation.get_overlap(["A", "C"])
            ==
            Timeline([Segment(20, 25)]))

    assert (annotation.get_overlap(["B", "C"])
            ==
            Timeline([Segment(5, 10), Segment(20, 25)]))
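
Annotation.get_overlap() is only available in newer pyannote.core releases. As a hedged sketch, the same overlapped regions can be derived from Timeline.co_iter; the annotation below is made up for illustration and is not part of the test above.

from pyannote.core import Annotation, Segment, Timeline

annotation = Annotation(uri='file1')
annotation[Segment(0, 5)] = "A"
annotation[Segment(0, 10)] = "B"

timeline = annotation.get_timeline()
overlap = Timeline(uri=annotation.uri)
# co_iter yields every pair of intersecting segments from the two timelines,
# including a segment paired with itself, so identical pairs are skipped
for s1, s2 in timeline.co_iter(timeline):
    if s1 == s2:
        continue
    overlap.add(s1 & s2)

assert overlap.support() == Timeline([Segment(0, 5)])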
Code example #6
File: util.py Project: yinruiqing/pyannote-database
def get_annotated(current_file):

    # if protocol provides 'annotated' key, use it
    if 'annotated' in current_file:
        annotated = current_file['annotated']
        return annotated

    # if it does not, but does provide 'wav' key
    # try and use wav duration
    if 'wav' in current_file:
        wav = current_file['wav']
        try:
            from pyannote.audio.features.utils import get_wav_duration
            duration = get_wav_duration(wav)
        except ImportError as e:
            pass
        else:
            warnings.warn('"annotated" was approximated by "wav" duration.')
            annotated = Timeline([Segment(0, duration)])
            return annotated

    warnings.warn('"annotated" was approximated by "annotation" extent.')
    extent = current_file['annotation'].get_timeline().extent()
    annotated = Timeline([extent])
    return annotated
Code example #7
    def _preprocess(self, reference, hypothesis):

        if not isinstance(reference, Annotation):
            raise TypeError('reference must be an instance of `Annotation`')

        if isinstance(hypothesis, Annotation):
            hypothesis = hypothesis.get_timeline()

        # reference where short intra-label gaps are removed
        filled = Timeline()
        for label in reference.labels():
            label_timeline = reference.label_timeline(label)
            for gap in label_timeline.gaps():
                if gap.duration < self.tolerance:
                    label_timeline.add(gap)

            for segment in label_timeline.support():
                filled.add(segment)

        # reference coverage after filling gaps
        coverage = filled.support()

        reference_partition = self._partition(filled, coverage)
        hypothesis_partition = self._partition(hypothesis, coverage)

        return reference_partition, hypothesis_partition
Code example #8
def test_initialized_with_empty_segments():
  # The first timeline includes empty segments.
  first_timeline = Timeline([Segment(1, 5), Segment(6, 6), Segment(7, 7), Segment(8, 10)])

  # The second has no empty segments.
  second_timeline = Timeline([Segment(1, 5), Segment(8, 10)])

  assert first_timeline == second_timeline
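
The two timelines compare equal because a zero-duration segment is falsy and is dropped when a Timeline is built (or when it is added, as example #18 below shows). A minimal check of that assumption:

from pyannote.core import Segment

assert not Segment(6, 6)   # empty segment evaluates to False
assert Segment(1, 5)       # non-empty segment evaluates to True
assert Segment(1, 5).duration == 4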
Code example #9
def test_union_extent():
    first_timeline = Timeline([Segment(0, 1),
                               Segment(2, 3),
                               Segment(4, 5)])
    second_timeline = Timeline([Segment(1.5, 6)])

    union_timeline = first_timeline.union(second_timeline)
    assert union_timeline.extent() == Segment(0, 6)
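
As the test above suggests, union() keeps the segments of both timelines as they are and only the extent spans them; merging overlapping segments is the job of support(). A small sketch with made-up values:

from pyannote.core import Segment, Timeline

first = Timeline([Segment(0, 1), Segment(2, 3)])
second = Timeline([Segment(2.5, 4)])

union = first.union(second)
assert union == Timeline([Segment(0, 1), Segment(2, 3), Segment(2.5, 4)])
assert union.extent() == Segment(0, 4)

# support() merges overlapping segments into a minimal cover
assert union.support() == Timeline([Segment(0, 1), Segment(2, 4)])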
Code example #10
File: signal.py Project: zhiqizhang/pyannote-audio
    def apply(self, predictions, dimension=0):
        """Peak detection

        Parameters
        ----------
        predictions : SlidingWindowFeature
            Predictions returned by segmentation approaches.
        dimension : int, optional
            Dimension to process when predictions are multi-dimensional.
            Defaults to 0.

        Returns
        -------
        segmentation : Timeline
            Partition.
        """

        if len(predictions.data.shape) == 1:
            y = predictions.data
        elif predictions.data.shape[1] == 1:
            y = predictions.data[:, 0]
        else:
            y = predictions.data[:, dimension]

        if self.log_scale:
            y = np.exp(y)

        sw = predictions.sliding_window

        precision = sw.step
        order = max(1, int(np.rint(self.min_duration / precision)))
        indices = scipy.signal.argrelmax(y, order=order)[0]

        if self.scale == 'absolute':
            mini = 0
            maxi = 1

        elif self.scale == 'relative':
            mini = np.nanmin(y)
            maxi = np.nanmax(y)

        elif self.scale == 'percentile':
            mini = np.nanpercentile(y, 1)
            maxi = np.nanpercentile(y, 99)

        threshold = mini + self.alpha * (maxi - mini)

        peak_time = np.array(
            [sw[i].middle for i in indices if y[i] > threshold])

        n_windows = len(y)
        start_time = sw[0].start
        end_time = sw[n_windows].end

        boundaries = np.hstack([[start_time], peak_time, [end_time]])
        segmentation = Timeline()
        for i, (start, end) in enumerate(pairwise(boundaries)):
            segment = Segment(start, end)
            segmentation.add(segment)

        return segmentation
Code example #11
def test_crop_mapping():
    timeline = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)])
    cropped, mapping = timeline.crop(Segment(1, 2), returns_mapping=True)

    expected_cropped = Timeline([Segment(1, 2)])
    assert cropped == expected_cropped

    expected_mapping = {Segment(1, 2): [Segment(0, 2), Segment(1, 2)]}
    assert mapping == expected_mapping
Code example #12
File: signal.py Project: instinct2k18/pyannote-audio
    def apply(self, predictions, dimension=0):
        """Peak detection

        Parameters
        ----------
        predictions : SlidingWindowFeature
            Predictions returned by segmentation approaches.
        dimension : int, optional
            Dimension to process when predictions are multi-dimensional.
            Defaults to 0.

        Returns
        -------
        segmentation : Timeline
            Partition.
        """

        if len(predictions.data.shape) == 1:
            y = predictions.data
        elif predictions.data.shape[1] == 1:
            y = predictions.data[:, 0]
        else:
            y = predictions.data[:, dimension]

        if self.log_scale:
            y = np.exp(y)

        sw = predictions.sliding_window

        precision = sw.step
        order = max(1, int(np.rint(self.min_duration / precision)))
        indices = scipy.signal.argrelmax(y, order=order)[0]

        if self.scale == 'absolute':
            mini = 0
            maxi = 1

        elif self.scale == 'relative':
            mini = np.nanmin(y)
            maxi = np.nanmax(y)

        elif self.scale == 'percentile':
            mini = np.nanpercentile(y, 1)
            maxi = np.nanpercentile(y, 99)

        threshold = mini + self.alpha * (maxi - mini)

        peak_time = np.array([sw[i].middle for i in indices if y[i] > threshold])

        n_windows = len(y)
        start_time = sw[0].start
        end_time = sw[n_windows].end

        boundaries = np.hstack([[start_time], peak_time, [end_time]])
        segmentation = Timeline()
        for i, (start, end) in enumerate(pairwise(boundaries)):
            segment = Segment(start, end)
            segmentation.add(segment)

        return segmentation
Code example #13
def test_remove_and_extent():
    t = Timeline(uri='MyAudioFile')
    t.add(Segment(6, 8))
    t.add(Segment(7, 9))
    t.add(Segment(6, 9))

    t.remove(Segment(6, 9))
    assert t.extent() == Segment(6, 9)
Code example #14
    def run(self):
        with self.in_subtitles().open('r') as fp:
            transcription = pyannote.core.json.load(fp)
        timeline = Timeline()
        for start, end, edge in transcription.ordered_edges_iter(data=True):
            if 'subtitle' not in edge:
                continue
            segment = Segment(start, end)
            timeline.add(segment)

        with self.out_put().open('w') as fp:
            pyannote.core.json.dump(timeline, fp)
Code example #15
    def __call__(self, sequence=Stream.NoNewData):

        if isinstance(sequence, More):
            sequence = sequence.output

        if sequence in [Stream.EndOfStream, Stream.NoNewData]:
            return sequence

        data = sequence.data
        active = data[0]

        sw = sequence.sliding_window
        start = sw[0].middle

        timeline = Timeline()
        timeline.start = start

        for i, y in enumerate(data):
            if active and not y:
                segment = Segment(start, sw[i].middle)
                timeline.add(segment)
                active = False
            elif not active and y:
                active = True
                start = sw[i].middle

        if active:
            segment = Segment(start, sw[i].middle)
            timeline.add(segment)

        timeline.end = sw[i].middle

        return timeline
Code example #16
def overlap_timeline(uri, annotation):
    timeline = annotation.get_timeline()
    segmentation = timeline.segmentation()
    l_segments = [{'seg': segment, 'count': 0} for segment in segmentation]
    #print(l_segments)
    for seg in timeline:
        for curr in l_segments:
            if curr['seg'] in seg:
                curr['count'] += 1
    overlap_timeline = Timeline(uri=uri)
    for curr in l_segments:
        if curr['count'] > 1:
            overlap_timeline.add(curr['seg'])
    return overlap_timeline
Code example #17
    def preprocess(self, current_file, identifier=None):
        """Pre-compute file-wise X and y"""

        # extract features for the whole file
        # (if it has not been done already)
        current_file = self.periodic_preprocess(current_file,
                                                identifier=identifier)

        # if labels have already been extracted, do nothing
        if identifier in self.preprocessed_.setdefault('y', {}):
            return current_file

        # get features as pyannote.core.SlidingWindowFeature instance
        X = self.preprocessed_['X'][identifier]
        sw = X.sliding_window
        n_samples = X.getNumber()

        y = np.zeros((n_samples + 4, 1), dtype=np.int8) - 1
        # [-1] ==> unknown / [0] ==> not change part / [1] ==> change part

        annotated = get_annotated(current_file)
        annotation = current_file['annotation']

        segments = []
        for segment, _ in annotation.itertracks():
            segments.append(
                Segment(segment.start - self.balance,
                        segment.start + self.balance))
            segments.append(
                Segment(segment.end - self.balance,
                        segment.end + self.balance))
        change_part = Timeline(segments).support().crop(annotated,
                                                        mode='intersection')

        # iterate over non-change regions
        for non_changes in change_part.gaps(annotated):
            indices = sw.crop(non_changes, mode='loose')
            y[indices, 0] = 0

        # iterate over change regions
        for changes in change_part:
            indices = sw.crop(changes, mode='loose')
            y[indices, 0] = 1

        y = SlidingWindowFeature(y[:-1], sw)
        self.preprocessed_['y'][identifier] = y

        return current_file
Code example #18
def test_consistent_timelines_with_empty_segments():
  # The first timeline is initialized with Segments, some empty.
  first_timeline = Timeline([Segment(1, 5), Segment(6, 6), Segment(7, 7), Segment(8, 10)])

  # The second timeline adds one Segment at a time, including empty ones.
  second_timeline = Timeline()
  second_timeline.add(Segment(1, 5))
  second_timeline.add(Segment(6, 6))
  second_timeline.add(Segment(7, 7))
  second_timeline.add(Segment(8, 10))

  assert first_timeline == second_timeline
Code example #19
    def tst_enrol_iter(self):
        # load enrolments
        data_dir = Path(__file__).parent / 'data' / 'speaker_spotting'
        enrolments = data_dir / 'tst.enrol.txt'
        names = ['uri', 'NA0', 'start', 'duration',
                 'NA1', 'NA2', 'NA3', 'model_id']
        enrolments = read_table(enrolments, delim_whitespace=True, names=names)

        for model_id, turns in enrolments.groupby(by=['uri', 'model_id']):

            # gather enrolment data
            segments = []
            uri = ''
            for t, turn in enumerate(turns.itertuples()):
                if t == 0:
                    uri = turn.uri
                segment = Segment(start=turn.start,
                                  end=turn.start + turn.duration)
                if segment:
                    segments.append(segment)
            enrol_with = Timeline(segments=segments, uri=uri)

            current_enrolment = {
                'database': 'Odessa',
                'uri': uri,
                'model_id': model_id[1],  # model_id
                'enrol_with': enrol_with,
            }

            yield current_enrolment
Code example #20
    def _subset(self, protocol, subset):

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        # load annotations
        path = op.join(
            data_dir,
            'librispeech-{protocol}.{subset}.mdtm'.format(subset=subset,
                                                          protocol=protocol))
        mdtms = self.mdtm_parser_.read(path)

        for uri in sorted(mdtms.uris):
            annotation = mdtms(uri)
            current_file = {
                'database':
                'LibriSpeech',
                'uri':
                uri,
                'annotation':
                annotation,
                # annotated part as pyannote.core.Timeline instance
                'annotated':
                Timeline(uri=uri,
                         segments=[annotation.get_timeline().extent()])
            }

            yield current_file
Code example #21
    def _subset_enrollment(self, protocol, subset):
        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')
        enrolments = op.join(data_dir, '{protocol}.{subset}.txt'.format(subset=subset, protocol=protocol))
        names = ['uri', 'NA0', 'start', 'duration', 'NA1', 'NA2', 'NA3', 'model_id']
        enrolments = read_table(enrolments, delim_whitespace=True, names=names)

        for model_id, turns in enrolments.groupby(by='model_id'):

            # gather enrolment data
            segments = []
            for t, turn in enumerate(turns.itertuples()):
                if t == 0:
                    raw_uri = turn.uri
                    uri = f'{raw_uri}'
                segment = Segment(start=turn.start, end=turn.start + turn.duration)
                if segment:
                    segments.append(segment)
            enrol_with = Timeline(segments=segments, uri=uri)

            current_enrolment = {
                'database': 'RTVE2018',
                'uri': uri,
                'model_id': model_id,
                'enrol_with': enrol_with,
            }

            yield current_enrolment
Code example #22
    def _xxx_enrol_iter(self, subset):

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')
        data_csv = op.join(data_dir, 'voxceleb1.csv')
        data = pd.read_csv(data_csv, index_col=['segment'])

        trial_csv = op.join(
            data_dir,
            'voxceleb1.verification.{subset}.csv'.format(subset=subset))
        trials = pd.read_csv(trial_csv)

        for model_id in trials['enrolment'].unique():

            try:
                row = data.loc[model_id]
            except KeyError as e:
                # file_id = model_id.split('/')[1][:-8]
                # msg = '{file_id} marked as duplicate in VoxCeleb 1.1'
                # warnings.warn(msg.format(file_id=file_id))
                continue

            uri = model_id
            segment = Segment(0., row.end - row.start)
            current_enrolment = {
                'database': 'VoxCeleb',
                'uri': uri,
                'model_id': model_id,
                'enrol_with': Timeline(uri=uri, segments=[segment]),
            }

            yield current_enrolment
Code example #23
def test_union():
    first_timeline = Timeline([Segment(0, 1),
                               Segment(2, 3),
                               Segment(4, 5)])
    second_timeline = Timeline([Segment(1.5, 4.5)])

    assert first_timeline.union(second_timeline) == Timeline([Segment(0, 1),
                                                              Segment(1.5, 4.5),
                                                              Segment(2, 3),
                                                              Segment(4, 5)])

    assert second_timeline.crop(first_timeline) == Timeline([Segment(2, 3),
                                                             Segment(4, 4.5)])

    assert list(first_timeline.co_iter(second_timeline)) == [(Segment(2, 3), Segment(1.5, 4.5)),
                                                             (Segment(4, 5), Segment(1.5, 4.5))]
Code example #24
File: util.py Project: Rehan-Ahmad/pyannote-database
def load_uem(file_uem):
    """Load UEM file

    Parameters
    ----------
    file_uem : `str`
        Path to UEM file.

    Returns
    -------
    timelines : `dict`
        Evaluation map as a {uri: pyannote.core.Timeline} dictionary.
    """

    names = ['uri', 'NA1', 'start', 'end']
    dtype = {'uri': str, 'start': float, 'end': float}
    data = pd.read_csv(file_uem, names=names, dtype=dtype,
                       delim_whitespace=True)

    timelines = dict()
    for uri, parts in data.groupby('uri'):
        segments = [Segment(part.start, part.end)
                    for i, part in parts.iterrows()]
        timelines[uri] = Timeline(segments=segments, uri=uri)

    return timelines
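
Because file_uem is handed directly to pandas, the function can also be exercised with an in-memory buffer; a quick self-contained check (uris and times are made up):

from io import StringIO
from pyannote.core import Segment, Timeline

uem_content = "fileA 1 0.00 120.00\nfileB 1 0.00 60.50\n"
timelines = load_uem(StringIO(uem_content))

assert timelines['fileA'] == Timeline([Segment(0.0, 120.0)])
assert timelines['fileB'] == Timeline([Segment(0.0, 60.5)])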
Code example #25
    def common_enrol_iter(self):

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')
        data_csv = op.join(data_dir, 'voxceleb1.csv')
        data = pd.read_csv(data_csv, index_col=['segment'])

        data = data.groupby('identification').get_group('trn')

        for model_id, model_rows in data.groupby('speaker'):
            uris = []
            enrol_with = []
            for uri, rows in model_rows.groupby('uri'):
                uris.append(uri)
                segments = []
                for row in rows.itertuples():
                    segments.append(Segment(row.start, row.end))
                enrol_with.append(Timeline(uri=uri, segments=segments))

            current_enrolment = {
                'database': 'VoxCeleb',
                'model_id': model_id,
                'uri': uris,
                'enrol_with': enrol_with
            }

            yield current_enrolment
Code example #26
def test_extrude():
    annotation = Annotation()
    annotation[Segment(0, 10)] = "A"
    annotation[Segment(15, 20)] = "A"
    annotation[Segment(20, 35)] = "B"
    annotation[Segment(15, 25)] = "C"
    annotation[Segment(30, 35)] = "C"

    extrusion_tl = Timeline([Segment(5, 12),
                             Segment(14, 25)])

    intersection_expected = Annotation()
    intersection_expected[Segment(0, 5)] = "A"
    intersection_expected[Segment(25, 35)] = "B"
    intersection_expected[Segment(30, 35)] = "C"

    assert (annotation.extrude(extrusion_tl, mode="intersection")
            ==
            intersection_expected)

    loose_expected = Annotation()
    loose_expected[Segment(30, 35)] = "C"

    assert (annotation.extrude(extrusion_tl, mode="loose")
            ==
            loose_expected)

    strict_expected = Annotation()
    strict_expected[Segment(0, 10)] = "A"
    strict_expected[Segment(20, 35)] = "B"
    strict_expected[Segment(30, 35)] = "C"

    assert (annotation.extrude(extrusion_tl, mode="strict")
            ==
            strict_expected)
Code example #27
def predict(audio, algorithm='SpectralClustering'):
    # Speech Activity Detection

    sad_scores = sad(audio)
    binarize_sad = Binarize(offset=0.52,
                            onset=0.52,
                            log_scale=True,
                            min_duration_off=0.1,
                            min_duration_on=0.1)
    speech = binarize_sad.apply(sad_scores, dimension=1)

    # Speaker Change Detection

    scd_scores = scd(audio)
    peak = Peak(alpha=0.10, min_duration=0.10, log_scale=True)
    partition = peak.apply(scd_scores, dimension=1)

    # Overlapped Speech Detection

    # ovl_scores = ovl(audio)
    # binarize_ovl = Binarize(offset=0.55, onset=0.55, log_scale=True,
    #                         min_duration_off=0.1, min_duration_on=0.1)
    # overlap = binarize_ovl.apply(ovl_scores, dimension=1)

    # Speaker Embedding

    speech_turns = partition.crop(speech)
    embeddings = emb(audio)

    long_turns = Timeline(
        segments=[s for s in speech_turns if s.duration > .5])

    return long_turns, sad_scores, scd_scores, embeddings
Code example #28
    def tst_iter(self):

        # absolute path to 'data' directory where annotations are stored
        data_dir = Path(__file__).parent / 'data' / 'speaker_diarization'

        annotated = data_dir / 'fullset.uem'
        names = ['uri', 'NA0', 'start', 'end']
        annotated = read_table(annotated, delim_whitespace=True, names=names)
        annotated_segments = {}
        for segment in annotated.itertuples():
            annotated_segments[segment.uri] = Segment(start=segment.start, end=segment.end)

        # iterate through the text annotation files
        for filename in os.listdir(data_dir):
            if filename.endswith(".txt"):
                uri, _ = os.path.splitext(os.path.basename(filename))
                annotation = Annotation(uri=uri)

                names = ['start', 'end', 'speaker', 'speakerID']
                parsed_file = read_table(os.path.join(data_dir, filename), delim_whitespace=True, names=names)
                for t, turn in enumerate(parsed_file.itertuples()):
                    segment = Segment(start=turn.start,
                                      end=turn.end)
                    annotation[segment, t] = turn.speakerID

                current_file = {
                    'database': 'Odessa',
                    'uri': uri,
                    'annotated': Timeline(uri=uri, segments=[annotated_segments[uri]]),
                    'annotation': annotation}

                yield current_file
Code example #29
    def _xxx_iter(self, subset):

        data = self._load_data(subset)

        AnnotatedGroups = data['annotated'].groupby(by='uri')
        AnnotationGroups = data['annotation'].groupby(by='uri')

        for raw_uri, annotated in AnnotatedGroups:

            uri = f'{raw_uri}.Mix-Headset'

            segments = []
            for segment in annotated.itertuples():
                segments.append(Segment(start=segment.start, end=segment.end))

            annotation = Annotation(uri=uri)
            for t, turn in enumerate(
                    AnnotationGroups.get_group(raw_uri).itertuples()):
                segment = Segment(start=turn.start,
                                  end=turn.start + turn.duration)
                annotation[segment, t] = turn.speaker

            current_file = {
                'database': 'Test',
                'uri': uri,
                'annotated': Timeline(uri=uri, segments=segments),
                'annotation': annotation
            }

            yield current_file
Code example #30
def uem_timeline_from_file(uem_file, uniq_name=''):
    """
    outputs pyannote timeline segments for uem file

     <UEM> file format
     UNIQ_SPEAKER_ID CHANNEL START_TIME END_TIME
    """
    timeline = Timeline(uri=uniq_name)
    with open(uem_file, 'r') as f:
        lines = f.readlines()
        for line in lines:
            line = line.strip()
            speaker_id, channel, start_time, end_time = line.split()
            timeline.add(Segment(float(start_time), float(end_time)))

    return timeline
Code example #31
    def _xxx_enrol_iter(self, subset):

        # load enrolments
        data_dir = Path(__file__).parent / 'data' / 'speaker_spotting'
        enrolments = data_dir / f'{subset}.enrol.txt'
        names = [
            'uri', 'NA0', 'start', 'duration', 'NA1', 'NA2', 'NA3', 'model_id'
        ]
        enrolments = read_table(enrolments, delim_whitespace=True, names=names)

        for model_id, turns in enrolments.groupby(by='model_id'):

            # gather enrolment data
            segments = []
            for t, turn in enumerate(turns.itertuples()):
                if t == 0:
                    raw_uri = turn.uri
                    uri = f'{raw_uri}.Mix-Headset'
                segment = Segment(start=turn.start,
                                  end=turn.start + turn.duration)
                if segment:
                    segments.append(segment)
            enrol_with = Timeline(segments=segments, uri=uri)

            current_enrolment = {
                'database': 'Test',
                'uri': uri,
                'model_id': model_id,
                'enrol_with': enrol_with,
            }

            yield current_enrolment
Code example #32
def DER(outfile, AudioDataSet, annotationlist, audioLength):
    reference = Annotation()

    if not AudioDataSet == 'DiaExample':
        treeA = ET.parse(annotationlist[0])
        rootA = treeA.getroot()
        for child in rootA.findall('segment'):
            start, end = float(child.get('transcriber_start')), float(
                child.get('transcriber_end'))
            reference[Segment(start, end)] = 'A'

        treeB = ET.parse(annotationlist[1])
        rootB = treeB.getroot()
        for child in rootB.findall('segment'):
            start, end = float(child.get('transcriber_start')), float(
                child.get('transcriber_end'))
            reference[Segment(start, end)] = 'B'

        treeC = ET.parse(annotationlist[2])
        rootC = treeC.getroot()
        for child in rootC.findall('segment'):
            start, end = float(child.get('transcriber_start')), float(
                child.get('transcriber_end'))
            reference[Segment(start, end)] = 'C'

        treeD = ET.parse(annotationlist[3])
        rootD = treeD.getroot()
        for child in rootD.findall('segment'):
            start, end = float(child.get('transcriber_start')), float(
                child.get('transcriber_end'))
            reference[Segment(start, end)] = 'D'
    else:
        reference = Annotation()
        reference[Segment(0.15, 3.41)] = 'A'
        reference[Segment(3.83, 5.82)] = 'A'
        reference[Segment(6.75, 11.10)] = 'B'
        reference[Segment(11.32, 15.8)] = 'C'
        reference[Segment(15.9, 18.8)] = 'B'
        reference[Segment(18.8, 27.8)] = 'C'
        reference[Segment(27.8, 34.4)] = 'B'
        reference[Segment(34.4, 42)] = 'D'

    hypothesis = Annotation()
    f = open(outfile, 'r')
    for line in f.readlines():
        start = float(line.split(' ')[3])
        end = start + float(line.split(' ')[4])
        annotation = line.split(' ')[5][0:-1]
        hypothesis[Segment(start, end)] = annotation
    f.close()
    metric = DiarizationErrorRate()
    metricPurity = DiarizationPurity()
    uem = Timeline([Segment(0, audioLength)])

    print('DER: %.2f %%' % (metric(reference, hypothesis, uem=uem) * 100))
    print('Cluster Purity: %.2f %%' %
          (metricPurity(reference, hypothesis, uem=uem) * 100))

    return metric, reference, hypothesis
Code example #33
def load_sad_manual(dataset: Text, path: Text) -> Dict:
    """Load accepted pyannote.sad.manual examples

    Parameters
    ----------
    dataset : str
        Dataset containing annotations.
    path : str
        Path to annotated file

    Returns
    -------
    file : dict
        Dictionary containing the following keys:
        "audio" (Path) : path to audio file
        "annotated" (Timeline) : part of the audio annotated and accepted
        "speech" (Timeline) : part of the audio accepted as speech
    """

    db = connect()

    examples = [
        eg
        for eg in db.get_dataset(dataset)
        if eg["recipe"] == "pyannote.sad.manual"
        and eg["path"] == path
        and eg["answer"] == "accept"
    ]

    speech = Timeline(
        segments=[
            Segment(span["start"], span["end"])
            for eg in examples
            for span in eg["audio_spans"]
        ],
    ).support()

    annotated = Timeline(segments=[Segment(**eg["chunk"]) for eg in examples]).support()

    prodigy.log(f"RECIPE: {path}: loaded speech regions")

    return {
        "audio": Path(path),
        "speech": speech,
        "annotated": annotated,
    }
Code example #34
File: stream.py Project: instinct2k18/pyannote-audio
    def __call__(self, sequence=Stream.NoNewData):

        if isinstance(sequence, More):
            sequence = sequence.output

        if sequence in [Stream.EndOfStream, Stream.NoNewData]:
            return sequence

        data = sequence.data
        active = data[0]

        sw = sequence.sliding_window
        start = sw[0].middle

        timeline = Timeline()
        timeline.start = start

        for i, y in enumerate(data):
            if active and not y:
                segment = Segment(start, sw[i].middle)
                timeline.add(segment)
                active = False
            elif not active and y:
                active = True
                start = sw[i].middle

        if active:
            segment = Segment(start, sw[i].middle)
            timeline.add(segment)

        timeline.end = sw[i].middle

        return timeline
Code example #35
def get_annotated(current_file):
    """Get part of the file that is annotated.

    Parameters
    ----------
    current_file : `dict`
        File generated by a `pyannote.database` protocol.

    Returns
    -------
    annotated : `pyannote.core.Timeline`
        Part of the file that is annotated. Defaults to
        `current_file["annotated"]`. When it does not exist, try to use the
        full audio extent. When that fails, use "annotation" extent.
    """

    # if protocol provides 'annotated' key, use it
    if "annotated" in current_file:
        annotated = current_file["annotated"]
        return annotated

    # if it does not, but does provide a 'duration' key,
    # use that duration instead

    if "duration" in current_file:
        try:
            duration = current_file["duration"]
        except ImportError:
            pass
        else:
            annotated = Timeline([Segment(0, duration)])
            msg = '"annotated" was approximated by [0, audio duration].'
            warnings.warn(msg)
            return annotated

    extent = current_file["annotation"].get_timeline().extent()
    annotated = Timeline([extent])

    msg = ('"annotated" was approximated by "annotation" extent. '
           'Please provide "annotated" directly, or at the very '
           'least, use a "duration" preprocessor.')
    warnings.warn(msg)

    return annotated
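
A minimal sanity check of the last fallback, assuming the definition above (and its warnings import) is in scope; uri and times are made up:

from pyannote.core import Annotation, Segment, Timeline

current_file = {'annotation': Annotation(uri='file1')}
current_file['annotation'][Segment(1.0, 9.0)] = 'spk1'

# neither "annotated" nor "duration" is provided, so get_annotated
# falls back to the extent of the annotation (and emits a warning)
assert get_annotated(current_file) == Timeline([Segment(1.0, 9.0)])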
Code example #36
    def apply(self, current_file):

        # extract precomputed scores
        precomputed = self.precomputed_(current_file)

        # if this check has not been done yet, do it once and for all
        if not hasattr(self, "log_scale_"):
            # heuristic to determine whether scores are log-scaled
            if np.nanmean(precomputed.data) < 0:
                self.log_scale_ = True
            else:
                self.log_scale_ = False

        data = np.exp(precomputed.data) if self.log_scale_ \
               else precomputed.data

        # speech vs. non-speech
        speech_prob = SlidingWindowFeature(
            1. - data[:, 0],
            precomputed.sliding_window)
        speech = self.speech_binarize_.apply(speech_prob)

        if self.has_overlap_:

            # overlap vs. non-overlap
            overlap_prob = SlidingWindowFeature(
                data[:, 2], precomputed.sliding_window)
            overlap = self.overlap_binarize_.apply(overlap_prob)

            # overlap speech can only happen in speech regions
            overlap = overlap.crop(speech)
        else:
            # empty timeline
            overlap = Timeline()

        speech = speech.to_annotation(generator='string')
        overlap = overlap.to_annotation(generator='int')
        hypothesis = speech.update(overlap)

        return hypothesis
Code example #37
    def apply(self, feature, segmentation=None):

        if segmentation is None:
            focus = feature.getExtent()
            segmentation = Timeline(segments=[focus], uri=None)

        result = Timeline()
        for focus in segmentation:

            x, y = list(zip(*[
                (m, d) for m, d in self.iterdiff(feature, focus)
            ]))
            x = np.array(x)
            y = np.array(y)

            # find local maxima
            order = 1
            if self.min_duration > 0:
                order = int(self.min_duration / self.step)
            maxima = scipy.signal.argrelmax(y, order=order)

            x = x[maxima]
            y = y[maxima]

            # only keep high enough local maxima
            high_maxima = np.where(y > self.threshold)

            # create list of segment boundaries
            # do not forget very first and last boundaries
            boundaries = itertools.chain(
                [focus.start], x[high_maxima], [focus.end]
            )

            # create list of segments from boundaries
            segments = [Segment(*p) for p in pairwise(boundaries)]

            result.update(Timeline(segments=segments))

        return result
Code example #38
File: base.py Project: pombredanne/pyannote-parser
    def read(self, path, uri=None, **kwargs):

        # load whole file
        df = pandas.read_table(path,
                               delim_whitespace=True,
                               header=None, names=self.fields(),
                               comment=self.comment(),
                               converters=self.converters())

        # remove comment lines
        # (i.e. lines for which all fields are either None or NaN)
        keep = [not all([pandas.isnull(r[n]) for n in self.fields()])
                for _, r in df.iterrows()]
        df = df[keep]

        # add 'segment' column build from start time & duration
        df[PYANNOTE_SEGMENT] = [self.get_segment(row)
                                for r, row in df.iterrows()]

        # add uri column in case it does not exist
        if PYANNOTE_URI not in df:
            if uri is None:
                raise ValueError('missing uri -- use uri=')
            df[PYANNOTE_URI] = uri

        # obtain list of resources
        uris = list(df[PYANNOTE_URI].unique())

        self._loaded = {}

        # loop on resources
        for uri in uris:

            # filter based on resource
            df_ = df[df[PYANNOTE_URI] == uri]

            t = Timeline.from_df(df_, uri=uri)
            self._loaded[uri, None] = t

        return self
Code example #39
    def run(self):

        # wav file duration
        wav = self.in_wav().path
        with contextlib.closing(wave.open(wav, 'r')) as f:
            frames = f.getnframes()
            rate = f.getframerate()
        duration = frames / rate
        extent = Segment(0., duration)

        with self.in_speaker().open('r') as fp:
            speaker = pyannote.core.json.load(fp)

        timeline = Timeline()
        for segment, _ in speaker.itertracks():
            timeline.add(segment)

        # fill gaps
        for gap in timeline.gaps(extent):
            if gap.duration < self.fill_gaps:
                timeline.add(gap)

        timeline = timeline.coverage()

        # dump as annotation...
        if self.to_annotation:

            annotation = Annotation()
            for s, segment in enumerate(timeline):
                annotation[segment] = s
            annotation = annotation.anonymize_labels(generator='string')

            with self.out_put().open('w') as fp:
                pyannote.core.json.dump(annotation, fp)

        # ... or as timeline
        else:

            with self.out_put().open('w') as fp:
                pyannote.core.json.dump(timeline, fp)
Code example #40
File: utils.py Project: pyannote/pyannote-metrics
    def _get_collar(self, reference, duration):

        # initialize empty timeline
        collar = Timeline(uri=reference.uri)

        if duration == 0.:
            return collar

        # iterate over all segments in reference
        for segment in reference.itersegments():

            # add collar centered on start time
            t = segment.start
            collar.add(Segment(t - .5 * duration, t + .5 * duration))

            # add collar centered on end time
            t = segment.end
            collar.add(Segment(t - .5 * duration, t + .5 * duration))

        # merge overlapping collars and return
        return collar.coverage()
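
For a single reference segment the collar reduces to two duration-wide windows centred on its boundaries. A small illustration with made-up numbers, mirroring the loop above outside the class (support() plays the role of coverage() in newer pyannote.core):

from pyannote.core import Segment, Timeline

segment, duration = Segment(10, 20), 1.0

collar = Timeline()
collar.add(Segment(segment.start - .5 * duration, segment.start + .5 * duration))
collar.add(Segment(segment.end - .5 * duration, segment.end + .5 * duration))

# merge collars that would overlap (none do here)
assert collar.support() == Timeline([Segment(9.5, 10.5), Segment(19.5, 20.5)])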
Code example #41
    def __call__(self, reference, hypothesis):

        if isinstance(reference, Annotation):
            reference = reference.get_timeline()

        if isinstance(hypothesis, Annotation):
            hypothesis = hypothesis.get_timeline()

        # over-segmentation
        over = Timeline(uri=reference.uri)
        prev_r = reference[0]
        intersection = []
        for r, h in reference.co_iter(hypothesis):

            if r != prev_r:
                intersection = sorted(intersection)
                for _, segment in intersection[:-1]:
                    over.add(segment)
                intersection = []
                prev_r = r

            segment = r & h
            intersection.append((segment.duration, segment))

        intersection = sorted(intersection)
        for _, segment in intersection[:-1]:
            over.add(segment)

        # under-segmentation
        under = Timeline(uri=reference.uri)
        prev_h = hypothesis[0]
        intersection = []
        for h, r in hypothesis.co_iter(reference):

            if h != prev_h:
                intersection = sorted(intersection)
                for _, segment in intersection[:-1]:
                    under.add(segment)
                intersection = []
                prev_h = h

            segment = h & r
            intersection.append((segment.duration, segment))

        intersection = sorted(intersection)
        for _, segment in intersection[:-1]:
            under.add(segment)

        # extent
        extent = reference.extent()

        # correct (neither under- nor over-segmented)
        correct = under.union(over).gaps(focus=extent)

        # frontier error (both under- and over-segmented)
        frontier = under.crop(over)

        # under-segmented
        not_over = over.gaps(focus=extent)
        only_under = under.crop(not_over)

        # over-segmented
        not_under = under.gaps(focus=extent)
        only_over = over.crop(not_under)

        status = Annotation(uri=reference.uri)
        for segment in correct:
            status[segment, '_'] = 'correct'
        for segment in frontier:
            status[segment, '_'] = 'frontier'
        for segment in only_over:
            status[segment, '_'] = 'over'
        for segment in only_under:
            status[segment, '_'] = 'under'

        return status.smooth()
Code example #42
def test_crop(timeline):

    selection = Segment(3,7)

    expected_answer = Timeline(uri='MyAudioFile')
    expected_answer.add(Segment(3, 4))
    expected_answer.add(Segment(5, 7))
    expected_answer.add(Segment(6, 7))

    assert timeline.crop(selection, mode='intersection') == expected_answer

    expected_answer = Timeline(uri='MyAudioFile')
    expected_answer.add(Segment(5, 7))
    assert timeline.crop(selection, mode='strict') == expected_answer


    expected_answer = Timeline(uri="pouet")
    expected_answer.add(Segment(1, 4))
    expected_answer.add(Segment(5, 7))
    expected_answer.add(Segment(6, 8))

    assert timeline.crop(selection, mode='loose') == expected_answer
Code example #43
@pytest.fixture
def timeline():
    t = Timeline(uri='MyAudioFile')
    t.add(Segment(6, 8))
    t.add(Segment(0.5, 3))
    t.add(Segment(8.5, 10))
    t.add(Segment(1, 4))
    t.add(Segment(5, 7))
    t.add(Segment(7, 8))

    return t
Code example #44
File: signal.py Project: instinct2k18/pyannote-audio
    def apply(self, predictions, dimension=0):
        """
        Parameters
        ----------
        predictions : SlidingWindowFeature
            Must be mono-dimensional
        dimension : int, optional
            Which dimension to process
        """

        if len(predictions.data.shape) == 1:
            data = predictions.data
        elif predictions.data.shape[1] == 1:
            data = predictions.data[:, 0]
        else:
            data = predictions.data[:, dimension]

        if self.log_scale:
            data = np.exp(data)

        n_samples = predictions.getNumber()
        window = predictions.sliding_window
        timestamps = [window[i].middle for i in range(n_samples)]

        # initial state
        start = timestamps[0]
        label = data[0] > self.onset

        if self.scale == 'absolute':
            mini = 0
            maxi = 1

        elif self.scale == 'relative':
            mini = np.nanmin(data)
            maxi = np.nanmax(data)

        elif self.scale == 'percentile':
            mini = np.nanpercentile(data, 1)
            maxi = np.nanpercentile(data, 99)

        onset = mini + self.onset * (maxi - mini)
        offset = mini + self.offset * (maxi - mini)

        # timeline meant to store 'active' segments
        active = Timeline()

        for t, y in zip(timestamps[1:], data[1:]):

            # currently active
            if label:
                # switching from active to inactive
                if y < offset:
                    segment = Segment(start - self.pad_onset,
                                      t + self.pad_offset)
                    active.add(segment)
                    start = t
                    label = False

            # currently inactive
            else:
                # switching from inactive to active
                if y > onset:
                    start = t
                    label = True

        # if active at the end, add final segment
        if label:
            segment = Segment(start - self.pad_onset, t + self.pad_offset)
            active.add(segment)

        # because of padding, some 'active' segments might be overlapping
        # therefore, we merge those overlapping segments
        active = active.support()

        # remove short 'active' segments
        active = Timeline(
            [s for s in active if s.duration > self.min_duration_on])

        # fill short 'inactive' segments
        inactive = active.gaps()
        for s in inactive:
            if s.duration < self.min_duration_off:
                active.add(s)
        active = active.support()

        return active
Code example #45
    def validate_epoch(self, epoch, protocol_name, subset='development',
                       validation_data=None):

        target_precision = self.precision

        # load model for current epoch
        model = self.load_model(epoch).to(self.device)
        model.eval()

        if isinstance(self.feature_extraction_, Precomputed):
            self.feature_extraction_.use_memmap = False

        duration = self.task_.duration
        step = .25 * duration
        sequence_labeling = SequenceLabeling(
            model, self.feature_extraction_, duration=duration,
            step=.25 * duration, batch_size=self.batch_size,
            source='audio', device=self.device)

        protocol = get_protocol(protocol_name, progress=False,
                                preprocessors=self.preprocessors_)

        predictions = {}
        references = {}

        file_generator = getattr(protocol, subset)()
        for current_file in file_generator:
            uri = get_unique_identifier(current_file)

            # build overlap reference
            reference = Timeline(uri=uri)
            annotation = current_file['annotation']
            for track1, track2 in annotation.co_iter(annotation):
                if track1 == track2:
                    continue
                reference.add(track1[0] & track2[0])
            references[uri] = reference.to_annotation()

            # extract overlap scores
            scores = sequence_labeling.apply(current_file)

            if model.logsoftmax:
                scores = SlidingWindowFeature(
                    np.exp(scores.data[:, 2]), scores.sliding_window)
            else:
                scores = SlidingWindowFeature(
                    scores.data[:, 2], scores.sliding_window)

            predictions[uri] = scores

        # dichotomic search to find threshold that maximizes recall
        # while having at least `target_precision`

        lower_alpha = 0.
        upper_alpha = 1.
        best_alpha = .5 * (lower_alpha + upper_alpha)
        best_recall = 0.

        for _ in range(10):
            current_alpha = .5 * (lower_alpha + upper_alpha)
            binarizer = Binarize(onset=current_alpha,
                                 offset=current_alpha,
                                 log_scale=False)

            precision = DetectionPrecision()
            recall = DetectionRecall()

            for current_file in getattr(protocol, subset)():
                uri = get_unique_identifier(current_file)
                reference = references[uri]
                hypothesis = binarizer.apply(predictions[uri], dimension=0)
                hypothesis = hypothesis.to_annotation()
                uem = get_annotated(current_file)
                _ = precision(reference, hypothesis, uem=uem)
                _ = recall(reference, hypothesis, uem=uem)

            if abs(precision) < target_precision:
                # precision is not high enough: try higher thresholds
                lower_alpha = current_alpha
            else:
                upper_alpha = current_alpha
                r = abs(recall)
                if r > best_recall:
                    best_recall = r
                    best_alpha = current_alpha

        task = 'overlap_speech_detection'
        metric_name = f'{task}/recall@{target_precision:.2f}precision'
        return {
            metric_name: {'minimize': False, 'value': best_recall},
            f'{task}/threshold': {'minimize': 'NA', 'value': best_alpha}}