Example #1
    def _xxx_iter(self, subset):

        if not isinstance(subset, list):
            subsets = [subset]
        else:
            subsets = subset

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')
        data_csv = op.join(data_dir, 'voxceleb1.csv')
        data = pd.read_csv(data_csv, index_col=['segment'])
        data = data.groupby('verification')

        for subset in subsets:

            subset_data = data.get_group(subset)

            for uri, rows in subset_data.groupby('uri'):
                annotation = Annotation(uri=uri)
                for row in rows.itertuples():
                    segment = Segment(row.start, row.end)
                    annotation[segment] = row.speaker
                annotated = annotation.get_timeline()

                current_file = {
                    'uri': uri,
                    'database': 'VoxCeleb',
                    'annotation': annotation,
                    'annotated': annotated,
                }

                yield current_file
Example #3
    def predict(self, features, min_duration=None, constraint=None):
        """
        Parameters
        ----------
        min_duration : float or dict, optional
            Minimum duration for each label, in seconds.
        """

        constraint_ = self._constraint(constraint, features)
        consecutive = self._consecutive(min_duration, features)

        X = self.X(features, unknown='keep')
        sliding_window = features.sliding_window
        converted_y = self.classifier_.predict(X,
                                               consecutive=consecutive,
                                               constraint=constraint_)

        annotation = Annotation()

        diff = list(np.where(np.diff(converted_y))[0])
        diff = [-1] + diff + [len(converted_y)]

        for t, T in pairwise(diff):
            segment = sliding_window.rangeToSegment(t, T - t)
            annotation[segment] = converted_y[t + 1]

        translation = self.label_converter_.inverse_mapping()

        return annotation.translate(translation)
Example #4
    def _xxx_iter(self, subset):

        if not isinstance(subset, list):
            subsets = [subset]
        else:
            subsets = subset

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')
        data_csv = op.join(data_dir, 'voxceleb1.csv')
        data = pd.read_csv(data_csv, index_col=['segment'])
        data = data.groupby('verification')

        # segment                          uri                      start end  speaker      verification identification
        # A.J._Buckley/1zcIwhmdeo4_0000001 A.J._Buckley/1zcIwhmdeo4 14.7  22.8 A.J._Buckley dev          trn

        for subset in subsets:

            subset_data = data.get_group(subset)

            for uri, datum in subset_data.iterrows():

                annotation = Annotation(uri=uri)
                segment = Segment(0., datum.end - datum.start)
                annotation[segment] = datum.speaker

                annotated = annotation.get_timeline()

                current_file = {
                    'uri': uri,
                    'database': 'VoxCeleb',
                    'annotation': annotation,
                    'annotated': annotated,
                }

                yield current_file
Example #5
def vad_construct_pyannote_object_per_file(
    vad_table_filepath: str, groundtruth_RTTM_file: str
) -> Tuple[Annotation, Annotation]:
    """
    Construct a Pyannote object for evaluation.
    Args:
        vad_table_filepath(str) : path of vad rttm-like table.
        groundtruth_RTTM_file(str): path of groundtruth rttm file.
    Returns:
        reference(pyannote.Annotation): groundtruth
        hypothesis(pyannote.Annotation): prediction
    """

    pred = pd.read_csv(vad_table_filepath, sep=" ", header=None)
    label = pd.read_csv(groundtruth_RTTM_file, sep=" ", header=None)
    label = label.rename(columns={3: "start", 4: "dur", 7: "speaker"})

    # construct reference
    reference = Annotation()
    for index, row in label.iterrows():
        reference[Segment(row['start'], row['start'] + row['dur'])] = row['speaker']

    # construct hypothesis
    hypothesis = Annotation()
    for index, row in pred.iterrows():
        hypothesis[Segment(float(row[0]), float(row[0]) + float(row[1]))] = 'Speech'
    return reference, hypothesis
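
A minimal usage sketch, assuming hypothetical file paths: the (reference, hypothesis) pair returned above plugs directly into the detection metrics from pyannote.metrics.

    # Hedged usage sketch -- both paths below are hypothetical placeholders.
    from pyannote.metrics.detection import DetectionErrorRate

    reference, hypothesis = vad_construct_pyannote_object_per_file(
        "vad_outputs/sample.txt",      # hypothetical VAD table
        "groundtruth/sample.rttm",     # hypothetical ground-truth RTTM
    )
    der = DetectionErrorRate()(reference, hypothesis)
    print(f"detection error rate: {der:.3f}")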
Example #7
def vad_metrics(predictions,
                reference_segments,
                sr=22050,
                window_length=int(np.floor(0.032 * 22050)),
                hop_length=int(np.floor(0.016 * 22050))):
    frame_times = librosa.frames_to_time(range(len(predictions)),
                                         sr=sr,
                                         hop_length=hop_length,
                                         n_fft=window_length)
    predicted_segments = voice_segments(predictions, frame_times)

    hypothesis = Annotation()
    for seg in predicted_segments:
        hypothesis[Segment(seg[0], seg[1])] = 1

    reference = Annotation()
    for seg in reference_segments:
        reference[Segment(seg[0], seg[1])] = 1

    precision = DetectionPrecision()(reference, hypothesis)
    error = DetectionErrorRate()(reference, hypothesis)
    recall = DetectionRecall()(reference, hypothesis)
    accuracy = DetectionAccuracy()(reference, hypothesis)

    metrics = {
        "precision": precision,
        "error": error,
        "recall": recall,
        "accuracy": accuracy
    }

    print(metrics)

    return metrics
Example #8
    def predict(self, features, min_duration=None, constraint=None):
        """
        Parameters
        ----------
        min_duration : float or dict, optional
            Minimum duration for each label, in seconds.
        """

        constraint_ = self._constraint(constraint, features)
        consecutive = self._consecutive(min_duration, features)

        X = self.X(features, unknown="keep")
        sliding_window = features.sliding_window
        converted_y = self.classifier_.predict(X, consecutive=consecutive, constraint=constraint_)

        annotation = Annotation()

        diff = list(np.where(np.diff(converted_y))[0])
        diff = [-1] + diff + [len(converted_y)]

        for t, T in pairwise(diff):
            segment = sliding_window.rangeToSegment(t, T - t)
            annotation[segment] = converted_y[t + 1]

        translation = self.label_converter_.inverse_mapping()

        return annotation.translate(translation)
Example #9
def DER(outfile, AudioDataSet, annotationlist, audioLength):
    reference = Annotation()

    if not AudioDataSet == 'DiaExample':
        # one XML annotation file per speaker (A, B, C, D)
        for annotation_file, label in zip(annotationlist, ['A', 'B', 'C', 'D']):
            tree = ET.parse(annotation_file)
            root = tree.getroot()
            for child in root.findall('segment'):
                start = float(child.get('transcriber_start'))
                end = float(child.get('transcriber_end'))
                reference[Segment(start, end)] = label
    else:
        reference = Annotation()
        reference[Segment(0.15, 3.41)] = 'A'
        reference[Segment(3.83, 5.82)] = 'A'
        reference[Segment(6.75, 11.10)] = 'B'
        reference[Segment(11.32, 15.8)] = 'C'
        reference[Segment(15.9, 18.8)] = 'B'
        reference[Segment(18.8, 27.8)] = 'C'
        reference[Segment(27.8, 34.4)] = 'B'
        reference[Segment(34.4, 42)] = 'D'

    hypothesis = Annotation()
    with open(outfile, 'r') as f:
        for line in f:
            fields = line.split(' ')
            start = float(fields[3])
            end = start + float(fields[4])
            hypothesis[Segment(start, end)] = fields[5][0:-1]
    metric = DiarizationErrorRate()
    metricPurity = DiarizationPurity()
    uem = Timeline([Segment(0, audioLength)])

    print('DER: %.2f %%' % (metric(reference, hypothesis, uem=uem) * 100))
    print('Cluster Purity: %.2f %%' %
          (metricPurity(reference, hypothesis, uem=uem) * 100))

    return metric, reference, hypothesis
Example #10
    def load_speaker(self, uri):
        speaker = Annotation(uri=uri)
        path = self.get_audio_path(uri)
        with open(path, 'r') as fp:
            for line in fp:
                start, duration, name, _, _ = line.strip().split()
                start = float(start)
                end = start + float(duration)
                speaker[Segment(start, end)] = name
        return speaker.smooth()
Example #11
    def _turn_level(self, current_file: dict,
                    speech_turns: Annotation) -> Annotation:
        """Apply clustering at speech turn level

        Parameters
        ----------
        current_file : `dict`
            File as provided by a pyannote.database protocol.
        speech_turns : `Annotation`
            Speech turns. Should only contain `str` labels.

        Returns
        -------
        hypothesis : `pyannote.core.Annotation`
            Clustering result.
        """

        assert_string_labels(speech_turns, "speech_turns")

        embedding = self._embedding(current_file)

        labels = speech_turns.labels()
        X, clustered_labels, skipped_labels = [], [], []
        for l, label in enumerate(labels):

            timeline = speech_turns.label_timeline(label, copy=False)

            # be more and more permissive until we have
            # at least one embedding for current speech turn
            for mode in ["strict", "center", "loose"]:
                x = embedding.crop(timeline, mode=mode)
                if len(x) > 0:
                    break

            # skip labels so small we don't have any embedding for it
            if len(x) < 1:
                skipped_labels.append(label)
                continue

            clustered_labels.append(label)
            X.append(np.mean(x, axis=0))

        # apply clustering of label embeddings
        clusters = self.clustering(np.vstack(X))

        # map each clustered label to its cluster (between 1 and N_CLUSTERS)
        mapping = {label: k for label, k in zip(clustered_labels, clusters)}

        # map each skipped label to its own cluster
        # (between -1 and -N_SKIPPED_LABELS)
        for l, label in enumerate(skipped_labels):
            mapping[label] = -(l + 1)

        # do the actual mapping
        return speech_turns.rename_labels(mapping=mapping)
Example #12
def test_combi_categorical_dissimilarity():
    continuum = Continuum()
    annotation = Annotation()
    annotation[Segment(1, 5)] = 'Carol'
    annotation[Segment(6, 8)] = 'Bob'
    annotation[Segment(12, 18)] = 'Carol'
    annotation[Segment(7, 20)] = 'Alice'
    continuum.add_annotation('liza', annotation)
    annotation = Annotation()
    annotation[Segment(2, 6)] = 'Carol'
    annotation[Segment(7, 8)] = 'Bob'
    annotation[Segment(12, 18)] = 'Alice'
    annotation[Segment(8, 10)] = 'Alice'
    annotation[Segment(7, 19)] = 'Jeremy'
    continuum.add_annotation('pierrot', annotation)
    categories = ['Carol', 'Bob', 'Alice', 'Jeremy']

    cat = np.array([[0, 0.5, 0.3, 0.7], [0.5, 0., 0.6, 0.4],
                    [0.3, 0.6, 0., 0.7], [0.7, 0.4, 0.7, 0.]])
    combi_dis = CombinedCategoricalDissimilarity(categories=categories,
                                                 delta_empty=0.5,
                                                 cat_dissimilarity_matrix=cat,
                                                 alpha=3,
                                                 beta=1)
    list_dis = []
    for liza_unit in continuum['liza']:
        for pierrot_unit in continuum['pierrot']:
            unit_alignment = UnitaryAlignment(
                (("liza", liza_unit), ("pierrot", pierrot_unit)))
            list_dis.append(unit_alignment.compute_disorder(combi_dis))
    print(len(list_dis))
    assert list_dis == pytest.approx([
        0.09375, 5.11, 2.69375, 6.15, 8.790000000000001, 1.75,
        0.16666666666666666, 1.3020408163265305, 1.8, 6.3, 2.0237024221453286,
        1.4020408163265305, 0.3524, 0.8066666666666665, 0.20360110803324097,
        7.260000000000002, 7.137755102040815, 0.5166666666666666, 3.525, 0.15
    ], 0.001)

    unit_align_a = UnitaryAlignment(
        (("liza", Unit(Segment(1, 5),
                       "Carol")), ("pierrot", Unit(Segment(7, 19), "Jeremy"))))
    unit_align_b = UnitaryAlignment((
        ("pierrot", Unit(Segment(7, 19), "Jeremy")),
        ("liza", Unit(Segment(1, 5), "Carol")),
    ))
    assert (unit_align_a.compute_disorder(combi_dis) ==
            unit_align_b.compute_disorder(combi_dis))

    same_align = UnitaryAlignment(
        (("liza", Unit(Segment(1, 5),
                       "Carol")), ("pierrot", Unit(Segment(1, 5), "Carol"))))

    assert same_align.compute_disorder(combi_dis) == np.float32(0.0)
Example #13
def test_bug_16():
    reference = Annotation()
    reference[Segment(0, 10)] = 'A'
    hypothesis = Annotation()

    metric = DiarizationErrorRate(collar=1)
    total = metric(reference, hypothesis, detailed=True)['total']
    npt.assert_almost_equal(total, 9, decimal=3)

    metric = DiarizationErrorRate(collar=0)
    total = metric(reference, hypothesis, detailed=True)['total']
    npt.assert_almost_equal(total, 10, decimal=3)
Example #14
    def init_annotations(self):
        ref, hyp = {}, {}
        for ivecset in self.ivecs:
            if ivecset.size() > 0:
                name = ivecset.name
                # dirty trick, will be removed, watch out
                if 'beamformed' in name:
                    name = re.sub('beamformed/', '', name)
                # # # # # # # # # # # # # # # # # # # # #
                name = re.sub('/.*', '', name)
                ref[name], hyp[name] = Annotation(), Annotation()
        return ref, hyp
Example #15
def convert_labels(y_true, y_pred):
    reference = Annotation()
    hypothesis = Annotation()

    for i, (r, h) in enumerate(zip(y_true, y_pred)):
        segment = Segment(i, i + 1)

        if h != SILENCE:
            hypothesis[segment] = h
        if r != SILENCE:
            reference[segment] = r

    return hypothesis, reference
Example #16
def test_extrude():
    annotation = Annotation()
    annotation[Segment(0, 10)] = "A"
    annotation[Segment(15, 20)] = "A"
    annotation[Segment(20, 35)] = "B"
    annotation[Segment(15, 25)] = "C"
    annotation[Segment(30, 35)] = "C"

    extrusion_tl = Timeline([Segment(5, 12),
                             Segment(14, 25)])

    intersection_expected = Annotation()
    intersection_expected[Segment(0, 5)] = "A"
    intersection_expected[Segment(25, 35)] = "B"
    intersection_expected[Segment(30, 35)] = "C"

    assert (annotation.extrude(extrusion_tl, mode="intersection")
            ==
            intersection_expected)

    loose_expected = Annotation()
    loose_expected[Segment(30, 35)] = "C"

    assert (annotation.extrude(extrusion_tl, mode="loose")
            ==
            loose_expected)

    strict_expected = Annotation()
    strict_expected[Segment(0, 10)] = "A"
    strict_expected[Segment(20, 35)] = "B"
    strict_expected[Segment(30, 35)] = "C"

    assert (annotation.extrude(extrusion_tl, mode="strict")
            ==
            strict_expected)
Example #17
def calculate_der(reference_filename, hypothesis_filename):
    lbls = Util.read_audacity_labels(reference_filename)
    reference = Annotation()
    for lbl in lbls:
        reference[Segment(lbl.start_seconds, lbl.end_seconds)] = lbl.label

    predicted_lbls = Util.read_audacity_labels(hypothesis_filename)
    hypothesis = Annotation()
    for lbl in predicted_lbls:
        if lbl.label != 'non_speech':
            hypothesis[Segment(lbl.start_seconds, lbl.end_seconds)] = lbl.label

    metric = DiarizationErrorRate()
    der = metric(reference, hypothesis)
    return der
Example #18
    def _partition(self, timeline, coverage):

        # boundaries (as set of timestamps)
        boundaries = set([])
        for segment in timeline:
            boundaries.add(segment.start)
            boundaries.add(segment.end)

        # partition (as timeline)
        partition = Annotation()
        for start, end in pairwise(sorted(boundaries)):
            segment = Segment(start, end)
            partition[segment] = '_'

        return partition.crop(coverage, mode='intersection').anonymize_tracks()
Example #19
def test_crop_strict(annotation):
    expected = Annotation(
        uri='TheBigBangTheory.Season01.Episode01',
        modality='speaker')
    expected[Segment(5.5, 7), '_'] = 'Leonard'
    actual = annotation.crop(Segment(5, 9), mode='strict')
    assert actual == expected, str(actual)
Example #20
def test_from_json(annotation):
    # Check that we can reconstruct an annotation from the dict
    # returned by for_json.
    data = annotation.for_json()
    actual = Annotation.from_json(data)
    expected = annotation
    assert actual == expected
Example #21
def test_from_records(annotation):
    # Check that we can reconstruct an annotation from the
    # output of itertracks.
    records = annotation.itertracks(yield_label=True)
    actual = Annotation.from_records(records)
    expected = annotation
    assert actual == expected
Example #22
def reference():
    reference = Annotation()
    reference[Segment(0, 5)] = 'A'
    reference[Segment(6, 10)] = 'B'
    reference[Segment(12, 14)] = 'A'
    reference[Segment(15, 20)] = 'C'
    return reference
Example #23
def clip_to_annotations(clip_number, lena_mappings, human_mappings):
    """ Returns (human_annotation, lena_annotation)
    """
    df = pd.read_csv(METADATA_PATH, index_col='ClipNumber')

    its_filename = df.loc[clip_number].ProcessingFile
    chat_filename = 'e{}.cha'.format(its_filename.split('.')[0])
    textgrid_filename = 'Clip{}.TextGrid'.format(clip_number)

    lena_dict = lena_chat_to_dict(os.path.join(CHAT_PATH, chat_filename))
    textgrid_dict = textgrid_to_dict(os.path.join(TEXTGRID_PATH, textgrid_filename))
    
    # remap
    lena_dict = remap(lena_dict, lena_mappings)
    textgrid_dict = remap(textgrid_dict, human_mappings)

    # set default (silence) class
    lena_annotation = dict_to_annotation(lena_dict, lena_mappings['SIL'])
    human_annotation = dict_to_annotation(textgrid_dict, human_mappings['Silence'])

    start_time = df.loc[clip_number].StartTimeS
    end_time = start_time + 300 # 5 minutes
    
    # The crop doesn't begin at 0, but at start_time, so we need to shift it left.
    lena_cropped = lena_annotation.crop(Segment(start_time, end_time))
    lena_annotation_shifted = Annotation()
    for segment, track, label in lena_cropped.itertracks(yield_label=True):
        shifted_segment = Segment(segment.start - start_time, segment.end - start_time)
        lena_annotation_shifted[shifted_segment, track] = label

    return human_annotation, lena_annotation_shifted
Example #24
    def run(self):
        with self.in_subtitles().open('r') as fp:
            transcription = pyannote.core.json.load(fp)
        annotation = Annotation()
        label = 0
        for start, end, edge in transcription.ordered_edges_iter(data=True):
            if 'subtitle' not in edge:
                continue
            segment = Segment(start, end)
            annotation[segment] = label
            label += 1

        annotation = annotation.anonymize_labels(generator='string')

        with self.out_put().open('w') as fp:
            pyannote.core.json.dump(annotation, fp)
Example #25
    def _xxx_iter(self, subset):

        data = self._load_data(subset)

        AnnotatedGroups = data['annotated'].groupby(by='uri')
        AnnotationGroups = data['annotation'].groupby(by='uri')

        for raw_uri, annotated in AnnotatedGroups:

            uri = f'{raw_uri}.Mix-Headset'

            segments = []
            for segment in annotated.itertuples():
                segments.append(Segment(start=segment.start, end=segment.end))

            annotation = Annotation(uri=uri)
            for t, turn in enumerate(
                    AnnotationGroups.get_group(raw_uri).itertuples()):
                segment = Segment(start=turn.start,
                                  end=turn.start + turn.duration)
                annotation[segment, t] = turn.speaker

            current_file = {
                'database': 'Test',
                'uri': uri,
                'annotated': Timeline(uri=uri, segments=segments),
                'annotation': annotation
            }

            yield current_file
Example #26
    def _as_scores(self, raw, features, segmentation):

        if isinstance(segmentation, Timeline):
            annotation = Annotation(uri=segmentation.uri)
            for segment in segmentation:
                annotation[segment] = '?'
            segmentation = annotation

        # convert to pyannote-style & aggregate over each segment
        scores = Scores(uri=segmentation.uri,
                        modality=segmentation.modality,
                        annotation=segmentation,
                        labels=list(self.label_converter_))

        sliding_window = features.sliding_window

        for segment, track in segmentation.itertracks():

            # extract raw for all features in segment and aggregate
            i_start, i_duration = sliding_window.segmentToRange(segment)
            p = np.mean(raw[i_start:i_start + i_duration, :], axis=0)

            for i, label in enumerate(self.label_converter_):
                scores[segment, track, label] = p[i]

        return scores
Example #27
def reference():
    reference = Annotation()
    reference[Segment(0, 10)] = 'A'
    reference[Segment(12, 20)] = 'B'
    reference[Segment(24, 27)] = 'A'
    reference[Segment(30, 40)] = 'C'
    return reference
Example #28
def reference_with_overlap():
    reference = Annotation()
    reference[Segment(0, 13)] = 'A'
    reference[Segment(12, 20)] = 'B'
    reference[Segment(24, 27)] = 'A'
    reference[Segment(30, 40)] = 'C'
    return reference
Example #29
    def __call__(self):

        # list of chronologically sorted list of shots
        graph = self._threads_graph()
        threads = [sorted(cc) for cc in nx.connected_components(graph)]

        annotation = Annotation()
        labelGenerator = getLabelGenerator()

        # chronologically sorted threads (based on their first shot)
        for thread in sorted(threads, key=lambda thread: thread[0]):
            label = next(labelGenerator)
            for shot in thread:
                annotation[shot] = label

        return annotation.smooth()
Example #30
    def _decode(
        self,
        current_file: ProtocolFile,
        hypothesis: Annotation,
        scores: SlidingWindowFeature,
        labels: Iterable,
    ) -> Annotation:

        N, K = scores.data.shape

        if self.allow_overlap:
            active_speakers = scores.data > 0.5

        else:
            if self.lock_speech:
                active_speakers = np.argmax(scores.data, axis=1) + 1

            else:
                active_speakers = np.argmax(scores.data, axis=1)

        # reconstruct annotation
        new_hypothesis = one_hot_decoding(active_speakers,
                                          scores.sliding_window,
                                          labels=labels)

        new_hypothesis.uri = hypothesis.uri

        if self.lock_speech:
            speech = hypothesis.get_timeline().support()
            new_hypothesis = new_hypothesis.crop(speech)

        return new_hypothesis
Example #31
def load_mdtm(file_mdtm):
    """Load MDTM file

    Parameter
    ---------
    file_mdtm : `str`
        Path to MDTM file.

    Returns
    -------
    annotations : `dict`
        Speaker diarization as a {uri: pyannote.core.Annotation} dictionary.
    """

    names = ['uri', 'NA1', 'start', 'duration', 'NA2', 'NA3', 'NA4', 'speaker']
    dtype = {'uri': str, 'start': float, 'duration': float, 'speaker': str}
    data = pd.read_csv(file_mdtm, names=names, dtype=dtype,
                       delim_whitespace=True)

    annotations = dict()
    for uri, turns in data.groupby('uri'):
        annotation = Annotation(uri=uri)
        for i, turn in turns.iterrows():
            segment = Segment(turn.start, turn.start + turn.duration)
            annotation[segment, i] = turn.speaker
        annotations[uri] = annotation

    return annotations
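
A hedged usage sketch, assuming two hypothetical MDTM files: load both dictionaries returned by load_mdtm and score them URI by URI with pyannote.metrics.

    # Hedged usage sketch -- both MDTM paths are hypothetical placeholders.
    # (Annotation is assumed to be imported as in the example above.)
    from pyannote.metrics.diarization import DiarizationErrorRate

    references = load_mdtm('reference.mdtm')
    hypotheses = load_mdtm('hypothesis.mdtm')

    metric = DiarizationErrorRate()
    for uri, reference in references.items():
        hypothesis = hypotheses.get(uri, Annotation(uri=uri))
        print(uri, metric(reference, hypothesis))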
Example #32
def rttm_to_annotation(input_rttm,
                       collapse_to_speech=False,
                       class_to_keep=None):
    """
        Given a path to a rttm file, create the corresponding Annotation objects
        containing the triplets (t_beg, t_end, activity)

    Parameters
    ----------
    input_rttm
        A path to a rttm file that must exist.

    Returns
    -------
        An Annotation object.
    """
    anno = Annotation(uri=input_rttm)
    if os.path.isfile(input_rttm):
        with open(input_rttm) as fn:
            for line in fn:
                row = line.split('\t')
                t_beg, t_dur, spkr = float(row[3]), float(row[4]), row[7]
                if row[7] == "":
                    raise ValueError("Speaker role is empty in %s" %
                                     os.path.basename(input_rttm))
                if class_to_keep is not None and spkr == class_to_keep:
                    # Keep only class of interest
                    anno[Segment(t_beg, t_beg + t_dur)] = spkr
                elif class_to_keep is None:
                    # Keep all classes
                    anno[Segment(t_beg, t_beg + t_dur)] = spkr
    return anno
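
A hedged usage sketch, with a hypothetical RTTM path and class name: keep only one class of interest and read off the resulting regions.

    # Hedged usage sketch -- the RTTM path and class name are hypothetical.
    anno = rttm_to_annotation('sample.rttm', class_to_keep='SPEECH')
    # support() merges overlapping/adjacent segments (recent pyannote.core;
    # older versions called this coverage()).
    for segment in anno.get_timeline().support():
        print(segment.start, segment.end)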
Example #33
    def preprocess(self, openface):
        """
        Parameters
        ----------
        openface : str
            Path to Openface features
        """

        # TODO : option to only keep 'detections'
        # (make sure it does not alter 'starting_point' segments)

        names = ['time', 'track']
        for i in range(128):
            names += ['d{0}'.format(i)]
        data = read_table(openface,
                          delim_whitespace=True,
                          header=None,
                          names=names)
        features = data.groupby('track')
        starting_point = Annotation(modality='face')
        for track, segment in features.apply(self._to_segment).iteritems():
            if not segment:
                continue
            starting_point[segment, track] = track

        return starting_point, features
Example #34
def load_mdtm(file_mdtm):
    """Load MDTM file

    Parameter
    ---------
    file_mdtm : `str`
        Path to MDTM file.

    Returns
    -------
    annotations : `dict`
        Speaker diarization as a {uri: pyannote.core.Annotation} dictionary.
    """

    names = ["uri", "NA1", "start", "duration", "NA2", "NA3", "NA4", "speaker"]
    dtype = {"uri": str, "start": float, "duration": float, "speaker": str}
    data = pd.read_csv(
        file_mdtm,
        names=names,
        dtype=dtype,
        delim_whitespace=True,
        keep_default_na=False,
    )

    annotations = dict()
    for uri, turns in data.groupby("uri"):
        annotation = Annotation(uri=uri)
        for i, turn in turns.iterrows():
            segment = Segment(turn.start, turn.start + turn.duration)
            annotation[segment, i] = turn.speaker
        annotations[uri] = annotation

    return annotations
Example #36
    def _partition(self, timeline, coverage):

        # boundaries (as set of timestamps)
        boundaries = set([])
        for segment in timeline:
            boundaries.add(segment.start)
            boundaries.add(segment.end)

        # partition (as timeline)
        partition = Annotation()
        for start, end in pairwise(sorted(boundaries)):
            segment = Segment(start, end)
            partition[segment] = '_'

        cropped = partition.crop(coverage, mode='intersection')

        return cropped.anonymize_tracks()
Example #37
    def run(self):

        # wav file duration
        wav = self.in_wav().path
        with contextlib.closing(wave.open(wav, 'r')) as f:
            frames = f.getnframes()
            rate = f.getframerate()
        duration = frames / rate
        extent = Segment(0., duration)

        with self.in_speaker().open('r') as fp:
            speaker = pyannote.core.json.load(fp)

        timeline = Timeline()
        for segment, _ in speaker.itertracks():
            timeline.add(segment)

        # fill gaps
        for gap in timeline.gaps(extent):
            if gap.duration < self.fill_gaps:
                timeline.add(gap)

        timeline = timeline.coverage()

        # dump as annotation...
        if self.to_annotation:

            annotation = Annotation()
            for s, segment in enumerate(timeline):
                annotation[segment] = s
            annotation = annotation.anonymize_labels(generator='string')

            with self.out_put().open('w') as fp:
                pyannote.core.json.dump(annotation, fp)

        # ... or as timeline
        else:

            with self.out_put().open('w') as fp:
                pyannote.core.json.dump(timeline, fp)
Example #38
    def trn_iter(self):

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')
        data_csv = op.join(data_dir, 'voxceleb1.csv')
        data = pd.read_csv(data_csv, index_col=['segment'])
        data = data.groupby('identification').get_group('trn')

        for uri, rows in data.groupby('uri'):
            annotation = Annotation(uri=uri)
            for row in rows.itertuples():
                segment = Segment(row.start, row.end)
                annotation[segment] = row.speaker
            annotated = annotation.get_timeline()

            current_file = {
                'uri': uri,
                'database': 'VoxCeleb',
                'annotation': annotation,
                'annotated': annotated,
            }

            yield current_file
Example #39
    def run(self):

        # wav file duration
        wav = self.in_wav().path
        with contextlib.closing(wave.open(wav, 'r')) as f:
            frames = f.getnframes()
            rate = f.getframerate()
        duration = frames / rate
        extent = Segment(0., duration)

        with self.in_speaker().open('r') as fp:
            speaker = pyannote.core.json.load(fp)

        segmentation = Annotation()
        for segment, _ in speaker.itertracks():
            segmentation[segment] = 'speech'
        segmentation = segmentation.smooth()

        for gap in segmentation.get_timeline().gaps(extent):
            segmentation[gap] = 'non_speech'
        segmentation = segmentation.smooth()

        with self.out_put().open('w') as fp:
            pyannote.core.json.dump(segmentation, fp)
Example #40
    def iter_triplets(self, from_annotation):
        """Yield (anchor, positive, negative) segment triplets

        Parameters
        ----------
        from_annotation : Annotation
            Annotation from which triplets are obtained.
        """

        t = RandomTrackTriplets(per_label=self.per_label,
                                yield_label=self.yield_label)

        annotation = Annotation(uri=from_annotation.uri,
                                modality=from_annotation.modality)
        for segment, track, label in from_annotation.itertracks(label=True):
            if segment.duration < self.duration:
                continue
            annotation[segment, track] = label

        if len(annotation.labels()) < 2:
            return

        triplets = t.iter_triplets(annotation)

        for triplet in triplets:

            a, p, n = [item[0] for item in triplet]

            if self.duration:
                a, p, n = [self.pick(s) for s in (a, p, n)]

            if self.yield_label:
                a_, p_, n_ = [item[2] for item in triplet]
                yield (a, a_), (p, p_), (n, n_)
            else:
                yield a, p, n
Example #41
    def run(self):

        # wav file duration
        wav = self.in_wav().path
        with contextlib.closing(wave.open(wav, 'r')) as f:
            frames = f.getnframes()
            rate = f.getframerate()
        duration = frames / rate
        extent = Segment(0., duration)

        with self.in_subtitles().open('r') as fp:
            transcription = pyannote.core.json.load(fp)
        annotation = Annotation()
        for start, end, edge in transcription.ordered_edges_iter(data=True):
            if 'subtitle' not in edge:
                continue
            segment = Segment(start, end)
            annotation[segment] = 'speech'

        for gap in annotation.get_timeline().gaps(extent):
            annotation[gap] = 'non_speech'

        with self.out_put().open('w') as fp:
            pyannote.core.json.dump(annotation, fp)
Example #42
    def regression(self, reference, before, after, uem=None, uemified=False):

        _, before, errors_before = self.difference(
            reference, before, uem=uem, uemified=True)

        reference, after, errors_after = self.difference(
            reference, after, uem=uem, uemified=True)

        behaviors = Annotation(uri=reference.uri, modality=reference.modality)

        # common (up-sampled) timeline
        common_timeline = errors_after.get_timeline().union(
            errors_before.get_timeline())
        common_timeline = common_timeline.segmentation()

        # align 'before' errors on common timeline
        B = self._tagger(errors_before, common_timeline)

        # align 'after' errors on common timeline
        A = self._tagger(errors_after, common_timeline)

        for segment in common_timeline:

            old_errors = B.get_labels(segment, unique=False)
            new_errors = A.get_labels(segment, unique=False)

            n1 = len(old_errors)
            n2 = len(new_errors)
            n = max(n1, n2)

            match = np.zeros((n, n), dtype=int)
            for i1, e1 in enumerate(old_errors):
                for i2, e2 in enumerate(new_errors):
                    match[i1, i2] = self._match_errors(e1, e2)

            mapping = self.munkres.compute(2 - match)

            for i1, i2 in mapping:

                if i1 >= n1:
                    track = behaviors.new_track(segment,
                                                candidate=REGRESSION,
                                                prefix=REGRESSION)
                    behaviors[segment, track] = (
                        REGRESSION, None, new_errors[i2])

                elif i2 >= n2:
                    track = behaviors.new_track(segment,
                                                candidate=IMPROVEMENT,
                                                prefix=IMPROVEMENT)
                    behaviors[segment, track] = (
                        IMPROVEMENT, old_errors[i1], None)

                elif old_errors[i1][0] == MATCH_CORRECT:

                    if new_errors[i2][0] == MATCH_CORRECT:
                        track = behaviors.new_track(segment,
                                                    candidate=BOTH_CORRECT,
                                                    prefix=BOTH_CORRECT)
                        behaviors[segment, track] = (
                            BOTH_CORRECT, old_errors[i1], new_errors[i2])

                    else:
                        track = behaviors.new_track(segment,
                                                    candidate=REGRESSION,
                                                    prefix=REGRESSION)
                        behaviors[segment, track] = (
                            REGRESSION, old_errors[i1], new_errors[i2])

                else:

                    if new_errors[i2][0] == MATCH_CORRECT:
                        track = behaviors.new_track(segment,
                                                    candidate=IMPROVEMENT,
                                                    prefix=IMPROVEMENT)
                        behaviors[segment, track] = (
                            IMPROVEMENT, old_errors[i1], new_errors[i2])

                    else:
                        track = behaviors.new_track(segment,
                                                    candidate=BOTH_INCORRECT,
                                                    prefix=BOTH_INCORRECT)
                        behaviors[segment, track] = (
                            BOTH_INCORRECT, old_errors[i1], new_errors[i2])

        behaviors = behaviors.smooth()

        if uemified:
            return reference, before, after, behaviors
        else:
            return behaviors
Example #43
        names = ['time', 'track_id', 'left', 'top', 'right', 'bottom']
        face_tracking = pd.read_table(path, delim_whitespace=True, header=None, names=names)
        pyannote_face = Annotation(uri=uri)
        for track_id, track in face_tracking.groupby('track_id'):
            start = track['time'].min()
            end = track['time'].max()
            label = mapping.get(track_id, None)
            if label is None:
                SKIP = 'Skipping track #{track_id} ({duration:d} ms) in {video_id}'
                print(SKIP.format(track_id=track_id, duration=int(1000*(end-start)), video_id=video_id))
            pyannote_face[Segment(start, end), track_id] = label

        # load names as pyannote.Annotation
        path = OCR.format(repository=REPOSITORY, uri=uri)
        names = ['start', 'end', 'start_frame', 'end_frame', 'name', 'confidence']
        pyannote_ocr = Annotation(uri=uri)
        try:
            ocr = pd.read_table(path, delim_whitespace=True, header=None, names=names)
            for _, (start, end, _, _, name, _) in ocr.iterrows():
                pyannote_ocr[Segment(start, end)] = name
        except pandas.parser.CParserError as e:
            pass

        # name each person by most co-occurring OCR name
        if not pyannote_ocr:
            named_face = Annotation(uri=uri)
        else:
            named_face = argmax_tagger(pyannote_ocr, pyannote_face)
            named_face = named_face.subset(pyannote_ocr.labels())

        path = FUSION.format(repository=REPOSITORY, uri=uri)
Example #44
    def read(self, path, uri=None, modality=None, **kwargs):
        """

        Parameters
        ----------
        path : str

        modality : str, optional
            Force all entries to be considered as coming from this modality.
            Only taken into account when file format does not provide
            any field related to modality (e.g. .seg files)

        """

        # load whole file
        df = pandas.read_table(path,
                               delim_whitespace=True,
                               header=None, names=self.fields(),
                               comment=self.comment(),
                               converters=self.converters(),
                               dtype={PYANNOTE_LABEL: object})

        # remove comment lines
        # (i.e. lines for which all fields are either None or NaN)
        keep = [not all(pandas.isnull(item) for item in row[1:])
                for row in df.itertuples()]
        df = df[keep]

        # add 'segment' column build from start time & duration
        df[PYANNOTE_SEGMENT] = [self.get_segment(row)
                                for row in df.itertuples()]

        # add unique track numbers if they are not read from file
        if PYANNOTE_TRACK not in self.fields():
            df[PYANNOTE_TRACK] = range(df.shape[0])

        # add uri column in case it does not exist
        if PYANNOTE_URI not in df:
            if uri is None:
                raise ValueError('missing uri -- use uri=')
            df[PYANNOTE_URI] = uri

        # obtain list of resources
        uris = list(df[PYANNOTE_URI].unique())

        # add modality column in case it does not exist
        if PYANNOTE_MODALITY not in df:
            if modality is None:
                raise ValueError('missing modality -- use modality=')
            df[PYANNOTE_MODALITY] = modality if modality is not None else ""

        # obtain list of modalities
        modalities = list(df[PYANNOTE_MODALITY].unique())

        self._loaded = {}

        # loop on resources
        for uri in uris:

            # filter based on resource
            df_ = df[df[PYANNOTE_URI] == uri]

            # loop on modalities
            for modality in modalities:

                # filter based on modality
                modality = modality if modality is not None else ""
                df__ = df_[df_[PYANNOTE_MODALITY] == modality]
                a = Annotation.from_df(df__, modality=modality, uri=uri)
                self._loaded[uri, modality] = a

        return self
Example #45
    def __call__(self, reference, hypothesis):

        if isinstance(reference, Annotation):
            reference = reference.get_timeline()

        if isinstance(hypothesis, Annotation):
            hypothesis = hypothesis.get_timeline()

        # over-segmentation
        over = Timeline(uri=reference.uri)
        prev_r = reference[0]
        intersection = []
        for r, h in reference.co_iter(hypothesis):

            if r != prev_r:
                intersection = sorted(intersection)
                for _, segment in intersection[:-1]:
                    over.add(segment)
                intersection = []
                prev_r = r

            segment = r & h
            intersection.append((segment.duration, segment))

        intersection = sorted(intersection)
        for _, segment in intersection[:-1]:
            over.add(segment)

        # under-segmentation
        under = Timeline(uri=reference.uri)
        prev_h = hypothesis[0]
        intersection = []
        for h, r in hypothesis.co_iter(reference):

            if h != prev_h:
                intersection = sorted(intersection)
                for _, segment in intersection[:-1]:
                    under.add(segment)
                intersection = []
                prev_h = h

            segment = h & r
            intersection.append((segment.duration, segment))

        intersection = sorted(intersection)
        for _, segment in intersection[:-1]:
            under.add(segment)

        # extent
        extent = reference.extent()

        # correct (neither under- nor over-segmented)
        correct = under.union(over).gaps(focus=extent)

        # frontier error (both under- and over-segmented)
        frontier = under.crop(over)

        # under-segmented
        not_over = over.gaps(focus=extent)
        only_under = under.crop(not_over)

        # over-segmented
        not_under = under.gaps(focus=extent)
        only_over = over.crop(not_under)

        status = Annotation(uri=reference.uri)
        for segment in correct:
            status[segment, '_'] = 'correct'
        for segment in frontier:
            status[segment, '_'] = 'frontier'
        for segment in only_over:
            status[segment, '_'] = 'over'
        for segment in only_under:
            status[segment, '_'] = 'under'

        return status.smooth()
Example #46
        dic_trackID_st_to_speakingFace = {}
        for s, t, st in sd.itertracks(label=True):
            dic_trackID_st_to_speakingFace[t] = ['', thr_propagation]
        
        for line in open(args['<mat_speaking_face>']+'/'+videoID+'.mat').read().splitlines():
            TrackID_st, TrackID_Face, proba = line.split(' ')
            if float(proba) > dic_trackID_st_to_speakingFace[int(TrackID_st)][1]: 
                dic_trackID_st_to_speakingFace[int(TrackID_st)] = [int(TrackID_Face), float(proba)]

        trackID_face_to_name = {}
        for s, t, name in NamedSpk.itertracks(label=True):
            if dic_trackID_st_to_speakingFace[t][0] != '': 
                trackID_face_to_name[dic_trackID_st_to_speakingFace[t][0]] = name

        namedFaces = Annotation(uri=videoID)
        for s, t, faceID in faces.itertracks(label=True):
            if t in trackID_face_to_name: 
                namedFaces[s, t] = trackID_face_to_name[t]

        # write person visible and speaking in a shot:
        for sshot, tshot, shot in shots.itertracks(label=True):
            NamedSpkShot = NamedSpk.crop(sshot)
            NamedFaceShot = namedFaces.crop(sshot)
            PersonShot = set(NamedSpkShot.labels()) & set(NamedFaceShot.labels())

            for p in (PersonShot & set(evidences.keys())):
                conf = 0.0
                for sSpk in NamedSpkShot.label_timeline(p):
                    for sON, tON, name in ON.itertracks(label=True):
                        if name == p:
Example #47
        uri = corpus_id + '/' + video_id

        # load shots as pyannote.Annotation
        path = SHOTS.format(repository=REPOSITORY, uri=uri)
        names = ['corpus_id', 'video_id', 'shot_id', 'start', 'end']
        dtype = {'shot_id': str}
        shots = pd.read_table(path, delim_whitespace=True, header=None, names=names, dtype=dtype)
        pyannote_shots = Annotation(uri=uri)
        for _, (_, _, shot_id, start, end) in shots.iterrows():
            pyannote_shots[Segment(start, end), shot_id] = shot_id

        # load speaker diarization as pyannote.Annotation
        path = SPEAKERS.format(repository=REPOSITORY, uri=uri)
        names = ['corpus_id', 'video_id', 'start', 'end', 'label', 'gender']
        speakers = pd.read_table(path, delim_whitespace=True, header=None, names=names)
        pyannote_speakers = Annotation(uri=uri)
        for _, (_, _, start, end, label, _) in speakers.iterrows():
            pyannote_speakers[Segment(start, end)] = label
        pyannote_speakers = pyannote_speakers.anonymize_labels(generator='int')

        # load names as pyannote.Annotation
        path = OCR.format(repository=REPOSITORY, uri=uri)
        names = ['start', 'end', 'start_frame', 'end_frame', 'name', 'confidence']
        pyannote_ocr = Annotation(uri=uri)
        try:
            ocr = pd.read_table(path, delim_whitespace=True, header=None, names=names)
            for _, (start, end, _, _, name, _) in ocr.iterrows():
                pyannote_ocr[Segment(start, end)] = name
        except pandas.parser.CParserError as e:
            pass
Example #48
    def difference(self, reference, hypothesis, uem=None, uemified=False):
        """Get error analysis as `Annotation`

        Labels are (status, reference_label, hypothesis_label) tuples.
        `status` is either 'correct', 'confusion', 'missed detection' or
        'false alarm'.
        `reference_label` is None in case of 'false alarm'.
        `hypothesis_label` is None in case of 'missed detection'.

        Parameters
        ----------
        uemified : bool, optional
            Returns "uemified" version of reference and hypothesis.
            Defaults to False.

        Returns
        -------
        errors : `Annotation`

        """

        reference, hypothesis = self.uemify(
            reference, hypothesis, uem=uem, collar=self.collar)

        reference, hypothesis = self._handle_unknowns(reference, hypothesis)

        # common (up-sampled) timeline
        common_timeline = reference.get_timeline().union(
            hypothesis.get_timeline())
        common_timeline = common_timeline.segmentation()

        # align reference on common timeline
        R = self._tagger(reference, common_timeline)

        # translate and align hypothesis on common timeline
        H = self._tagger(hypothesis, common_timeline)

        errors = Annotation(uri=reference.uri, modality=reference.modality)

        # loop on all segments
        for segment in common_timeline:

            # list of labels in reference segment
            rlabels = R.get_labels(segment, unknown=self.unknown, unique=False)

            # list of labels in hypothesis segment
            hlabels = H.get_labels(segment, unknown=self.unknown, unique=False)

            _, details = self.matcher(rlabels, hlabels)

            for r, h in details[MATCH_CORRECT]:
                track = errors.new_track(segment, prefix=MATCH_CORRECT)
                errors[segment, track] = (MATCH_CORRECT, r, h)

            for r, h in details[MATCH_CONFUSION]:
                track = errors.new_track(segment, prefix=MATCH_CONFUSION)
                errors[segment, track] = (MATCH_CONFUSION, r, h)

            for r in details[MATCH_MISSED_DETECTION]:
                track = errors.new_track(segment,
                                         prefix=MATCH_MISSED_DETECTION)
                errors[segment, track] = (MATCH_MISSED_DETECTION, r, None)

            for h in details[MATCH_FALSE_ALARM]:
                track = errors.new_track(segment, prefix=MATCH_FALSE_ALARM)
                errors[segment, track] = (MATCH_FALSE_ALARM, None, h)

        if uemified:
            return reference, hypothesis, errors
        else:
            return errors
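
A hedged sketch of how the returned errors annotation might be consumed; the `errors` variable is assumed to come from difference() above, and the unpacking follows the docstring's (status, reference_label, hypothesis_label) label convention. It accumulates the total duration of each error status.

    # Hedged sketch -- assumes `errors` was obtained from difference() above.
    from collections import defaultdict

    durations = defaultdict(float)
    for segment, _, (status, _, _) in errors.itertracks(yield_label=True):
        durations[status] += segment.duration

    for status, total in sorted(durations.items()):
        print(f"{status}: {total:.2f} s")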