Code example #1
    # requires: import pyannote.core.json
    #           from pyannote.core import Annotation, Segment
    def run(self):
        with self.in_subtitles().open('r') as fp:
            transcription = pyannote.core.json.load(fp)

        # one segment (with its own temporary integer label) per subtitle edge
        annotation = Annotation()
        label = 0
        for start, end, edge in transcription.ordered_edges_iter(data=True):
            if 'subtitle' not in edge:
                continue
            segment = Segment(start, end)
            annotation[segment] = label
            label += 1

        # replace temporary integer labels with anonymous string labels
        annotation = annotation.anonymize_labels(generator='string')

        with self.out_put().open('w') as fp:
            pyannote.core.json.dump(annotation, fp)
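
The snippet above serializes pyannote objects with the pyannote.core.json helper module. A minimal round-trip sketch, assuming an older pyannote.core release that still ships the pyannote.core.json module used here:

    import pyannote.core.json
    from pyannote.core import Annotation, Segment

    # build a tiny annotation: two labeled segments
    annotation = Annotation(uri='toy')
    annotation[Segment(0.0, 1.5)] = 'A'
    annotation[Segment(2.0, 3.0)] = 'B'

    # dump to JSON, then load it back
    with open('toy.json', 'w') as fp:
        pyannote.core.json.dump(annotation, fp)
    with open('toy.json', 'r') as fp:
        restored = pyannote.core.json.load(fp)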
Code example #2
    # requires: import contextlib, wave
    #           import pyannote.core.json
    #           from pyannote.core import Annotation, Segment, Timeline
    def run(self):

        # wav file duration (float division, so this also works on Python 2)
        wav = self.in_wav().path
        with contextlib.closing(wave.open(wav, 'r')) as f:
            frames = f.getnframes()
            rate = f.getframerate()
        duration = frames / float(rate)
        extent = Segment(0., duration)

        with self.in_speaker().open('r') as fp:
            speaker = pyannote.core.json.load(fp)

        timeline = Timeline()
        for segment, _ in speaker.itertracks():
            timeline.add(segment)

        # fill gaps shorter than the fill_gaps threshold
        # (materialize gaps first so the timeline is not mutated while
        # its gaps are being iterated)
        for gap in list(timeline.gaps(extent)):
            if gap.duration < self.fill_gaps:
                timeline.add(gap)

        # merge contiguous / overlapping segments
        timeline = timeline.coverage()

        # dump as annotation...
        if self.to_annotation:

            annotation = Annotation()
            for s, segment in enumerate(timeline):
                annotation[segment] = s
            annotation = annotation.anonymize_labels(generator='string')

            with self.out_put().open('w') as fp:
                pyannote.core.json.dump(annotation, fp)

        # ... or as timeline
        else:

            with self.out_put().open('w') as fp:
                pyannote.core.json.dump(timeline, fp)
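
For reference, a toy illustration of the gap-filling idea, written against the same older pyannote.core API the snippet uses (where coverage() corresponds to support() in current releases):

    from pyannote.core import Segment, Timeline

    timeline = Timeline([Segment(0., 2.), Segment(2.5, 4.)])
    extent = Segment(0., 5.)

    # merge gaps shorter than one second into the timeline
    for gap in list(timeline.gaps(extent)):
        if gap.duration < 1.0:
            timeline.add(gap)

    merged = timeline.coverage()  # single segment [0., 4.]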
Code example #3

        # load shot boundaries as pyannote.Annotation
        # (REPOSITORY, SHOTS, SPEAKERS, OCR and uri are defined elsewhere
        #  in the original module; requires import pandas as pd)
        path = SHOTS.format(repository=REPOSITORY, uri=uri)
        names = ['corpus_id', 'video_id', 'shot_id', 'start', 'end']
        dtype = {'shot_id': str}
        shots = pd.read_table(path, delim_whitespace=True, header=None, names=names, dtype=dtype)
        pyannote_shots = Annotation(uri=uri)
        for _, (_, _, shot_id, start, end) in shots.iterrows():
            pyannote_shots[Segment(start, end), shot_id] = shot_id

        # load speaker diarization as pyannote.Annotation
        path = SPEAKERS.format(repository=REPOSITORY, uri=uri)
        names = ['corpus_id', 'video_id', 'start', 'end', 'label', 'gender']
        speakers = pd.read_table(path, delim_whitespace=True, header=None, names=names)
        pyannote_speakers = Annotation(uri=uri)
        for _, (_, _, start, end, label, _) in speakers.iterrows():
            pyannote_speakers[Segment(start, end)] = label
        pyannote_speakers = pyannote_speakers.anonymize_labels(generator='int')

        # load names as pyannote.Annotation
        path = OCR.format(repository=REPOSITORY, uri=uri)
        names = ['start', 'end', 'start_frame', 'end_frame', 'name', 'confidence']
        pyannote_ocr = Annotation(uri=uri)
        try:
            ocr = pd.read_table(path, delim_whitespace=True, header=None, names=names)
            for _, (start, end, _, _, name, _) in ocr.iterrows():
                pyannote_ocr[Segment(start, end)] = name
        except pd.errors.ParserError:
            # no usable OCR output for this video: leave pyannote_ocr empty
            # (older pandas raised pandas.parser.CParserError here)
            pass

        # name each speaker by most co-occurring OCR name
        if not pyannote_ocr:
            named_speakers = Annotation(uri=uri)
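
A purely hypothetical sketch of the "most co-occurring OCR name" step described in the comment above (an illustration under assumed pyannote.core APIs, not the original continuation): crop the OCR annotation to each speaker's segments and keep the name with the largest total co-occurrence duration.

    # hypothetical continuation for the case where OCR names are available
    named_speakers = Annotation(uri=uri)
    for speaker in pyannote_speakers.labels():
        support = pyannote_speakers.label_timeline(speaker)
        cooccurring = pyannote_ocr.crop(support, mode='intersection')
        name = cooccurring.argmax()  # most co-occurring name, or None
        if name is None:
            continue
        for segment in support:
            named_speakers[segment] = name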