def run(self):
    """Convert subtitle edges of the transcription into an anonymized annotation.

    Reads the transcription graph from the `in_subtitles` input, creates one
    labeled segment per edge that carries a 'subtitle' attribute, anonymizes
    the labels as strings, and dumps the result to the `out_put` target.
    """
    # load transcription graph from upstream task
    with self.in_subtitles().open('r') as fp:
        transcription = pyannote.core.json.load(fp)

    annotation = Annotation()
    next_label = 0
    for start, end, edge in transcription.ordered_edges_iter(data=True):
        # only edges carrying subtitle data become segments
        if 'subtitle' in edge:
            annotation[Segment(start, end)] = next_label
            next_label += 1

    # replace integer labels with anonymized string labels
    annotation = annotation.anonymize_labels(generator='string')

    with self.out_put().open('w') as fp:
        pyannote.core.json.dump(annotation, fp)
def run(self):
    """Build a gap-filled speaker timeline and dump it as timeline or annotation.

    Computes the wav file duration to bound the timeline extent, collects all
    speaker segments, fills gaps shorter than `self.fill_gaps` seconds, and
    writes the resulting coverage to `out_put` — as an anonymized Annotation
    when `self.to_annotation` is set, as a plain Timeline otherwise.
    """
    # wav file duration
    wav = self.in_wav().path
    with contextlib.closing(wave.open(wav, 'r')) as f:
        frames = f.getnframes()
        rate = f.getframerate()
        # frames and rate are both ints: force true division so the
        # duration is not silently truncated under Python 2
        duration = frames / float(rate)
    extent = Segment(0., duration)

    # collect every speaker segment into one timeline
    with self.in_speaker().open('r') as fp:
        speaker = pyannote.core.json.load(fp)
    timeline = Timeline()
    for segment, _ in speaker.itertracks():
        timeline.add(segment)

    # fill gaps shorter than the configured threshold (in seconds)
    for gap in timeline.gaps(extent):
        if gap.duration < self.fill_gaps:
            timeline.add(gap)
    # merge overlapping/contiguous segments into a minimal coverage
    timeline = timeline.coverage()

    # dump as annotation...
    if self.to_annotation:
        annotation = Annotation()
        for s, segment in enumerate(timeline):
            annotation[segment] = s
        annotation = annotation.anonymize_labels(generator='string')
        with self.out_put().open('w') as fp:
            pyannote.core.json.dump(annotation, fp)
    # ... or as timeline
    else:
        with self.out_put().open('w') as fp:
            pyannote.core.json.dump(timeline, fp)
# load shots as pyannote.Annotation
path = SHOTS.format(repository=REPOSITORY, uri=uri)
names = ['corpus_id', 'video_id', 'shot_id', 'start', 'end']
# keep shot identifiers as strings (leading zeros, non-numeric ids)
dtype = {'shot_id': str}
shots = pd.read_table(path, delim_whitespace=True,
                      header=None, names=names, dtype=dtype)
pyannote_shots = Annotation(uri=uri)
for _, (_, _, shot_id, start, end) in shots.iterrows():
    pyannote_shots[Segment(start, end), shot_id] = shot_id

# load speaker diarization as pyannote.Annotation
path = SPEAKERS.format(repository=REPOSITORY, uri=uri)
names = ['corpus_id', 'video_id', 'start', 'end', 'label', 'gender']
speakers = pd.read_table(path, delim_whitespace=True,
                         header=None, names=names)
pyannote_speakers = Annotation(uri=uri)
for _, (_, _, start, end, label, _) in speakers.iterrows():
    pyannote_speakers[Segment(start, end)] = label
pyannote_speakers = pyannote_speakers.anonymize_labels(generator='int')

# load names as pyannote.Annotation
path = OCR.format(repository=REPOSITORY, uri=uri)
names = ['start', 'end', 'start_frame', 'end_frame', 'name', 'confidence']
pyannote_ocr = Annotation(uri=uri)
try:
    # only read_table can raise the parser error; keep the try body minimal
    # (was `pandas.parser.CParserError` — a NameError since only the `pd`
    # alias is imported; use the alias like every other call in this file)
    ocr = pd.read_table(path, delim_whitespace=True,
                        header=None, names=names)
except pd.parser.CParserError:
    # OCR output missing or malformed: deliberately fall back to an
    # empty OCR annotation rather than failing the whole video
    ocr = None
if ocr is not None:
    for _, (start, end, _, _, name, _) in ocr.iterrows():
        pyannote_ocr[Segment(start, end)] = name

# name each speaker by most co-occurring OCR name
if not pyannote_ocr:
    named_speakers = Annotation(uri=uri)