path = OCR.format(repository=REPOSITORY, uri=uri) names = ['start', 'end', 'start_frame', 'end_frame', 'name', 'confidence'] pyannote_ocr = Annotation(uri=uri) try: ocr = pd.read_table(path, delim_whitespace=True, header=None, names=names) for _, (start, end, _, _, name, _) in ocr.iterrows(): pyannote_ocr[Segment(start, end)] = name except pandas.parser.CParserError as e: pass # name each speaker by most co-occurring OCR name if not pyannote_ocr: named_speakers = Annotation(uri=uri) else: named_speakers = argmax_tagger(pyannote_ocr, pyannote_speakers) named_speakers = named_speakers.subset(pyannote_ocr.labels()) path = FUSION.format(repository=REPOSITORY, uri=uri) directory = os.path.dirname(path) if not os.path.exists(directory): os.makedirs(directory) with open(path, 'w') as fp: duplicates = dict() for (speech_turn, track), (_, shot_id) in named_speakers.co_iter(pyannote_shots): original_person_name = named_speakers[speech_turn, track] person_name = mapping.setdefault(