path = OCR.format(repository=REPOSITORY, uri=uri)
        names = ['start', 'end', 'start_frame', 'end_frame', 'name', 'confidence']
        pyannote_ocr = Annotation(uri=uri)
        try:
            ocr = pd.read_table(path, delim_whitespace=True, header=None, names=names)
            for _, (start, end, _, _, name, _) in ocr.iterrows():
                pyannote_ocr[Segment(start, end)] = name
        except pandas.parser.CParserError as e:
            pass

        # Name each speaker after the OCR name it most co-occurs with.
        if pyannote_ocr:
            tagged = argmax_tagger(pyannote_ocr, pyannote_speakers)
            # Keep only speakers that actually received an OCR-derived label.
            named_speakers = tagged.subset(pyannote_ocr.labels())
        else:
            # No usable OCR: fall back to an empty annotation for this uri.
            named_speakers = Annotation(uri=uri)

        path = FUSION.format(repository=REPOSITORY, uri=uri)
        directory = os.path.dirname(path)
        # Ensure the output directory exists. Create-and-tolerate-EEXIST
        # instead of the original exists()-then-makedirs() check, which is
        # racy (TOCTOU) if several uris are processed concurrently.
        # (try/except OSError rather than exist_ok= keeps this portable to
        # older Python versions this legacy pandas code may target.)
        try:
            os.makedirs(directory)
        except OSError:
            # Re-raise unless the directory now exists (created by a
            # concurrent process) — any other failure is a real error.
            if not os.path.isdir(directory):
                raise

        # Write the fused speaker-naming output for this uri.
        with open(path, 'w') as fp:

            # NOTE(review): presumably tracks person names already emitted so
            # repeated occurrences can be disambiguated/suppressed — confirm
            # against the (unseen) remainder of this loop.
            duplicates = dict()

            # co_iter presumably yields every co-occurring pair of a named
            # speech turn and a shot — verify against pyannote.core docs.
            for (speech_turn, track), (_, shot_id) in named_speakers.co_iter(pyannote_shots):

                # Label assigned to this (speech turn, track) by the tagger.
                original_person_name = named_speakers[speech_turn, track]

                person_name = mapping.setdefault(