예제 #1
0
def get_docinfo(base_pdf: pikepdf.Pdf, context: PdfContext) -> Dict[str, str]:
    """Assemble the document-info entries for the output PDF.

    The title, author, keywords, subject and creation date are read from
    ``base_pdf.docinfo``; an entry whose lookup raises ``KeyError`` or
    ``TypeError`` becomes an empty string. When ``context.options`` is not
    ``None``, any truthy ``title``/``author``/``keywords``/``subject`` option
    replaces the value taken from the input file.

    ``/Creator``, ``/Producer`` and ``/ModDate`` are always generated here.

    Args:
        base_pdf: PDF whose existing ``docinfo`` is used as the baseline.
        context: Supplies ``options`` and the ``plugin_manager`` used to ask
            the OCR engine for its creator tag.

    Returns:
        Mapping from document-info key (e.g. ``'/Title'``) to its value.
    """
    options = context.options

    def existing_value(name):
        # The lookup and the str() conversion both stay inside the try so
        # that either failing yields the empty-string fallback.
        try:
            return str(base_pdf.docinfo[name])
        except (KeyError, TypeError):
            return ''

    carried_over = (
        '/Title',
        '/Author',
        '/Keywords',
        '/Subject',
        '/CreationDate',
    )
    pdfmark = {}
    for name in carried_over:
        pdfmark[name] = existing_value(name)

    if options is not None:
        overrides = (
            ('/Title', 'title'),
            ('/Author', 'author'),
            ('/Keywords', 'keywords'),
            ('/Subject', 'subject'),
        )
        for name, attr in overrides:
            value = getattr(options, attr)
            if value:
                pdfmark[name] = value

    # The creator tag is requested even when options is None; the engine
    # receives whatever context.options held.
    engine = context.plugin_manager.hook.get_ocr_engine()
    creator_tag = engine.creator_tag(options)

    pdfmark.update(
        {
            '/Creator': f'{PROGRAM_NAME} {VERSION} / {creator_tag}',
            '/Producer': f'pikepdf {pikepdf.__version__}',
            # Modification time is "now" in UTC, encoded by encode_pdf_date.
            '/ModDate': encode_pdf_date(datetime.now(timezone.utc)),
        }
    )
    return pdfmark
예제 #2
0
def get_docinfo(base_pdf, options):
    """Assemble the document-info entries for the output PDF.

    The title, author, keywords, subject and creation date are read from
    ``base_pdf.docinfo``; an entry whose lookup raises ``KeyError`` or
    ``TypeError`` becomes an empty string. ``options`` may be ``None``; when
    it is not, truthy ``title``/``author``/``keywords``/``subject`` values
    replace the ones from the input file, and ``pdf_renderer == 'sandwich'``
    switches the renderer tag in ``/Creator`` from ``OCR`` to ``OCR-PDF``.

    The ``OCRMYPDF_CREATOR`` and ``OCRMYPDF_PRODUCER`` environment variables,
    when present (even if empty), replace the generated ``/Creator`` and
    ``/Producer`` values. ``/ModDate`` is always the current UTC time.

    Returns:
        Mapping from document-info key (e.g. ``'/Title'``) to its value.
    """

    def existing_value(name):
        # The lookup and the str() conversion both stay inside the try so
        # that either failing yields the empty-string fallback.
        try:
            return str(base_pdf.docinfo[name])
        except (KeyError, TypeError):
            return ''

    pdfmark = dict(
        (name, existing_value(name))
        for name in ('/Title', '/Author', '/Keywords', '/Subject', '/CreationDate')
    )

    sandwich = False
    if options is not None:
        for name, attr in (
            ('/Title', 'title'),
            ('/Author', 'author'),
            ('/Keywords', 'keywords'),
            ('/Subject', 'subject'),
        ):
            value = getattr(options, attr)
            if value:
                pdfmark[name] = value
        sandwich = options.pdf_renderer == 'sandwich'
    renderer_tag = 'OCR-PDF' if sandwich else 'OCR'

    # The defaults are computed unconditionally (tesseract.version() is
    # called even if the environment later overrides the creator string).
    default_creator = (
        f'{PROGRAM_NAME} {VERSION} / Tesseract {renderer_tag} {tesseract.version()}'
    )
    default_producer = f'pikepdf {pikepdf.__version__}'

    pdfmark['/Creator'] = os.environ.get('OCRMYPDF_CREATOR', default_creator)
    pdfmark['/Producer'] = os.environ.get('OCRMYPDF_PRODUCER', default_producer)
    pdfmark['/ModDate'] = encode_pdf_date(datetime.now(timezone.utc))
    return pdfmark
예제 #3
0
def get_docinfo(base_pdf, options):
    """Assemble the document-info entries for the output PDF.

    The title, author, keywords, subject and creation date are read from
    ``base_pdf.docinfo``; an entry whose lookup raises ``KeyError`` or
    ``TypeError`` becomes an empty string. Truthy ``title``/``author``/
    ``keywords``/``subject`` attributes on ``options`` replace the values
    from the input file. ``options`` is dereferenced unconditionally, so it
    must not be ``None``.

    ``/Creator`` names the program, its version and the Tesseract version,
    tagged ``OCR-PDF`` when ``options.pdf_renderer == 'sandwich'`` and
    ``OCR`` otherwise. The ``OCRMYPDF_CREATOR`` and ``OCRMYPDF_PRODUCER``
    environment variables, when present, replace the generated ``/Creator``
    and ``/Producer`` values. ``/ModDate`` is always the current UTC time.

    Returns:
        Mapping from document-info key (e.g. ``'/Title'``) to its value.
    """

    def existing_value(name):
        # The lookup and the str() conversion both stay inside the try so
        # that either failing yields the empty-string fallback.
        try:
            return str(base_pdf.docinfo[name])
        except (KeyError, TypeError):
            return ''

    pdfmark = {}
    for name in ('/Title', '/Author', '/Keywords', '/Subject', '/CreationDate'):
        pdfmark[name] = existing_value(name)

    option_fields = (
        ('/Title', 'title'),
        ('/Author', 'author'),
        ('/Keywords', 'keywords'),
        ('/Subject', 'subject'),
    )
    for name, attr in option_fields:
        value = getattr(options, attr)
        if value:
            pdfmark[name] = value

    renderer_tag = 'OCR-PDF' if options.pdf_renderer == 'sandwich' else 'OCR'

    # Generated defaults first; the environment may replace them below.
    pdfmark.update(
        {
            '/Creator': (
                f'{PROGRAM_NAME} {VERSION} / '
                f'Tesseract {renderer_tag} {tesseract.version()}'
            ),
            '/Producer': f'pikepdf {pikepdf.__version__}',
        }
    )
    env_overrides = (
        ('/Creator', 'OCRMYPDF_CREATOR'),
        ('/Producer', 'OCRMYPDF_PRODUCER'),
    )
    for name, env_name in env_overrides:
        if env_name in os.environ:
            pdfmark[name] = os.environ[env_name]

    pdfmark['/ModDate'] = encode_pdf_date(datetime.now(timezone.utc))
    return pdfmark
예제 #4
0
def get_docinfo(base_pdf, options):
    """Assemble the document-info entries for the output PDF.

    The title, author, keywords, subject and creation date are read from
    ``base_pdf.docinfo``; an entry whose lookup raises ``KeyError`` or
    ``TypeError`` becomes an empty string. Truthy ``title``/``author``/
    ``keywords``/``subject`` attributes on ``options`` replace the values
    from the input file. ``options`` is dereferenced unconditionally, so it
    must not be ``None``.

    ``/Creator`` names the program, its version and the Tesseract version,
    tagged ``OCR-PDF`` when ``options.pdf_renderer == 'sandwich'`` and
    ``OCR`` otherwise. The ``OCRMYPDF_CREATOR`` and ``OCRMYPDF_PRODUCER``
    environment variables, when present, replace the generated ``/Creator``
    and ``/Producer`` values. ``/ModDate`` is always the current UTC time.

    Returns:
        Mapping from document-info key (e.g. ``'/Title'``) to its value.
    """

    def existing_value(name):
        # The lookup and the str() conversion both stay inside the try so
        # that either failing yields the empty-string fallback.
        try:
            return str(base_pdf.docinfo[name])
        except (KeyError, TypeError):
            return ''

    carried_over = ('/Title', '/Author', '/Keywords', '/Subject', '/CreationDate')
    pdfmark = dict((name, existing_value(name)) for name in carried_over)

    for name, attr in (
        ('/Title', 'title'),
        ('/Author', 'author'),
        ('/Keywords', 'keywords'),
        ('/Subject', 'subject'),
    ):
        value = getattr(options, attr)
        if value:
            pdfmark[name] = value

    renderer_tag = 'OCR'
    if options.pdf_renderer == 'sandwich':
        renderer_tag = 'OCR-PDF'

    # Generated defaults are always computed, then possibly replaced by the
    # environment.
    creator_parts = (PROGRAM_NAME, VERSION, renderer_tag, tesseract.version())
    pdfmark['/Creator'] = '{0} {1} / Tesseract {2} {3}'.format(*creator_parts)
    pdfmark['/Producer'] = 'pikepdf ' + pikepdf.__version__

    for name, env_name in (
        ('/Creator', 'OCRMYPDF_CREATOR'),
        ('/Producer', 'OCRMYPDF_PRODUCER'),
    ):
        if env_name in os.environ:
            pdfmark[name] = os.environ[env_name]

    pdfmark['/ModDate'] = encode_pdf_date(datetime.now(timezone.utc))
    return pdfmark