def get_docinfo(base_pdf: pikepdf.Pdf, context: PdfContext) -> Dict[str, str]:
    """Assemble document-info entries from *base_pdf* and ``context.options``.

    ``/Title``, ``/Author``, ``/Keywords``, ``/Subject`` and ``/CreationDate``
    are read from ``base_pdf.docinfo`` and converted with ``str``; an entry
    whose lookup raises ``KeyError`` or ``TypeError`` becomes ``''``.  When
    ``context.options`` is not ``None``, its truthy ``title``, ``author``,
    ``keywords`` and ``subject`` attributes replace the corresponding entries.
    ``/Creator``, ``/Producer`` and ``/ModDate`` are always written here.

    Args:
        base_pdf: Object whose ``docinfo`` mapping supplies the initial values.
        context: Supplies ``options`` and the ``plugin_manager`` used to
            obtain the OCR engine's creator tag.

    Returns:
        A dict keyed by slash-prefixed info names.
    """
    options = context.options

    def docinfo_text(name):
        # Absent or unusable entries are reported as empty strings.
        try:
            return str(base_pdf.docinfo[name])
        except (KeyError, TypeError):
            return ''

    pdfmark = {}
    for name in ('/Title', '/Author', '/Keywords', '/Subject', '/CreationDate'):
        pdfmark[name] = docinfo_text(name)

    if options is not None:
        # Values supplied through the options win over the copied ones.
        option_fields = (
            ('title', '/Title'),
            ('author', '/Author'),
            ('keywords', '/Keywords'),
            ('subject', '/Subject'),
        )
        for attr, name in option_fields:
            supplied = getattr(options, attr)
            if supplied:
                pdfmark[name] = supplied

    engine = context.plugin_manager.hook.get_ocr_engine()
    creator_tag = engine.creator_tag(options)

    pdfmark['/Creator'] = f'{PROGRAM_NAME} {VERSION} / {creator_tag}'
    pdfmark['/Producer'] = f'pikepdf {pikepdf.__version__}'
    # Modification date is the current time in UTC.
    now_utc = datetime.now(timezone.utc)
    pdfmark['/ModDate'] = encode_pdf_date(now_utc)
    return pdfmark
def get_docinfo(base_pdf, options):
    """Assemble document-info entries from *base_pdf* and *options*.

    ``/Title``, ``/Author``, ``/Keywords``, ``/Subject`` and ``/CreationDate``
    are read from ``base_pdf.docinfo`` and converted with ``str``; an entry
    whose lookup raises ``KeyError`` or ``TypeError`` becomes ``''``.  When
    *options* is not ``None``, its truthy ``title``, ``author``, ``keywords``
    and ``subject`` attributes replace the corresponding entries, and
    ``pdf_renderer == 'sandwich'`` selects the ``OCR-PDF`` tag instead of
    ``OCR`` in ``/Creator``.

    The environment variables ``OCRMYPDF_CREATOR`` and ``OCRMYPDF_PRODUCER``,
    when present, replace ``/Creator`` and ``/Producer``.  ``/ModDate`` is set
    to the current UTC time.

    Returns:
        A dict keyed by slash-prefixed info names.
    """

    def lookup(field):
        # Absent or unusable entries are reported as empty strings.
        try:
            return str(base_pdf.docinfo[field])
        except (KeyError, TypeError):
            return ''

    copied_fields = ('/Title', '/Author', '/Keywords', '/Subject', '/CreationDate')
    pdfmark = dict((field, lookup(field)) for field in copied_fields)

    use_sandwich_tag = False
    if options is not None:
        option_fields = (
            ('/Title', 'title'),
            ('/Author', 'author'),
            ('/Keywords', 'keywords'),
            ('/Subject', 'subject'),
        )
        for field, attr in option_fields:
            supplied = getattr(options, attr)
            if supplied:
                pdfmark[field] = supplied
        use_sandwich_tag = options.pdf_renderer == 'sandwich'
    renderer_tag = 'OCR-PDF' if use_sandwich_tag else 'OCR'

    pdfmark['/Creator'] = (
        f'{PROGRAM_NAME} {VERSION} / Tesseract {renderer_tag} {tesseract.version()}'
    )
    pdfmark['/Producer'] = f'pikepdf {pikepdf.__version__}'

    # Environment variables take precedence over the computed values.
    env_overrides = (
        ('/Creator', 'OCRMYPDF_CREATOR'),
        ('/Producer', 'OCRMYPDF_PRODUCER'),
    )
    for field, variable in env_overrides:
        if variable in os.environ:
            pdfmark[field] = os.environ[variable]

    now_utc = datetime.now(timezone.utc)
    pdfmark['/ModDate'] = encode_pdf_date(now_utc)
    return pdfmark
def get_docinfo(base_pdf, options):
    """Assemble document-info entries from *base_pdf* and *options*.

    ``/Title``, ``/Author``, ``/Keywords``, ``/Subject`` and ``/CreationDate``
    are read from ``base_pdf.docinfo`` and converted with ``str``; an entry
    whose lookup raises ``KeyError`` or ``TypeError`` becomes ``''``.  Truthy
    ``title``, ``author``, ``keywords`` and ``subject`` attributes of
    *options* replace the corresponding entries.  ``/Creator`` carries the
    tag ``OCR-PDF`` when ``options.pdf_renderer == 'sandwich'`` and ``OCR``
    otherwise.

    The environment variables ``OCRMYPDF_CREATOR`` and ``OCRMYPDF_PRODUCER``,
    when present, replace ``/Creator`` and ``/Producer``.  ``/ModDate`` is set
    to the current UTC time.

    Returns:
        A dict keyed by slash-prefixed info names.
    """

    def info_or_blank(entry):
        # Absent or unusable entries are reported as empty strings.
        try:
            value = base_pdf.docinfo[entry]
            return str(value)
        except (KeyError, TypeError):
            return ''

    inherited = ('/Title', '/Author', '/Keywords', '/Subject', '/CreationDate')
    pdfmark = dict(zip(inherited, map(info_or_blank, inherited)))

    # Values supplied through the options win over the copied ones.
    for entry, attr in (
        ('/Title', 'title'),
        ('/Author', 'author'),
        ('/Keywords', 'keywords'),
        ('/Subject', 'subject'),
    ):
        supplied = getattr(options, attr)
        if supplied:
            pdfmark[entry] = supplied

    renderer_tag = 'OCR-PDF' if options.pdf_renderer == 'sandwich' else 'OCR'

    pdfmark['/Creator'] = (
        f'{PROGRAM_NAME} {VERSION} / Tesseract {renderer_tag} {tesseract.version()}'
    )
    pdfmark['/Producer'] = f'pikepdf {pikepdf.__version__}'

    # Environment variables take precedence over the computed values.
    for entry, variable in (
        ('/Creator', 'OCRMYPDF_CREATOR'),
        ('/Producer', 'OCRMYPDF_PRODUCER'),
    ):
        if variable in os.environ:
            pdfmark[entry] = os.environ[variable]

    pdfmark['/ModDate'] = encode_pdf_date(datetime.now(timezone.utc))
    return pdfmark
def get_docinfo(base_pdf, options):
    """Assemble document-info entries from *base_pdf* and *options*.

    ``/Title``, ``/Author``, ``/Keywords``, ``/Subject`` and ``/CreationDate``
    are read from ``base_pdf.docinfo`` and converted with ``str``; an entry
    whose lookup raises ``KeyError`` or ``TypeError`` becomes ``''``.  Truthy
    ``title``, ``author``, ``keywords`` and ``subject`` attributes of
    *options* replace the corresponding entries.  ``/Creator`` carries the
    tag ``OCR-PDF`` when ``options.pdf_renderer == 'sandwich'`` and ``OCR``
    otherwise.

    The environment variables ``OCRMYPDF_CREATOR`` and ``OCRMYPDF_PRODUCER``,
    when present, replace ``/Creator`` and ``/Producer``.  ``/ModDate`` is set
    to the current UTC time.

    Returns:
        A dict keyed by slash-prefixed info names.
    """

    def read_entry(tag):
        # Absent or unusable entries are reported as empty strings.
        try:
            return str(base_pdf.docinfo[tag])
        except (KeyError, TypeError):
            return ''

    pdfmark = {}
    for tag in ('/Title', '/Author', '/Keywords', '/Subject', '/CreationDate'):
        pdfmark[tag] = read_entry(tag)

    # Values supplied through the options win over the copied ones.
    option_fields = {
        '/Title': 'title',
        '/Author': 'author',
        '/Keywords': 'keywords',
        '/Subject': 'subject',
    }
    for tag, attr in option_fields.items():
        supplied = getattr(options, attr)
        if supplied:
            pdfmark[tag] = supplied

    renderer_tag = 'OCR'
    if options.pdf_renderer == 'sandwich':
        renderer_tag = 'OCR-PDF'

    creator_parts = (PROGRAM_NAME, VERSION, renderer_tag, tesseract.version())
    pdfmark['/Creator'] = '{0} {1} / Tesseract {2} {3}'.format(*creator_parts)
    pdfmark['/Producer'] = 'pikepdf ' + pikepdf.__version__

    # Environment variables take precedence over the computed values.
    for tag, variable in (
        ('/Creator', 'OCRMYPDF_CREATOR'),
        ('/Producer', 'OCRMYPDF_PRODUCER'),
    ):
        if variable in os.environ:
            pdfmark[tag] = os.environ[variable]

    timestamp = datetime.now(timezone.utc)
    pdfmark['/ModDate'] = encode_pdf_date(timestamp)
    return pdfmark