def test_page_operations(pdf_path, password):
    """
    Smoke-test a sequence of operations on the first page of a PDF.

    The test only verifies that none of the calls raises an exception.
    It should be made far more thorough by checking that the resulting
    output is as expected.
    """
    if not pdf_path.startswith("http"):
        # Local fixture: resolve it relative to the resource directory.
        source = os.path.join(RESOURCE_ROOT, pdf_path)
    else:
        # Remote fixture: fetch it under the last URL segment as its name
        # and wrap the returned value in an in-memory stream.
        file_name = pdf_path.split("/")[-1]
        source = BytesIO(get_pdf_from_url(pdf_path, file_name))

    reader = PdfReader(source)
    if password:
        reader.decrypt(password)

    first_page: PageObject = reader.pages[0]

    # Apply a composed transformation first, then a raw 6-tuple.
    composed = Transformation().rotate(90).scale(1).translate(1, 1)
    first_page.add_transformation(composed, expand=True)
    first_page.add_transformation((1, 0, 0, 0, 0, 0))

    first_page.scale(2, 2)
    first_page.scale_by(0.5)
    first_page.scale_to(100, 100)
    first_page.compress_content_streams()
    first_page.extract_text()

    # Scale and extract once more, this time after the content streams
    # have been compressed.
    first_page.scale_by(0.5)
    first_page.scale_to(100, 100)
    first_page.extract_text()
def test_text_extraction_encrypted():
    """
    Check text extraction from the first page of an encrypted PDF.

    The reader must report the file as encrypted; after decrypting with
    the password, the extracted text (newlines removed, surrounding
    whitespace stripped) must start with the expected phrase.
    """
    encrypted_pdf = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf")
    reader = PdfReader(encrypted_pdf)
    assert reader.is_encrypted is True

    reader.decrypt("openpassword")

    raw_text = reader.pages[0].extract_text()
    flattened = raw_text.replace("\n", "").strip()
    assert flattened.startswith("Lorem ipsum dolor sit amet")
def page_ops(pdf_path, password):
    """
    Run a fixed series of transformations and merges on a PDF's first page.

    ``pdf_path`` is resolved relative to ``RESOURCE_ROOT``. When
    ``password`` is truthy, the reader is decrypted with it before the
    page is accessed. Nothing is returned; the operations are executed
    purely for their effect on the page.
    """
    reader = PdfReader(os.path.join(RESOURCE_ROOT, pdf_path))
    if password:
        reader.decrypt(password)

    first_page = reader.pages[0]

    def transform_then_self_merge(transformation):
        # Apply the transformation, then merge the page into itself.
        first_page.add_transformation(transformation)
        first_page.merge_page(first_page)

    # Each transformation is built right before it is applied.
    transform_then_self_merge(Transformation().rotate(90).scale(1.2))
    transform_then_self_merge(Transformation().scale(1).translate(tx=1, ty=1))
    transform_then_self_merge(
        Transformation().rotate(90).scale(1).translate(tx=1, ty=1)
    )

    first_page.add_transformation((1, 0, 0, 0, 0, 0))
    first_page.scale(2, 2)
    first_page.scale_by(0.5)
    first_page.scale_to(100, 100)
    first_page.compress_content_streams()
    first_page.extract_text()
def test_decrypt_when_no_id():
    """
    Decrypt an encrypted file whose trailer lacks the 'ID' value.

    Regression test for https://github.com/mstamy2/PyPDF2/issues/608
    """
    pdf_file = os.path.join(RESOURCE_ROOT, "encrypted_doc_no_id.pdf")
    expected_metadata = {"/Producer": "European Patent Office"}

    with open(pdf_file, "rb") as stream:
        reader = PdfReader(stream)
        # The file is decrypted with an empty password.
        reader.decrypt("")
        assert reader.metadata == expected_metadata
def test_decrypt():
    """
    Decrypt a password-protected PDF and check its page count and metadata.

    ``is_encrypted`` is asserted both before and after ``decrypt`` is
    called; it is expected to be ``True`` in both cases.
    """
    pdf_file = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf")
    expected_metadata = {
        "/CreationDate": "D:20220403203552+02'00'",
        "/Creator": "Writer",
        "/Producer": "LibreOffice 6.4",
    }

    with open(pdf_file, "rb") as stream:
        reader = PdfReader(stream)
        assert reader.is_encrypted is True

        reader.decrypt("openpassword")
        assert len(reader.pages) == 1
        assert reader.is_encrypted is True

        # Compare as a plain dict.
        metadata = reader.metadata
        assert dict(metadata) == expected_metadata
def test_compress_content_streams(pdf_path, password):
    """
    Compress the content streams of every page in the given PDF.

    ``pdf_path`` is handed to ``PdfReader`` unchanged. When ``password``
    is truthy, the reader is decrypted with it first. The test passes as
    long as no call raises.
    """
    document = PdfReader(pdf_path)
    if password:
        document.decrypt(password)

    for current_page in document.pages:
        current_page.compress_content_streams()