def test_get_images(src, nb_images):
    """Extract the image XObjects of the first page and check how many there are.

    Args:
        src: File name of the PDF, joined onto ``RESOURCE_ROOT``.
        nb_images: Number of images expected to be extracted from the first
            page.

    Every image for which ``_xobj_to_image`` reports an extension is written
    to the current working directory as ``<name>.png``. The files are removed
    again even when the test fails, so a failing run leaves nothing behind.
    """
    src = os.path.join(RESOURCE_ROOT, src)
    reader = PdfFileReader(src)

    # Indexing the page list with a string must be rejected.
    with pytest.raises(TypeError):
        page = reader.pages["0"]

    # Negative indexing must be accepted; the value itself is not used.
    page = reader.pages[-1]
    page = reader.pages[0]

    images_extracted = []
    try:
        if RES.XOBJECT in page[PG.RESOURCES]:
            x_objects = page[PG.RESOURCES][RES.XOBJECT].getObject()

            for name in x_objects:
                candidate = x_objects[name]
                if candidate[IA.SUBTYPE] != "/Image":
                    continue
                extension, byte_stream = _xobj_to_image(candidate)
                if extension is None:
                    continue
                # Drop the first character of the key (presumably the
                # leading "/" of the PDF name - confirm) to build a file name.
                filename = name[1:] + ".png"
                with open(filename, "wb") as img:
                    # Track the file as soon as it exists so that a failed
                    # write is still cleaned up.
                    images_extracted.append(filename)
                    img.write(byte_stream)

        assert len(images_extracted) == nb_images
    finally:
        # Cleanup runs whether or not the assertion above held.
        for filepath in images_extracted:
            os.remove(filepath)
def main(pdf: str, page_index: int = 30) -> None:
    """Write the image XObjects of one page of *pdf* to the working directory.

    Args:
        pdf: Path of the PDF file handed to ``PyPDF2.PdfFileReader``.
        page_index: Zero-based index of the page to inspect. Defaults to 30,
            the page this script originally hard-coded.

    Each image for which ``_xobj_to_image`` reports an extension is saved as
    ``<name>.png``. ``"No image found."`` is printed when the page holds no
    image XObject at all, whether or not it has other XObject resources.

    NOTE(review): indexing past the last page presumably raises IndexError
    from the reader's page list - confirm against the PyPDF2 version in use.
    """
    reader = PyPDF2.PdfFileReader(pdf)
    page = reader.pages[page_index]

    image_seen = False
    if RES.XOBJECT in page[PG.RESOURCES]:
        x_objects = page[PG.RESOURCES][RES.XOBJECT].getObject()

        for name in x_objects:
            candidate = x_objects[name]
            if candidate[IA.SUBTYPE] != "/Image":
                continue
            image_seen = True
            extension, byte_stream = _xobj_to_image(candidate)
            if extension is None:
                # The helper could not produce image data for this object.
                continue
            # Drop the first character of the key (presumably the leading
            # "/" of the PDF name - confirm) to build a file name.
            filename = name[1:] + ".png"
            with open(filename, "wb") as img:
                img.write(byte_stream)

    if not image_seen:
        print("No image found.")