def total_image_area(page: fitz.Page) -> int:
    """Return the total area (in pixels) consumed by images that appear in `page`.

    Every rectangle reported by `page.get_image_rects` for every entry of
    `page.get_images()` contributes its own area. Overlap between images is
    not accounted for, so the total can exceed the actual area of the page.

    Args:
        page: The page whose images are measured.

    Returns:
        The sum of the individual image rectangle areas; 0 when the page
        has no images.

    NOTE(review): PyMuPDF documents `Rect.get_area()` as returning a float,
    so the result is presumably a float despite the `int` annotation --
    confirm before relying on an integer result.
    """
    return sum(
        # `get_area` is the snake_case replacement for the deprecated
        # camelCase `getArea`, matching the other PyMuPDF calls used here.
        rect.get_area()
        for image in page.get_images()
        for rect in page.get_image_rects(image)  # type: ignore
    )
def __init__(self, doc: fitz.Document, page: fitz.Page) -> None:
    """Store the document and page, and collect the page's non-mask images.

    Args:
        doc: The document that `page` belongs to.
        page: The page whose images are tracked by this instance.
    """
    self.doc = doc
    self._page = page

    # Already-rotated cropbox of the page; used to work out which part of
    # an image is actually being displayed.
    self.bbox = page.bound()

    # Build one cross-referenced image record per entry reported by the page.
    build_record = self._build_xref_image
    records = [build_record(entry) for entry in page.get_images(full=True)]

    # Records whose xref is used as another record's soft mask are dropped,
    # since masks can't be easily downsampled. Falsy 'smask' values mean
    # "no mask" and are skipped.
    smask_values = (record['smask'] for record in records)
    mask_xrefs = {value for value in smask_values if value}
    self.xref_images = [
        record for record in records if record['xref'] not in mask_xrefs
    ]

    # Lazy, memoized attributes.
    # Matches block numbers to image hashes; None until it is computed.
    self._block_hashes = None