Esempio n. 1
0
def total_image_area(page: fitz.Page) -> float:
    """Return the summed area of every image placement on `page`.

    Every rectangle reported by ``page.get_image_rects`` for every entry of
    ``page.get_images()`` contributes its own area. Overlap between images
    is not accounted for, so the total can exceed the actual area of the
    page.

    Args:
        page: The page whose images are measured.

    Returns:
        The sum of the rectangle areas, in the default unit of
        ``Rect.get_area`` (which PyMuPDF labels ``"px"``). The result is
        ``0`` when the page yields no image rectangles.
    """
    return sum(
        # `get_area` is the snake_case name matching the other PyMuPDF calls
        # here; the camelCase `getArea` is only a deprecated alias.
        rect.get_area()
        for image in page.get_images()
        for rect in page.get_image_rects(image)  # type: ignore
    )
Esempio n. 2
0
    def __init__(self, doc: fitz.Document, page: fitz.Page) -> None:
        # Keep handles on the owning document and the wrapped page.
        self.doc = doc
        self._page = page

        # Rectangle returned by page.bound(); used to work out which part of
        # an image is actually shown (the page's cropbox, already rotated).
        self.bbox = page.bound()

        # Build one cross-reference record per image listed on the page.
        candidates = [
            self._build_xref_image(item) for item in page.get_images(full=True)
        ]
        # Collect the xrefs that other images name as their soft mask
        # (falsy 'smask' values are skipped).
        mask_xrefs = {
            smask
            for smask in (record['smask'] for record in candidates)
            if smask
        }
        # Drop the masks themselves, since they can't be easily downsampled.
        self.xref_images = [
            record for record in candidates if record['xref'] not in mask_xrefs
        ]

        # Lazily computed, memoized state follows.

        # Mapping of block numbers to image hashes; None until first computed.
        self._block_hashes = None