Python Page.getImageBbox Examples

Programming Language: Python

Namespace/Package Name: fitz

Class/Type: Page

Method/Function: getImageBbox

Examples at hotexamples.com: 3

Python Page.getImageBbox - 3 examples found. These are the top-rated real-world Python examples of fitz.Page.getImageBbox, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

getPixmap(7)

getText(7)

getImageBbox(3)

getImageList(3)

get_contents(3)

readContents(2)

get_pixmap(2)

annots(2)

bound(2)

cleanContents(2)

get_images(2)

get_xobjects(1)

get_texttrace(1)

get_text_blocks(1)

get_text(1)

_getContents(1)

get_image_rects(1)

get_drawings(1)

_wrapContents(1)

getContents(1)

getTextPage(1)

Example #1

Show file

File: ImagesExtractor.py Project: ripsita/FileConverter

    def extract_images(cls, page: fitz.Page, clip_image_res_ratio: float = 3.0):
        """Collect the images listed by ``Page.getImageList()`` as raw dicts.

        Args:
            page (fitz.Page): pdf page whose images are extracted.
            clip_image_res_ratio (float, optional): Zoom handed to ``cls._clip_page``
                when a recovered pixmap has no colorspace. Defaults to 3.0.

        Returns:
            list: One ``cls._to_raw_dict`` result per image that was kept.

        .. note::
            ``Page.getImageList()`` contains each image only once, which may be
            less than the real count of images in a page.
        """
        # owning pdf document, needed to recover the pixmaps
        parent_doc = page.parent

        # every entry looks like:
        # (xref, smask, width, height, bpc, colorspace, ...)
        extracted = []
        for raw_item in page.getImageList(full=True):
            # getImageBbox may raise ValueError, so the lookup is guarded, per
            # https://github.com/pymupdf/PyMuPDF/issues/487
            try:
                entry = list(raw_item)
                # NOTE(review): the last field is zeroed before the lookup,
                # presumably so getImageBbox accepts the item -- confirm
                # against the PyMuPDF documentation.
                entry[-1] = 0
                # the whole (modified) item is passed, not only its name
                area = page.getImageBbox(entry)
            except ValueError:
                continue

            # only images touching the page rectangle are kept
            if area.intersects(page.rect):
                pixmap = cls._recover_pixmap(parent_doc, entry)

                # An image made of alpha values only has no colorspace; the
                # color seen in the PDF then belongs to the page background,
                # not to the image, so the page itself is clipped at the bbox:
                # https://github.com/pymupdf/PyMuPDF/issues/677
                if not pixmap.colorspace:
                    pixmap = cls._clip_page(page, area, zoom=clip_image_res_ratio)

                extracted.append(cls._to_raw_dict(pixmap, area))

        return extracted

Example #2

Show file

    def extract_images(
        cls,
        page: fitz.Page,
        clip_image_res_ratio: float = 3.0,  # resolution ratio of clipped bitmap
    ):
        ''' Build image dicts from the entries of `Page.getImageList()`.

            Each kept entry yields either the result of `cls.clip_page` (when the
            recovered pixmap has no colorspace) or of `cls.to_raw_dict`.

            NOTE: Page.getImageList() contains each image only once, which may be less
            than the real count of images in a page.
        '''
        # owning pdf document, needed to recover the pixmaps
        parent_doc = page.parent

        # every entry looks like:
        # (xref, smask, width, height, bpc, colorspace, ...)
        collected = []
        for raw_item in page.getImageList(full=True):
            # getImageBbox may raise ValueError, so the lookup is guarded, per
            # https://github.com/pymupdf/PyMuPDF/issues/487
            try:
                entry = list(raw_item)
                # NOTE(review): the last field is zeroed before the lookup,
                # presumably so getImageBbox accepts the item -- confirm
                # against the PyMuPDF documentation.
                entry[-1] = 0
                # the whole (modified) item is passed, not only its name
                area = page.getImageBbox(entry)
            except ValueError:
                continue

            # only images touching the page rectangle are kept
            if area.intersects(page.rect):
                pixmap = ImagesExtractor.recover_pixmap(parent_doc, entry)

                # An image made of alpha values only has no colorspace; the
                # color seen in the PDF then belongs to the page background,
                # not to the image, so the page itself is clipped at the bbox:
                # https://github.com/pymupdf/PyMuPDF/issues/677
                if pixmap.colorspace:
                    record = cls.to_raw_dict(pixmap, area)
                else:
                    record = cls.clip_page(page, area, zoom=clip_image_res_ratio)

                collected.append(record)

        return collected

Example #3

Show file

File: Image.py Project: silianpan/pdf2docx

    def extract_images(cls, page: fitz.Page, clip_image_res_ratio: float = 3.0):
        '''Get images from current page.

        Args:
            page (fitz.Page): pdf page to extract images from.
            clip_image_res_ratio (float, optional): Zoom passed to ``cls.clip_page``
                when a recovered pixmap has no colorspace. Defaults to 3.0,
                the value that was previously hard-coded.

        Returns:
            list: One entry per image whose bbox could be resolved -- the result
            of ``cls.clip_page`` for pixmaps without colorspace, otherwise the
            result of ``cls.to_raw_dict``.
        '''
        # pdf document
        doc = page.parent

        # check each image item:
        # (xref, smask, width, height, bpc, colorspace, ...)
        images = []
        for item in page.getImageList(full=True):
            # getImageBbox can raise ValueError (e.g. bad or ambiguous image
            # name); skip that image instead of aborting the whole page, per
            # https://github.com/pymupdf/PyMuPDF/issues/487
            try:
                bbox = page.getImageBbox(item[7])  # item[7]: name entry of such an item
            except ValueError:
                continue

            # recover the pixmap only once the bbox is known, so no work is
            # wasted on images that are skipped above
            pix = recover_pixmap(doc, item)

            # regarding images consisting of alpha values only, i.e. colorspace is None,
            # the turquoise color shown in the PDF is not part of the image, but part of
            # the PDF background. So, just clip the page pixmap according to the right bbox
            # https://github.com/pymupdf/PyMuPDF/issues/677
            if not pix.colorspace:
                raw_dict = cls.clip_page(page, bbox, zoom=clip_image_res_ratio)
            else:
                raw_dict = cls.to_raw_dict(pix, bbox)
            images.append(raw_dict)
        return images