Python MuPdf Examples, Babel.backend.MuPdf Python Examples

Example #1

0

Show file

File: PdfDocument.py Project: project-renard-survey/squirrel

    def _bounding_box(self):
        """ Return the bounds of the wrapped page as a ``mupdf.Rect``. """

        # MuPDF fills the rectangle in place with the page size at 72 dpi.
        page_rect = mupdf.Rect()
        mupdf.bound_page(self._context, self._c_page, page_rect)

        return page_rect

Example #2

0

Show file

File: PdfDocument.py Project: project-renard-survey/squirrel

    def _make_transform(self, scale=1, rotation=0):
        """ Return a ``mupdf.Matrix`` built from a uniform *scale* and a *rotation*. """

        matrix = mupdf.Matrix()
        # Same factor on both axes, then the rotation is applied with pre_rotate.
        mupdf.scale(matrix, scale, scale)
        mupdf.pre_rotate(matrix, rotation)

        return matrix

Example #3

0

Show file

File: PdfDocument.py Project: project-renard-survey/squirrel

    def __del__(self):
        """ Release the cached pages, then the MuPDF document and context.

        The method tolerates a partially initialised instance and resets the
        handles once they are dropped, so a second call does nothing.
        """

        # Fixme: manage properly

        # __del__ also runs when __init__ raised before the attributes were
        # assigned (e.g. in the parent constructor), hence the getattr lookups.
        pages = getattr(self, '_pages', None)
        if pages:
            for page in pages.values():
                page._free() # require context
            pages.clear()

        context = getattr(self, '_context', None)
        c_document = getattr(self, '_c_document', None)
        if context is not None:
            # The document must be dropped before the context that owns it.
            if c_document is not None:
                mupdf.drop_document(context, c_document)
            mupdf.drop_context(context)

        # Forget the handles so they cannot be dropped twice.
        self._c_document = None
        self._context = None

Example #4

0

Show file

File: MupdfTools.py Project: project-renard-survey/squirrel

def get_font_name(font):
    """ Return the name of a MuPDF font.

    Everything up to and including the first ``+`` is removed from the decoded
    name; a name without ``+`` is returned unchanged.
    """

    font_name = mupdf.decode_utf8(mupdf.get_font_name(font))
    # The former ``if font_name.find('+'):`` test was wrong on both ends: find()
    # returns -1 (truthy) when there is no '+', and 0 (falsy) when the name
    # starts with '+'.  partition() reports explicitly whether '+' was found.
    _, separator, base_name = font_name.partition('+')

    return base_name if separator else font_name

Example #5

0

Show file

File: MupdfTools.py Project: FabriceSalvaire/Biblio

def get_font_name(font):

    """ Return the name of a MuPDF font.

    The prefix ending at the first ``+`` (if any) is stripped from the decoded
    name.
    """

    font_name = mupdf.decode_utf8(mupdf.get_font_name(font))
    i = font_name.find('+')
    # find() returns -1 when '+' is absent and 0 when it is the first
    # character, so the index must be compared with -1 rather than tested
    # for truthiness.
    if i != -1:
        font_name = font_name[i+1:]

    return font_name

Example #6

0

Show file

File: PdfDocument.py Project: project-renard-survey/squirrel

    def _make_display_list(self, no_cache=False):
        """ Record the page contents into a new display list kept in ``self._page_list``.

        :param no_cache: when true, the ``FZ_NO_CACHE`` hint is enabled on the
            list device before the page is run.
        """

        # Fixme: use it

        self._page_list = mupdf.new_display_list(self._context, mupdf.NULL)
        # The list was stored on the instance; the bare name ``page_list`` was
        # never defined and raised NameError.
        device = mupdf.new_list_device(self._context, self._page_list)
        if no_cache:
            mupdf.enable_device_hints(self._context, device, mupdf.FZ_NO_CACHE)
        # NOTE(review): the original passed an undefined name ``page``; the
        # page handle is assumed to be self._c_page - confirm.
        mupdf.run_page_contents(self._context, self._c_page, device, mupdf.identity, mupdf.NULL)
        mupdf.close_device(self._context, device)
        mupdf.drop_device(self._context, device)

Example #7

0

Show file

File: MupdfTools.py Project: project-renard-survey/squirrel

def to_text_style(style):
    """ Build a :obj:`.TextStyle` object from a MuPDF style instance. """

    mupdf_font = style.font

    # The bold/italic answers of MuPDF are normalised to Python booleans.
    return TextStyle(
        id=style.id,
        font_family=get_font_name(mupdf_font),
        font_size=style.size,
        is_bold=bool(mupdf.font_is_bold(mupdf_font)),
        is_italic=bool(mupdf.font_is_italic(mupdf_font)),
    )

Example #8

0

Show file

File: MupdfTools.py Project: FabriceSalvaire/Biblio

def to_text_style(style):

    """ Return the :obj:`.TextStyle` object matching a MuPDF style instance. """

    style_font = style.font
    # Keyword arguments are gathered first, in the same evaluation order as a
    # direct call.
    attributes = dict(
        id=style.id,
        font_family=get_font_name(style_font),
        font_size=style.size,
        is_bold=bool(mupdf.font_is_bold(style_font)),
        is_italic=bool(mupdf.font_is_italic(style_font)),
    )

    return TextStyle(**attributes)

Example #9

0

Show file

File: TextPage.py Project: FabriceSalvaire/Biblio

    def _to_style(self, c_char):
        """ Return the :obj:`TextStyle` describing the font of the MuPDF character *c_char*. """

        font_size, font = c_char.size, c_char.font
        context = self._context

        # Query MuPDF for the font attributes.
        bold = mupdf.font_is_bold(context, font)
        italic = mupdf.font_is_italic(context, font)
        family = mupdf.font_name(context, font)

        return TextStyle(font_family=family, font_size=font_size, is_bold=bold, is_italic=italic)

Example #10

0

Show file

    def _to_style(self, c_char):
        """ Build a :obj:`TextStyle` from the size and font of *c_char*. """

        char_size = c_char.size
        char_font = c_char.font
        ctx = self._context

        # Filled in the order bold, italic, name; TextStyle receives keywords only.
        style_kwargs = {'font_size': char_size}
        style_kwargs['is_bold'] = mupdf.font_is_bold(ctx, char_font)
        style_kwargs['is_italic'] = mupdf.font_is_italic(ctx, char_font)
        style_kwargs['font_family'] = mupdf.font_name(ctx, char_font)

        return TextStyle(**style_kwargs)

Example #11

0

Show file

    def dump_text_page_xml(self, dump_char=True):
        """ Return an XML dump of the text page as a string.

        The ``<page>`` root contains one ``<block>`` per text block and one
        ``<line>`` per text line.  When *dump_char* is true, each line also
        contains one self-closed ``<char/>`` element per character.
        """

        # Fixme: old and historical code, move elsewhere ?

        from html import escape  # local import, only needed by this dump

        # Pieces are collected in a list and joined once at the end.
        chunks = ['<page page_number="{}">\n'.format(self._page_number)]
        for block in mupdf_iter.text_block_iterator(self._text_page):
            chunks.append('<block bbox="{}">\n'.format(format_bounding_box(block)))
            for line in mupdf_iter.text_line_iterator(block):
                # The bbox attribute previously lacked its closing quote.
                chunks.append(' ' * 2 + '<line bbox="{}" wmode="{}" dir="{}">\n'.format(
                    format_bounding_box(line),
                    line.wmode,
                    '{0.x} {0.y}'.format(line.dir),  # :.2f
                ))
                # for span in mupdf_iter.TextSpanIterator(line):
                if dump_char:
                    for char in mupdf_iter.text_char_iterator(line):
                        # The element is self-closed (it was never terminated)
                        # and the character is escaped so that '"', '<' or '&'
                        # cannot break the attribute.
                        chunks.append(' ' * 4 + '<char c="{}" bbox="{}" font="{}" size="{:.2f}"/>\n'.format(
                            escape(chr(char.c)),
                            # char.origin
                            format_bounding_box(char),
                            mupdf.font_name(self._context, char.font),
                            char.size,
                        ))
                chunks.append(' ' * 2 + '</line>\n')
            chunks.append('</block>\n')
        chunks.append('</page>\n')

        return ''.join(chunks)

Example #12

0

Show file

File: TextPage.py Project: FabriceSalvaire/Biblio

    def dump_text_page_xml(self, dump_char=True):
        """ Dump the text page as an XML string.

        Blocks and lines are emitted as nested ``<block>`` / ``<line>``
        elements under a ``<page>`` root.  Characters are added as self-closed
        ``<char/>`` elements only when *dump_char* is true.
        """

        # Fixme: old and historical code, move elsewhere ?

        from html import escape  # local import, only needed by this dump

        line_indent = ' '*2
        char_indent = ' '*4
        # The closing quote after the bbox value was missing in the line template.
        line_template = line_indent + '<line bbox="{}" wmode="{}" dir="{}">\n'
        # <char> was never closed; it is now an empty (self-closed) element.
        char_template = char_indent + '<char c="{}" bbox="{}" font="{}" size="{:.2f}"/>\n'

        parts = ['<page page_number="{}">\n'.format(self._page_number)]
        for block in mupdf_iter.text_block_iterator(self._text_page):
            parts.append('<block bbox="{}">\n'.format(format_bounding_box(block)))
            for line in mupdf_iter.text_line_iterator(block):
                parts.append(line_template.format(
                    format_bounding_box(line),
                    line.wmode,
                    '{0.x} {0.y}'.format(line.dir), # :.2f
                ))
                # for span in mupdf_iter.TextSpanIterator(line):
                if dump_char:
                    for char in mupdf_iter.text_char_iterator(line):
                        parts.append(char_template.format(
                            # Escaped so that '"', '<' or '&' stay inside the attribute.
                            escape(chr(char.c)),
                            # char.origin
                            format_bounding_box(char),
                            mupdf.font_name(self._context, char.font),
                            char.size,
                        ))
                parts.append(line_indent + '</line>\n')
            parts.append('</block>\n')
        parts.append('</page>\n')

        return ''.join(parts)

Example #13

0

Show file

File: PdfDocument.py Project: project-renard-survey/squirrel

    def __init__(self, document, page_number): # or page_index
        """ Load page *page_number* of *document* through MuPDF.

        The context and the C document handle are copied from *document*; the
        text page is left unset.
        """

        self._document = document
        context, c_document = document._context, document._c_document
        self._context = context
        self._c_document = c_document
        self._page_number = page_number
        self._c_page = mupdf.load_page(context, c_document, page_number)
        self._text_page = None

Example #14

0

Show file

File: PdfDocument.py Project: project-renard-survey/squirrel

    def __init__(self, path):
        """ Open the document located at *path* with MuPDF.

        :raises MupdfError: when MuPDF returns a NULL document; the message
            caught by the context is logged and used as the exception text.
        """

        super().__init__(path)

        # Handles are defined up front, together with the page cache.
        self._context = self._c_document = None
        self._pages = {}

        # presumably self._path is set by the parent constructor - confirm
        encoded_path = str(self._path).encode('utf-8')

        # The context holds the exception stack and various caches.
        context = mupdf.new_context()
        self._context = context
        # Register the default file types to handle.
        mupdf.register_document_handlers(context)
        self._c_document = mupdf.open_document(context, encoded_path)
        if self._c_document == mupdf.NULL:
            error_message = mupdf.decode_utf8(mupdf.caught_message(context))
            self._logger.error(error_message)
            raise MupdfError(error_message)

        self._metadata = MetaData(self)
        self._number_of_pages = mupdf.count_pages(context, self._c_document)
        self._document_words = None
        self._image_cache = None

Example #15

0

Show file

File: PdfDocument.py Project: project-renard-survey/squirrel

    def _to_text(self, scale=1, rotation=0):

        """ Run the page through a structured-text device and return a :obj:`.TextPage` instance. """

        ctx = self._context
        page_bounds = self._bounding_box()
        matrix = self._make_transform(scale, rotation)
        options = mupdf.StructuredTextOptions()

        stext_page = mupdf.new_stext_page(ctx, page_bounds)
        stext_device = mupdf.new_stext_device(ctx, stext_page, options)
        # Running the page feeds the device; it is then closed and dropped.
        mupdf.run_page(ctx, self._c_page, stext_device, matrix, mupdf.NULL)
        mupdf.close_device(ctx, stext_device)
        mupdf.drop_device(ctx, stext_device)

        return TextPage(self, stext_page)

Example #16

0

Show file

File: PdfDocument.py Project: project-renard-survey/squirrel

    def text_direct(self):
        """ Return the page text decoded from a buffer that MuPDF builds from the page. """

        # Fixme: versus text

        ctx = self._context
        options = mupdf.StructuredTextOptions()
        page_buffer = mupdf.new_buffer_from_page(ctx, self._c_page, options)
        # The content is copied out before the C buffer is dropped.
        raw_text = mupdf.string_from_buffer(ctx, page_buffer)
        mupdf.drop_buffer(ctx, page_buffer)

        return mupdf.decode_utf8(raw_text)

Example #17

0

Show file

File: PdfDocument.py Project: project-renard-survey/squirrel

    def __init__(self, document):
        """ Read the info entries of *document* into ``self._dictionary``.

        One entry is stored per info key, plus an empty ``'metadata'`` entry.
        """

        super(MetaData, self).__init__()

        context = document._context
        c_document = document._c_document

        info_keys = ('Title', 'Subject', 'Author', 'Creator', 'Producer', 'CreationDate', 'ModDate')
        for key in info_keys:
            # Fixme: buffer size
            self._dictionary[key] = mupdf.get_meta_info(context, c_document, 'info:' + key, size=1024)

        # The XML metadata stream is not read; the entry is left empty.
        self._dictionary['metadata'] = ''

Example #18

0

Show file

File: PdfDocument.py Project: project-renard-survey/squirrel

    def _transform_bounding_box(self,
                                rotation=0,
                                resolution=72,
                                width=0, height=0, fit=False):
        """ Compute the page transform and the integer bounding box it produces.

        :param rotation: rotation passed to ``mupdf.rotate``.
        :param resolution: target resolution; the scale is ``resolution / 72``.
        :param width: maximum width, 0 or ``None`` for no constraint.
        :param height: maximum height, 0 or ``None`` for no constraint.
        :param fit: when true the aspect ratio is ignored and each axis is
            scaled independently.
        :return: the tuple ``(transform, ibounds)``.
        """

        # Callers may pass None for "unset" (to_pixmap does by default).  A
        # single None used to reach the divisions below and raise TypeError.
        width = width or 0
        height = height or 0

        bounds = self._bounding_box()
        scale = resolution / 72.
        transform = mupdf.Matrix()
        mupdf.pre_scale(mupdf.rotate(transform, rotation), scale, scale)
        tmp_bounds = mupdf.Rect()
        mupdf.copy_rect(tmp_bounds, bounds)
        ibounds = mupdf.IRect()
        mupdf.round_rect(ibounds, mupdf.transform_rect(tmp_bounds, transform))

        # If a resolution is specified, check to see whether width/height are exceeded if not, unset them.
        if resolution != 72:
            actual_width = ibounds.x1 - ibounds.x0
            actual_height = ibounds.y1 - ibounds.y0
            if width and actual_width <= width:
                width = 0
            if height and actual_height <= height:
                height = 0

        # Now width or height will be 0 unless they need to be enforced.
        if width or height:
            scale_x = width  / (tmp_bounds.x1 - tmp_bounds.x0)
            scale_y = height / (tmp_bounds.y1 - tmp_bounds.y0)
            if fit: # ignore aspect
                if not scale_x:
                    scale_x = 1.0 # keep computed width
                elif not scale_y:
                    scale_y = 1.0 # keep computed height
            else:
                if not scale_x:
                    scale_x = scale_y
                elif not scale_y:
                    scale_y = scale_x
                else:
                    # take the smallest scale
                    scale_x = scale_y = min(scale_x, scale_y)
            scale_mat = mupdf.Matrix()
            mupdf.scale(scale_mat, scale_x, scale_y)
            mupdf.concat(transform, transform, scale_mat)
            # Restart from the untransformed bounds with the final transform.
            mupdf.copy_rect(tmp_bounds, bounds)
            mupdf.transform_rect(tmp_bounds, transform)

        mupdf.round_rect(ibounds, tmp_bounds)

        return transform, ibounds

Example #19

0

Show file

    def _free(self):
        """ Drop the MuPDF structured-text page held by this object. """

        context, text_page = self._context, self._text_page
        mupdf.drop_stext_page(context, text_page)

Example #20

0

Show file

File: TextPage.py Project: FabriceSalvaire/Biblio

    def _free(self):
        """ Hand ``self._text_page`` back to MuPDF via ``drop_stext_page``. """

        ctx = self._context
        stext_page = self._text_page
        mupdf.drop_stext_page(ctx, stext_page)

Example #21

0

Show file

File: PdfDocument.py Project: project-renard-survey/squirrel

    def to_pixmap(self,
                  rotation=0,
                  resolution=72,
                  width=None, height=None, fit=False,
                  antialiasing_level=8,
                  ):
        """ Render the page and return it as a NumPy array.

        The array has shape ``(height, width, 4)`` and dtype ``uint8``; the
        pixmap is created in the device RGB colour space with alpha, on top of
        the array's data pointer.

        :param antialiasing_level: value passed to ``mupdf.set_aa_level``.
        """

        ctx = self._context
        matrix, bbox = self._transform_bounding_box(rotation, resolution, width, height, fit)

        columns, rows = mupdf.rect_width_height(bbox)
        pixels = np.zeros(shape=(rows, columns, 4), dtype=np.uint8)
        rgb = mupdf.device_rgb(ctx)
        with_alpha = True
        # The pixmap is built on the array's data pointer rather than on a
        # buffer allocated by MuPDF.
        pixmap = mupdf.new_pixmap_with_bbox_and_data(
            ctx, rgb, bbox, mupdf.NULL, with_alpha, mupdf.np_array_uint8_ptr(pixels),
        )
        mupdf.clear_pixmap_with_value(ctx, pixmap, 0xff)

        draw_device = mupdf.new_draw_device(ctx, mupdf.NULL, pixmap)
        mupdf.set_aa_level(ctx, antialiasing_level)
        mupdf.run_page(ctx, self._c_page, draw_device, matrix, mupdf.NULL)
        mupdf.close_device(ctx, draw_device)
        mupdf.drop_device(ctx, draw_device)
        mupdf.drop_pixmap(ctx, pixmap)

        return pixels

Example #22

0

Show file

File: PdfDocument.py Project: project-renard-survey/squirrel

    def _free(self):
        """ Free the text page, if one is set, then drop the MuPDF page. """

        text_page = self._text_page
        if text_page is not None:
            text_page._free()
        mupdf.drop_page(self._context, self._c_page)