def trace_runpage(use_display_list, doc, number):
    '''
    Prints an XML-style <page> element for one page of <doc> to stdout.

    use_display_list:
        If true, the page is first converted to a mupdf.DisplayList and the
        display list is run through the device; otherwise the page is run
        directly.
    doc:
        Document passed to the mupdf.Page constructor.
    number:
        Page number; the page is loaded with index <number> - 1.
    '''
    page = mupdf.Page(doc, number - 1)
    mediabox = page.bound_page()
    print(
            f'<page number="{number}" mediabox="{mediabox.x0} {mediabox.y0} {mediabox.x1} {mediabox.y1}">'
            )

    # The device writes to an Output wrapping stdout.
    stdout_output = mupdf.Output(mupdf.Output.Fixed_STDOUT)
    device = mupdf.Device(stdout_output)
    identity = mupdf.Matrix(mupdf.fz_identity)

    if not use_display_list:
        page.run_page(device, identity, mupdf.Cookie())
    else:
        display_list = mupdf.DisplayList(page)
        unbounded = mupdf.Rect(mupdf.fz_infinite_rect)
        display_list.run_display_list(device, identity, unbounded, mupdf.Cookie())

    stdout_output.close_output()
    print('</page>')
def __init__(self):
    '''
    Sets the numeric fields to 0, the object-valued fields to None, and gives
    the instance its own fresh mupdf.Cookie.
    '''
    # Chained assignments bind left-to-right, so attributes are still set in
    # the order num, band, list, ctm, tbounds, pix, bit, cookie.
    self.num = self.band = 0
    self.list = self.ctm = self.tbounds = self.pix = self.bit = None
    self.cookie = mupdf.Cookie()
def convert_to_pdf(document, page_from=0, page_to=-1, rotate=0):
    '''
    Converts a range of pages of <document> into a new PDF held in memory.

    document:
        Source document; must provide count_pages() and load_page().
    page_from:
        Zero-based index of the first page to convert; clamped to the valid
        page range.
    page_to:
        Zero-based index of the last page to convert (inclusive). A negative
        value means the last page of the document. If smaller than
        <page_from>, pages are converted in descending order from <page_from>
        down to <page_to>.
    rotate:
        Rotation applied to every output page. It is normalised into
        [0, 360); values that are not a multiple of 90 are replaced by 0.

    Returns (data, size) as given by Buffer.buffer_extract_raw() for the
    written PDF.
    '''
    num_pages = document.count_pages()
    last_page = num_pages - 1

    page_from = min(max(page_from, 0), last_page)
    if page_to < 0:
        page_to = last_page
    page_to = min(page_to, last_page)

    # Walk downwards when the caller asks for a reversed range. The endpoints
    # must not be swapped as well, otherwise range() below would be empty.
    page_delta = -1 if page_to < page_from else 1

    document_out = mupdf.PdfDocument()

    rotate %= 360
    if rotate % 90 != 0:
        rotate = 0

    for p in range(page_from, page_to + page_delta, page_delta):
        page = document.load_page(p)
        rect = page.bound_page()
        dev, resources, contents = document_out.page_write(rect)
        page.run_page(dev, mupdf.Matrix(), mupdf.Cookie())
        # Close the device so that everything it has buffered is written into
        # <contents> before the page object is created from it.
        dev.close_device()
        pdf_obj = document_out.add_page(rect, rotate, resources, contents)
        document_out.insert_page(-1, pdf_obj)

    write_options = mupdf.PdfWriteOptions()
    write_options.do_garbage = 4
    write_options.do_compress = 1
    write_options.do_compress_images = 1
    write_options.do_compress_fonts = 1
    write_options.do_sanitize = 1
    write_options.do_incremental = 0
    write_options.do_ascii = 0
    write_options.do_decompress = 0
    write_options.do_linear = 0
    write_options.do_clean = 1
    write_options.do_pretty = 0

    buffer_ = mupdf.Buffer(8192)
    output = mupdf.Output(buffer_)
    document_out.write_document(output, write_options)
    size, data = buffer_.buffer_extract_raw()
    # Diagnostic output, kept so that existing stdout behaviour is unchanged.
    print(f'buffer_.buffer_extract() returned: {size, data}')
    return data, size
def show_html(self):
    '''
    Converts the current page to HTML using Extract, then displays the result
    in a new PyQt5.QtWebKitWidgets.QWebView window.
    '''
    identity = mupdf.Matrix(1, 0, 0, 1, 0, 0)
    extracted = self.page.new_buffer_from_page_with_format(
            format="docx",
            options="html",
            transform=identity,
            cookie=mupdf.Cookie(),
            )
    html_content = extracted.buffer_extract().decode('utf8')

    # The view is stored on self before it is populated and shown.
    view = PyQt5.QtWebKitWidgets.QWebView()
    self.webview = view
    view.setHtml(html_content)
    view.show()
def convert_runpage(doc, number, out):
    '''
    Renders one page of <doc> into the document writer <out>.

    doc:
        Document passed to the mupdf.Page constructor.
    number:
        Page number; the page is loaded with index <number> - 1.
    out:
        Object providing begin_page(mediabox), which returns the device to
        draw on, and end_page().
    '''
    page = mupdf.Page(doc, number - 1)
    mediabox = page.bound_page()
    dev = out.begin_page(mediabox)
    # Use Page.run_page(), the same method the other page-running code in
    # this file calls.
    page.run_page(dev, mupdf.Matrix(mupdf.fz_identity), mupdf.Cookie())
    out.end_page()
def test(path):
    '''
    Runs various mupdf operations on <path>, which is assumed to be a file that
    mupdf can open.

    Increments the global g_test_n and uses it in the names of the output
    files written to the current directory (mupdf_test-out1-N.png,
    mupdf_test-out2-N.ppm, mupdf_test-out3-N.png), plus the fixed name
    mupdf_test-out3.png. Failures surface as assertion errors or as
    exceptions raised by the mupdf calls.
    '''
    log(f'testing path={path}')

    assert os.path.isfile(path)
    global g_test_n
    g_test_n += 1

    # See notes in mupdfwrap.py:build_swig() about buffer_extract() and
    # buffer_storage().
    #
    assert getattr(mupdf.Buffer, 'buffer_storage_raw')
    assert getattr(mupdf.Buffer, 'buffer_storage', None) is None
    assert getattr(mupdf.Buffer, 'buffer_extract_raw')
    assert getattr(mupdf.Buffer, 'buffer_extract')

    # Test operations using functions:
    #
    log('Testing functions.')
    log(f' Opening: %s' % path)
    document = mupdf.open_document(path)
    log(f' mupdf.needs_password(document)={mupdf.needs_password(document)}')
    log(f' mupdf.count_pages(document)={mupdf.count_pages(document)}')
    log(f' mupdf.document_output_intent(document)={mupdf.document_output_intent(document)}' )

    # Test operations using classes:
    #
    log(f'Testing classes')

    # Rebinds <document>; from here on the class-based API is used.
    document = mupdf.Document(path)
    log(f'Have created mupdf.Document for {path}')
    log(f'document.needs_password()={document.needs_password()}')
    log(f'document.count_pages()={document.count_pages()}')

    # Disabled diagnostic.
    if 0:
        log(f'stext info:')
        show_stext(document)

    # 'qwerty' is the only key asserted on: lookup_metadata() must return None
    # for it. The other keys are only logged.
    for k in (
            'format',
            'encryption',
            'info:Author',
            'info:Title',
            'info:Creator',
            'info:Producer',
            'qwerty',
            ):
        v = document.lookup_metadata(k)
        log(f'document.lookup_metadata() k={k} returned v={v!r}')
        if k == 'qwerty':
            assert v is None, f'v={v!r}'
        else:
            pass

    # Render at zoom/100 scale in both directions.
    zoom = 10
    scale = mupdf.Matrix.scale(zoom / 100., zoom / 100.)
    page_number = 0
    log(f'Have created scale: a={scale.a} b={scale.b} c={scale.c} d={scale.d} e={scale.e} f={scale.f}' )

    colorspace = mupdf.Colorspace(mupdf.Colorspace.Fixed_RGB)
    log(f'{colorspace.m_internal.key_storable.storable.refs}')
    if 0:
        # Disabled. NOTE(review): the log string below contains '{c}' but has
        # no f prefix, so it would log the literal text if re-enabled.
        c = colorspace.clamp_color([3.14])
        log('colorspace.clamp_color returned c={c}')
    pixmap = mupdf.Pixmap(document, page_number, scale, colorspace, 0)
    log(f'Have created pixmap: {pixmap.m_internal.w} {pixmap.m_internal.h} {pixmap.m_internal.stride} {pixmap.m_internal.n}' )

    filename = f'mupdf_test-out1-{g_test_n}.png'
    pixmap.save_pixmap_as_png(filename)
    log(f'Have created {filename} using pixmap.save_pixmap_as_png().')

    # Print image data in ascii PPM format. Copied from
    # mupdf/docs/examples/example.c.
    #
    samples = pixmap.samples()
    stride = pixmap.stride()
    n = pixmap.n()
    filename = f'mupdf_test-out2-{g_test_n}.ppm'
    with open(filename, 'w') as f:
        f.write('P3\n')
        f.write('%s %s\n' % (pixmap.m_internal.w, pixmap.m_internal.h))
        f.write('255\n')
        for y in range(0, pixmap.m_internal.h):
            for x in range(pixmap.m_internal.w):
                if x:
                    f.write(' ')
                # Index of the first component of pixel (x, y) in <samples>.
                offset = y * stride + x * n
                if hasattr(mupdf, 'bytes_getitem'):
                    # swig
                    f.write('%3d %3d %3d' % (
                            mupdf.bytes_getitem(samples, offset + 0),
                            mupdf.bytes_getitem(samples, offset + 1),
                            mupdf.bytes_getitem(samples, offset + 2),
                            ))
                else:
                    # cppyy
                    f.write('%3d %3d %3d' % (
                            samples[offset + 0],
                            samples[offset + 1],
                            samples[offset + 2],
                            ))
            f.write('\n')
    log(f'Have created {filename} by scanning pixmap.')

    # Generate a .png, but create the Pixmap from a Page instead of from a
    # Document.
    #
    page = mupdf.Page(document, 0)
    separations = page.page_separations()
    log(f'page_separations() returned {"true" if separations else "false"}')
    pixmap = mupdf.Pixmap(page, scale, colorspace, 0)
    filename = f'mupdf_test-out3-{g_test_n}.png'
    pixmap.save_pixmap_as_png(filename)
    log(f'Have created {filename} using pixmap.save_pixmap_as_png()')

    # Show links
    log(f'Links.')
    page = mupdf.Page(document, 0)
    link = mupdf.load_links(page.m_internal)
    log(f'{link}')
    if link:
        for i in link:
            log(f'{i}')

    # Check we can iterate over Link's, by creating one manually.
    #
    link = mupdf.Link(mupdf.Rect(0, 0, 1, 1), "hello")
    log(f'items in <link> are:')
    for i in link:
        log(f' {i.m_internal.refs} {i.m_internal.uri}')

    # Check iteration over Outlines.
    #
    log(f'Outlines.')
    outline = mupdf.Outline(document)
    log(f'outline.m_internal={outline.m_internal}')
    if outline.m_internal:
        log(f'{outline.uri()} {outline.page()} {outline.x()} {outline.y()} {outline.is_open()} {outline.title()}' )
    log(f'items in outline tree are:')
    for o in outline:
        log(f' {o.uri()} {o.page()} {o.x()} {o.y()} {o.is_open()} {o.title()}' )

    # Check iteration over StextPage.
    #
    log(f'StextPage.')
    stext_options = mupdf.StextOptions(0)
    # Any exception from constructing the StextPage for this page number is
    # treated as "no such page" and only logged.
    page_num = 40
    try:
        stext_page = mupdf.StextPage(document, page_num, stext_options)
    except Exception:
        log(f'no page_num={page_num}')
    else:
        device_stext = mupdf.Device(stext_page, stext_options)
        matrix = mupdf.Matrix()
        # Note that page 0 (not page_num) is what gets run through the device.
        page = mupdf.Page(document, 0)
        cookie = mupdf.Cookie()
        page.run_page(device_stext, matrix, cookie)
        log(f' stext_page is:')
        for block in stext_page:
            log(f' block:')
            for line in block:
                line_text = ''
                for char in line:
                    line_text += chr(char.m_internal.c)
                log(f' {line_text}')

        device_stext.close_device()

    # Check copy-constructor.
    log(f'Checking copy-constructor')
    document2 = mupdf.Document(document)
    # Drop the original name so that only the copy is used below.
    del document
    page = mupdf.Page(document2, 0)
    scale = mupdf.Matrix()
    pixmap = mupdf.Pixmap(page, scale, colorspace, 0)
    pixmap.save_pixmap_as_png('mupdf_test-out3.png')

    stdout = mupdf.Output(mupdf.Output.Fixed_STDOUT)
    log(f'{type(stdout)} {stdout.m_internal.state}')

    # <filename> still holds the mupdf_test-out3-N.png name assigned above.
    mediabox = page.bound_page()
    out = mupdf.DocumentWriter(filename, 'png', '', mupdf.DocumentWriter.FormatPathType_DOCUMENT)
    dev = out.begin_page(mediabox)
    page.run_page(dev, mupdf.Matrix(mupdf.fz_identity), mupdf.Cookie())
    out.end_page()

    # Check out-params are converted into python return value.
    bitmap = mupdf.Bitmap(10, 20, 8, 72, 72)
    bitmap_details = bitmap.bitmap_details()
    log(f'{bitmap_details}')
    assert list(bitmap_details) == [10, 20, 8, 12], f'bitmap_details={bitmap_details!r}'

    log(f'finished test of %s' % path)
def drawpage(doc, pagenum):
    '''
    Prepares one page of <doc> for rendering and either renders it
    immediately with dodrawpage() or stores it in the <bgprint> state.

    doc:
        Document passed to the mupdf.Page constructor.
    pagenum:
        Page number; the page is loaded with index <pagenum> - 1.

    Reads its configuration from the module-level <state> object. When
    state.output_file_per_page is set, state.out is replaced by a new output
    whose path is formatted from state.output and <pagenum>.
    '''
    list_ = None
    cookie = mupdf.Cookie()
    seps = None
    features = ""

    start = gettime() if state.showtime else 0

    page = mupdf.Page(doc, pagenum - 1)

    if state.spots != SPOTS_NONE:
        seps = page.page_separations()
        if seps.m_internal:
            if state.spots == SPOTS_FULL:
                behavior = mupdf.FZ_SEPARATION_SPOT
            else:
                behavior = mupdf.FZ_SEPARATION_COMPOSITE
            for i in range(seps.count_separations()):
                seps.set_separation_behavior(i, behavior)
        elif page.page_uses_overprint():
            # This page uses overprint, so we need an empty
            # sep object to force the overprint simulation on.
            seps = mupdf.Separations(0)
        elif (state.oi
                and state.oi.m_internal
                and state.oi.colorspace_n() != state.colorspace.colorspace_n()
                ):
            # We have an output intent, and it's incompatible
            # with the colorspace our device needs. Force the
            # overprint simulation on, because this ensures that
            # we 'simulate' the output intent too.
            seps = mupdf.Separations(0)

    if state.uselist:
        # Record the page into a display list so later passes can replay it.
        list_ = mupdf.DisplayList(page.bound_page())
        dev = mupdf.Device(list_)
        if state.lowmemory:
            dev.enable_device_hints(mupdf.FZ_NO_CACHE)
        page.run_page(dev, mupdf.Matrix(), cookie)
        dev.close_device()

        if bgprint.active and state.showtime:
            # From here on <start> holds the elapsed interpretation time; it
            # is stored as bgprint.interptime below.
            end = gettime()
            start = end - start

    if state.showfeatures:
        # SWIG doesn't appear to handle the out-param is_color in
        # mupdf.Device() constructor that wraps fz_new_test_device(), so we use
        # the underlying mupdf function() instead.
        #
        dev, iscolor = mupdf.new_test_device(0.02, 0, None)
        dev = mupdf.Device(dev)
        if state.lowmemory:
            dev.enable_device_hints(mupdf.FZ_NO_CACHE)
        if list_:
            list_.run_display_list(
                    dev,
                    mupdf.Matrix(mupdf.fz_identity),
                    mupdf.Rect(mupdf.fz_infinite_rect),
                    mupdf.Cookie(),
                    )
        else:
            page.run_page(dev, mupdf.Matrix(mupdf.fz_identity), cookie)
        dev.close_device()
        features = " color" if iscolor else " grayscale"

    if state.output_file_per_page:
        bgprint_flush()
        if state.out:
            state.out.close_output()
        text_buffer = mupdf.format_output_path(state.output, pagenum)
        state.out = mupdf.Output(text_buffer, 0)

    if bgprint.active:
        bgprint_flush()
        # bgprint.active is tested again after the flush before reporting.
        if bgprint.active:
            if not state.quiet or state.showfeatures or state.showtime or state.showmd5:
                sys.stderr.write("page %s %d%s" % (state.filename, pagenum, features))

        bgprint.started = 1
        bgprint.page = page
        bgprint.list = list_
        bgprint.seps = seps
        bgprint.filename = state.filename
        bgprint.pagenum = pagenum
        bgprint.interptime = start
    else:
        if not state.quiet or state.showfeatures or state.showtime or state.showmd5:
            sys.stderr.write("page %s %d%s" % (state.filename, pagenum, features))
        dodrawpage(page, list_, pagenum, cookie, start, 0, state.filename, 0, seps)
def dodrawpage(page, list_, pagenum, cookie, start, interptime, filename, bg, seps):
    '''
    Renders one page in the format selected by state.output_format and
    updates the timing and error state.

    page:
        The page to render; used directly when <list_> is false.
    list_:
        Optional display list for the page; when true it is replayed instead
        of running <page>.
    pagenum:
        Page number, used in output paths, text output and timing records.
    cookie:
        mupdf.Cookie passed to the run calls; its error count decides whether
        state.errored is set.
    start:
        Start time as returned by gettime(); only used if state.showtime.
    interptime:
        Interpretation time reported alongside the rendering time when <bg>
        is true.
    filename:
        Name recorded in the timing statistics.
    bg:
        True if timing should be reported as interpretation + rendering.
    seps:
        Separations passed to the mupdf.Pixmap constructor for pixmap output.

    Reads its configuration from the module-level <state> object and may
    replace state.out (SVG output) and state.bander (banded pixmap output).
    '''
    if state.output_file_per_page:
        file_level_headers()

    if list_:
        mediabox = mupdf.Rect(list_)
    else:
        mediabox = page.bound_page()

    if state.output_format == OUT_TRACE:
        state.out.write_string(
                "<page mediabox=\"%g %g %g %g\">\n"
                % (mediabox.x0, mediabox.y0, mediabox.x1, mediabox.y1))
        dev = mupdf.Device(state.out)
        if state.lowmemory:
            dev.enable_device_hints(mupdf.FZ_NO_CACHE)
        if list_:
            list_.run_display_list(dev, mupdf.Matrix(), mupdf.Rect(mupdf.fz_infinite_rect), cookie)
        else:
            page.run_page(dev, mupdf.Matrix(mupdf.fz_identity), cookie)
        state.out.write_string("</page>\n")
        dev.close_device()
        dev = None

    elif state.output_format == OUT_XMLTEXT:
        state.out.write_string(
                "<page mediabox=\"%g %g %g %g\">\n"
                % (mediabox.x0, mediabox.y0, mediabox.x1, mediabox.y1))
        dev = mupdf.Device.new_raw_device(state.out)
        if list_:
            list_.run_display_list(dev, mupdf.Matrix(), mupdf.Rect(mupdf.fz_infinite_rect), cookie)
        else:
            page.run_page(dev, mupdf.Matrix(mupdf.fz_identity), cookie)
        state.out.write_string("</page>\n")
        dev.close_device()
        dev = None

    elif state.output_format == OUT_BBOX:
        bbox = mupdf.Rect(mupdf.Rect.Fixed_EMPTY)
        dev = mupdf.Device(bbox)
        if state.lowmemory:
            dev.enable_device_hints(mupdf.FZ_NO_CACHE)
        if list_:
            list_.run_display_list(
                    dev,
                    mupdf.Matrix(mupdf.fz_identity),
                    mupdf.Rect(mupdf.fz_infinite_rect),
                    cookie,
                    )
        else:
            page.run_page(dev, mupdf.Matrix(mupdf.fz_identity), cookie)
        dev.close_device()
        # The text is formatted here, as in the other branches, so that
        # write_string() receives a single string.
        state.out.write_string(
                "<page bbox=\"%s %s %s %s\" mediabox=\"%s %s %s %s\" />\n"
                % (
                    bbox.x0, bbox.y0, bbox.x1, bbox.y1,
                    mediabox.x0, mediabox.y0, mediabox.x1, mediabox.y1,
                    ))

    elif state.output_format in (OUT_TEXT, OUT_HTML, OUT_XHTML, OUT_STEXT):
        zoom = state.resolution / 72
        ctm = mupdf.Matrix(
                mupdf.pre_scale(mupdf.rotate(state.rotation), zoom, zoom))

        stext_options = mupdf.StextOptions()
        # Only the HTML flavours ask for images to be preserved.
        if state.output_format in (OUT_HTML, OUT_XHTML):
            stext_options.flags = mupdf.FZ_STEXT_PRESERVE_IMAGES
        else:
            stext_options.flags = 0
        text = mupdf.StextPage(mediabox)
        dev = mupdf.Device(text, stext_options)
        if state.lowmemory:
            dev.enable_device_hints(mupdf.FZ_NO_CACHE)
        if list_:
            list_.run_display_list(dev, ctm, mupdf.Rect(mupdf.fz_infinite_rect), cookie)
        else:
            page.run_page(dev, ctm, cookie)
        dev.close_device()
        dev = None

        if state.output_format == OUT_STEXT:
            state.out.print_stext_page_as_xml(text, pagenum)
        elif state.output_format == OUT_HTML:
            state.out.print_stext_page_as_html(text, pagenum)
        elif state.output_format == OUT_XHTML:
            state.out.print_stext_page_as_xhtml(text, pagenum)
        elif state.output_format == OUT_TEXT:
            state.out.print_stext_page_as_text(text)
            state.out.write_string("\f\n")

    elif state.output_format == OUT_SVG:
        zoom = state.resolution / 72
        ctm = mupdf.Matrix(zoom, zoom)
        # NOTE(review): the return value of pre_rotate() is discarded; confirm
        # that it modifies <ctm> in place rather than returning a new matrix.
        ctm.pre_rotate(state.rotation)
        tbounds = mupdf.Rect(mediabox, ctm)

        if not state.output or state.output == "-":
            state.out = mupdf.Output(mupdf.Output.Fixed_STDOUT)
        else:
            buf = mupdf.format_output_path(state.output, pagenum)
            state.out = mupdf.Output(buf, 0)

        dev = mupdf.Device(
                state.out,
                tbounds.x1 - tbounds.x0,
                tbounds.y1 - tbounds.y0,
                mupdf.FZ_SVG_TEXT_AS_PATH,
                1,
                )
        if state.lowmemory:
            dev.enable_device_hints(mupdf.FZ_NO_CACHE)
        if list_:
            list_.run_display_list(dev, ctm, tbounds, cookie)
        else:
            page.run_page(dev, ctm, cookie)
        dev.close_device()
        state.out.close_output()

    else:
        # Pixmap-based output, optionally rendered in bands.
        zoom = state.resolution / 72
        ctm = mupdf.Matrix(
                mupdf.pre_scale(mupdf.rotate(state.rotation), zoom, zoom))
        tbounds = mupdf.Rect(mediabox, ctm)
        ibounds = tbounds.round_rect()

        # Make local copies of our width/height
        w = state.width
        h = state.height

        # If a resolution is specified, check to see whether w/h are
        # exceeded; if not, unset them.
        if state.res_specified:
            t = ibounds.x1 - ibounds.x0
            if w and t <= w:
                w = 0
            t = ibounds.y1 - ibounds.y0
            if h and t <= h:
                h = 0

        # Now w or h will be 0 unless they need to be enforced.
        if w or h:
            scalex = w / (tbounds.x1 - tbounds.x0)
            scaley = h / (tbounds.y1 - tbounds.y0)

            if state.fit:
                if w == 0:
                    scalex = 1.0
                if h == 0:
                    scaley = 1.0
            else:
                if w == 0:
                    scalex = scaley
                if h == 0:
                    scaley = scalex
            if not state.fit:
                # Keep the aspect ratio by using the smaller factor for both.
                if scalex > scaley:
                    scalex = scaley
                else:
                    scaley = scalex
            scale_mat = mupdf.Matrix.scale(scalex, scaley)
            ctm = mupdf.Matrix(
                    mupdf.concat(ctm.internal(), scale_mat.internal()))
            tbounds = mupdf.Rect(mediabox, ctm)
        ibounds = tbounds.round_rect()
        tbounds = ibounds.rect_from_irect()

        band_ibounds = ibounds
        bands = 1
        totalheight = ibounds.y1 - ibounds.y0
        drawheight = totalheight

        if state.band_height != 0:
            # Banded rendering; we'll only render to a
            # given height at a time.
            drawheight = state.band_height
            if totalheight > state.band_height:
                band_ibounds.y1 = band_ibounds.y0 + state.band_height
            # Ceiling division; must stay an int because it is used as a
            # range() bound below.
            bands = (totalheight + state.band_height - 1) // state.band_height
            tbounds.y1 = tbounds.y0 + state.band_height + 2
            #DEBUG_THREADS(("Using %d Bands\n", bands));

        if state.num_workers > 0:
            for band in range(min(state.num_workers, bands)):
                worker = state.workers[band]
                worker.band = band
                worker.ctm = ctm
                worker.tbounds = tbounds
                worker.cookie = mupdf.Cookie()
                worker.list = list_
                worker.pix = mupdf.Pixmap(state.colorspace, band_ibounds, seps, state.alpha)
                worker.pix.set_pixmap_resolution(
                        int(state.resolution), int(state.resolution))
                ctm.f -= drawheight
            pix = state.workers[0].pix
        else:
            pix = mupdf.Pixmap(state.colorspace, band_ibounds, seps, state.alpha)
            pix.set_pixmap_resolution(int(state.resolution), int(state.resolution))

        # Output any page level headers (for banded formats)
        if state.output:
            state.bander = None
            if state.output_format in (OUT_PGM, OUT_PPM, OUT_PNM):
                state.bander = mupdf.BandWriter(state.out, mupdf.BandWriter.PNM)
            elif state.output_format == OUT_PAM:
                state.bander = mupdf.BandWriter(state.out, mupdf.BandWriter.PAM)
            elif state.output_format == OUT_PNG:
                state.bander = mupdf.BandWriter(state.out, mupdf.BandWriter.PNG)
            elif state.output_format == OUT_PBM:
                state.bander = mupdf.BandWriter(state.out, mupdf.BandWriter.PBM)
            elif state.output_format == OUT_PKM:
                state.bander = mupdf.BandWriter(state.out, mupdf.BandWriter.PKM)
            elif state.output_format == OUT_PS:
                state.bander = mupdf.BandWriter(state.out, mupdf.BandWriter.PS)
            elif state.output_format == OUT_PSD:
                state.bander = mupdf.BandWriter(state.out, mupdf.BandWriter.PSD)
            elif state.output_format == OUT_PWG:
                if state.out_cs == CS_MONO:
                    state.bander = mupdf.BandWriter(
                            state.out, mupdf.BandWriter.MONO, mupdf.PwgOptions())
                else:
                    state.bander = mupdf.BandWriter(
                            state.out, mupdf.BandWriter.COLOR, mupdf.PwgOptions())
            elif state.output_format == OUT_PCL:
                if state.out_cs == CS_MONO:
                    state.bander = mupdf.BandWriter(
                            state.out, mupdf.BandWriter.MONO, mupdf.PclOptions())
                else:
                    state.bander = mupdf.BandWriter(
                            state.out, mupdf.BandWriter.COLOR, mupdf.PclOptions())
            if state.bander:
                state.bander.write_header(
                        pix.w(),
                        totalheight,
                        pix.n(),
                        pix.alpha(),
                        pix.xres(),
                        pix.yres(),
                        state.output_pagenum,
                        pix.colorspace(),
                        pix.seps(),
                        )
                state.output_pagenum += 1

        for band in range(bands):
            if state.num_workers > 0:
                w = state.workers[band % state.num_workers]
                pix = w.pix
                bit = w.bit
                w.bit = None
                # NOTE(review): this reads the count with errors(), while the
                # end of this function uses get_errors(); confirm which
                # accessor mupdf.Cookie provides.
                cookie.increment_errors(w.cookie.errors())
            else:
                bit = drawband(page, list_, ctm, tbounds, cookie, band * state.band_height, pix)

            if state.output:
                if state.bander:
                    # Prefer the bitmap when drawband()/the worker produced one.
                    if bit:
                        state.bander.write_band(bit.stride(), drawheight, bit.samples())
                    else:
                        state.bander.write_band(pix.stride(), drawheight, pix.samples())
                bit = None

            if state.num_workers > 0 and band + state.num_workers < bands:
                # Hand this worker the next band it is responsible for.
                w = state.workers[band % state.num_workers]
                w.band = band + state.num_workers
                w.ctm = ctm
                w.tbounds = tbounds
                w.cookie = mupdf.Cookie()
            ctm.f -= drawheight

        # FIXME
        if state.showmd5:
            digest = pix.md5_pixmap()
            sys.stderr.write(' ')
            for i in range(16):
                sys.stderr.write('%02x' % digest[i])

    if state.output_file_per_page:
        file_level_trailers()

    if state.showtime:
        end = gettime()
        diff = end - start

        if bg:
            if diff + interptime < timing.min:
                timing.min = diff + interptime
                timing.mininterp = interptime
                timing.minpage = pagenum
                timing.minfilename = filename
            if diff + interptime > timing.max:
                timing.max = diff + interptime
                timing.maxinterp = interptime
                timing.maxpage = pagenum
                timing.maxfilename = filename
            timing.count += 1

            sys.stderr.write(
                    " %dms (interpretation) %dms (rendering) %dms (total)"
                    % (interptime, diff, diff + interptime))
        else:
            if diff < timing.min:
                timing.min = diff
                timing.minpage = pagenum
                timing.minfilename = filename
            if diff > timing.max:
                timing.max = diff
                timing.maxpage = pagenum
                timing.maxfilename = filename
            timing.total += diff
            timing.count += 1

            sys.stderr.write(" %dms" % diff)

    if not state.quiet or state.showfeatures or state.showtime or state.showmd5:
        sys.stderr.write("\n")

    if state.lowmemory:
        mupdf.empty_store()

    if state.showmemory:
        mupdf.dump_glyph_cache_stats(mupdf.stderr_())

    mupdf.flush_warnings()

    if cookie.get_errors():
        state.errored = 1