def GetPageText(pg):
    """Extract the text of a page by replaying it through a display list.

    The page is first recorded into a display list; the list is then run
    through a device built from a text sheet and a text page, and the text
    page's JSON extraction is returned.

    Args:
        pg: page object; must provide ``run(device, matrix)`` and ``bound()``.

    Returns:
        The value produced by ``extractJSON()`` on the filled text page.
    """
    # Record the page's commands into a fresh display list.
    display_list = fitz.DisplayList()
    list_device = fitz.Device(display_list)
    pg.run(list_device, fitz.Identity)

    # Containers that get filled while the display list is replayed.
    text_sheet = fitz.TextSheet()
    text_page = fitz.TextPage()

    # Replay the recorded commands, limited to the page bounds, into a device
    # created from the text sheet and text page.
    page_bounds = pg.bound()
    display_list.run(
        fitz.Device(text_sheet, text_page),
        fitz.Identity,
        page_bounds,
    )

    return text_page.extractJSON()
# The page is no longer needed for drawing the pixmap at this point, so the
# resources attached to it can be dropped.
page = dv = pm = None

# Re-drawing the pixmap only requires running the display list again.
# First set up the target pixmap: RGB colorspace, sized to the rounded rect,
# with every sample cleared to 0xff.
pm1 = fitz.Pixmap(fitz.Colorspace(fitz.CS_RGB), rect.round())
pm1.clearWith(0xff)

# The display list already contains the drawing commands; run it into a
# device created on the new pixmap.
dl.run(fitz.Device(pm1, None), fitz.Identity, rect)

# Text search: a text sheet and a text page are required first.
ts = fitz.TextSheet()
tp = fitz.TextPage(mediabox)

# Run the display list through a text device, which is created from the
# text sheet and text page.
dl.run(fitz.Device(ts, tp), fitz.Identity, rect)

# Search for the string given as command-line argument 6, limiting the number
# of hits. The result is a list of hit box rects.
max_hits = 4
res = tp.search(sys.argv[6], max_hits)

# Highlight each search hit by inverting the pixmap inside the hit's irect.
for hit in res:
    pm1.invertIRect(hit.round())

# Finally write the highlighted pixmap to another PNG, named after
# command-line argument 5 with a 'dl-' prefix.
pm1.writePNG('dl-' + sys.argv[5])