def GetPageText(pg):
    """Return the text of page ``pg`` as delivered by ``TextPage.extractJSON()``.

    The page is first recorded into a display list; that list is then
    replayed through a text device which fills a text sheet / text page
    pair, and the text page is finally asked for its JSON representation.
    """
    # Step 1: record the page into a display list through a list device.
    display_list = fitz.DisplayList()
    list_device = fitz.Device(display_list)
    pg.run(list_device, fitz.Identity)

    # Step 2: replay the recorded list into a text device, restricted to
    # the page's bounding rectangle.
    text_sheet = fitz.TextSheet()
    text_page = fitz.TextPage()
    display_list.run(fitz.Device(text_sheet, text_page), fitz.Identity, pg.bound())

    return text_page.extractJSON()
def pdf_show(datei, seite):
    """Render one page of ``PDFcfg.doc`` and save it as a PNG image.

    Args:
        datei: Name of the PNG file to write ("Datei" is German for "file").
        seite: Page number ("Seite" is German for "page"). It is converted
            with ``int()`` and the page at index ``int(seite) - 1`` is loaded.
    """
    pdf_page = PDFcfg.doc.loadPage(int(seite) - 1)

    # Integer rectangle covering the page; it fixes the pixmap dimensions.
    bbox = pdf_page.bound().round()

    # Start from an empty RGB pixmap of that size, filled with white.
    pixmap = fitz.Pixmap(fitz.Colorspace(fitz.CS_RGB), bbox)
    pixmap.clearWith(255)

    # Draw the page onto the pixmap through a "draw" device.
    draw_device = fitz.Device(pixmap)
    pdf_page.run(draw_device, fitz.Identity)

    pixmap.writePNG(datei)
def pdf_show(pdf, page):
    """Render a page of ``pdf`` and return it as a ``wx`` bitmap.

    Args:
        pdf: Document object; its ``loadPage`` method is used.
        page: Page number; the page at index ``page - 1`` is loaded.

    Returns:
        The bitmap created by ``wx.BitmapFromBufferRGBA`` from the rendered
        pixel data.
    """
    pdf_page = pdf.loadPage(page - 1)

    # Integer rectangle covering the page; it fixes the pixmap dimensions.
    bbox = pdf_page.bound().round()

    # Start from an empty RGB pixmap of that size, filled with white.
    pixmap = fitz.Pixmap(fitz.Colorspace(fitz.CS_RGB), bbox)
    pixmap.clearWith(255)

    # Draw the page onto the pixmap through a "draw" device.
    draw_device = fitz.Device(pixmap)
    pdf_page.run(draw_device, fitz.Identity)

    # The pixel area comes back as a bytearray, but the wx function below
    # needs a string, hence the conversion.
    data = str(pixmap.samples)

    # Fallback if BitmapFromBufferRGBA causes trouble: drop every fourth
    # byte (the alpha value) from ``data`` and hand the remaining RGB bytes
    # to wx.BitmapFromBuffer with the same width and height. Transparency
    # is ignored in that case.
    return wx.BitmapFromBufferRGBA(bbox.width, bbox.height, data)
# Build the transformation matrix from the command line: argv[3] is the
# zoom in percent, argv[4] the rotation handed to preRotate.
zoom = int(sys.argv[3])
rotate = int(sys.argv[4])
trans = fitz.Matrix(zoom / 100.0, zoom / 100.0)
trans = trans.preRotate(rotate)

# The display list provides caching mechanisms that reduce re-parsing of a
# page. Create the list, hand it to a list device, then populate it by
# running the page through that device with the transformation applied.
mediabox = page.rect
dl = fitz.DisplayList(mediabox)
dv = fitz.Device(dl)
page.run(dv, trans)

# Page rectangle with the transformation applied. This must stay after the
# display list has been created from mediabox.
rect = mediabox.transform(trans)

# RGB pixmap bounded by the rounded (integer) rectangle, cleared to white.
pm = fitz.Pixmap(fitz.Colorspace(fitz.CS_RGB), rect.round())
pm.clearWith(255)

# Replay the display list through a drawing device restricted to rect;
# the drawing device stores its result in the pixmap.
dl.run(fitz.Device(pm, None), fitz.Identity, rect)
#!/usr/bin/env python
import fitz

# Smoke test: for every page of the document, walk through the individual
# steps needed to fill a text page (display list -> list device -> text
# device) and print an "ok" line after each step that completed.
#
# Every print call takes exactly one pre-formatted string in parentheses.
# That form produces the same output under Python 2 and Python 3, whereas
# the print statement is a SyntaxError on Python 3.

f = "sdw_2015_06.pdf"
d = fitz.Document(f)
seiten = d.pageCount  # number of pages ("Seiten" is German for "pages")

for seite in range(seiten):
    # The two spaces after %s reproduce the output of the former
    # comma-separated print statement exactly.
    print("=============== processing page %s  ===============" % seite)
    pg = d.loadPage(seite)

    # Record the page into a display list through a list device.
    dl = fitz.DisplayList()
    print("ok: dl = fitz.DisplayList()")
    dv = fitz.Device(dl)
    print("ok: dv = fitz.Device(dl)")
    pg.run(dv, fitz.Identity)
    print("ok: pg.run(dv, fitz.Identity)")

    # Replay the display list into a text device that fills ts / tp.
    ts = fitz.TextSheet()
    print("ok: ts = fitz.TextSheet()")
    tp = fitz.TextPage()
    print("ok: tp = fitz.TextPage()")
    rect = pg.bound()
    dl.run(fitz.Device(ts, tp), fitz.Identity, rect)
    print("ok: dl.run(fitz.Device(ts, tp), fitz.Identity, rect)")