예제 #1
0
def GetPageText(pg):
    """Return the result of ``extractJSON()`` for the text found on ``pg``.

    The page is first recorded into a display list through a list device.
    The display list is then replayed, limited to the page's bounding
    rectangle, through a device built from a text sheet / text page pair,
    and the text page's JSON extraction is returned.

    ``pg`` must offer ``run(device, matrix)`` and ``bound()``.
    """
    # Record the page into a display list using the identity matrix.
    display_list = fitz.DisplayList()
    list_device = fitz.Device(display_list)
    pg.run(list_device, fitz.Identity)

    # Containers that the second device fills while the list is replayed.
    sheet = fitz.TextSheet()
    text_page = fitz.TextPage()
    page_bounds = pg.bound()

    # The second device is deliberately left as an unnamed temporary, exactly
    # as before, so its lifetime relative to extractJSON() does not change.
    display_list.run(fitz.Device(sheet, text_page), fitz.Identity, page_bounds)

    json_text = text_page.extractJSON()
    return json_text
예제 #2
0
    ln = ln.next

# Build the transformation matrix from the command line: argument 3 is the
# zoom given in percent, argument 4 is the rotation handed to preRotate().
zoom = int(sys.argv[3])
rotate = int(sys.argv[4])
scale = zoom / 100.0  # the same factor is used on both axes
trans = fitz.Matrix(scale, scale).preRotate(rotate)

# Display list stage. A display list provides caching mechanisms that reduce
# re-parsing of a page:
#   1. create a display list for the page rectangle,
#   2. hand it over to a list device,
#   3. populate it by running the page through that device with the
#      transformation applied.
mediabox = page.rect
dl = fitz.DisplayList(mediabox)
dv = fitz.Device(dl)
page.run(dv, trans)

# Apply the same transformation to the page rectangle to get the output area.
# NOTE(review): statement order is kept as-is; whether transform() changes
# `mediabox` itself cannot be told from this fragment.
rect = mediabox.transform(trans)

# Pixmap stage: an RGB pixmap bounded by the rounded (integer) rectangle,
# cleared with 0xff (white) before anything is drawn.
irect = rect.round()
pm = fitz.Pixmap(fitz.Colorspace(fitz.CS_RGB), irect)
pm.clearWith(0xff)

# fitz.Device(pm, None) is a drawing device; replay the display list through
# it for the computed area. The device stays an unnamed temporary here.
dl.run(fitz.Device(pm, None), fitz.Identity, rect)
예제 #3
0
#!/usr/bin/env python
import fitz

# Walk every page of one PDF through the text pipeline (display list -> list
# device -> text sheet / text page) and print a line after each completed
# call. The result of the final run is not used; this reads as a smoke test
# of the individual API calls.
#
# Every print below is a single-argument print(...) call. That form emits the
# same text under the Python 2 print statement and the Python 3 print
# function, whereas the bare `print "..."` statements used previously are a
# SyntaxError on Python 3.
f = "sdw_2015_06.pdf"
d = fitz.Document(f)
seiten = d.pageCount  # number of pages in the document

for seite in range(seiten):
    # Same output as the former comma-separated print: one space after
    # "page", and two spaces before the trailing "=" run.
    print("=============== processing page %s  ===============" % seite)
    pg = d.loadPage(seite)

    # Record the page into a display list via a list device.
    dl = fitz.DisplayList()
    print("ok: dl = fitz.DisplayList()")
    dv = fitz.Device(dl)
    print("ok: dv = fitz.Device(dl)")
    pg.run(dv, fitz.Identity)
    print("ok: pg.run(dv, fitz.Identity)")

    # Create the text containers that the second device fills.
    ts = fitz.TextSheet()
    print("ok: ts = fitz.TextSheet()")
    tp = fitz.TextPage()
    print("ok: tp = fitz.TextPage()")

    # Replay the display list, limited to the page bounds, through a device
    # built from the text sheet / text page pair.
    rect = pg.bound()
    dl.run(fitz.Device(ts, tp), fitz.Identity, rect)
    print("ok: dl.run(fitz.Device(ts, tp), fitz.Identity, rect)")