def __init__(self, rsrc, codec='utf-8', pagenum=True, pagepad=50, scale=1):
    """Initialise the converter state on top of ``PDFPageAggregator``.

    Args:
        rsrc: Forwarded unchanged to ``PDFPageAggregator.__init__``.
        codec: Stored as ``self.codec``.
        pagenum: Stored as ``self.pagenum``.
        pagepad: Stored as ``self.pagepad``; also the initial value of
            ``self.yoffset``.
        scale: Stored as ``self.scale``.
    """
    PDFPageAggregator.__init__(self, rsrc)

    # Plain configuration values, kept exactly as passed in.
    self.codec = codec
    self.scale = scale
    self.pagenum = pagenum
    self.pagepad = pagepad

    # Running state: the offset starts one pad down, and no page text has
    # been collected yet.
    self.yoffset = self.pagepad
    self.pages = []
def end_page(self, page):
    """Finish the current page and append its rendered text to ``self.pages``.

    After delegating to ``PDFPageAggregator.end_page``, every object in the
    page is passed to ``self._parse_obj``.  Each non-``None`` result is a
    ``(ykey, xkey, val)`` triple; the values are grouped by ``ykey`` (one
    output line per key) and ordered by ``xkey`` within a line.  When
    ``self.pagenum`` is true the page text is preceded by a
    ``'Page <id>'`` header line.

    Args:
        page: Forwarded to ``PDFPageAggregator.end_page``.  The object that
            is actually rendered is ``self.cur_item`` as it stands after
            that call.
    """
    PDFPageAggregator.end_page(self, page)
    page = self.cur_item

    # Only the last bbox component is needed; unpack all four so a
    # malformed bbox still fails loudly, as before.
    _, _, _, y1 = page.bbox
    # The offset is advanced *before* parsing, in case _parse_obj reads it.
    self.yoffset += y1

    pagevals = []
    if self.pagenum:
        # Previously this header was appended and then discarded when
        # ``pagevals`` was re-bound to a fresh list further down.
        pagevals.append('Page %s\n' % page.id)

    # layout maps ykey -> {xkey -> val}; a later object with the same
    # (ykey, xkey) pair overwrites an earlier one.
    layout = {}
    for child in page.objs:
        childdata = self._parse_obj(child)
        if childdata is not None:
            ykey, xkey, val = childdata
            layout.setdefault(ykey, {})[xkey] = val
    self.yoffset += self.pagepad

    # sorted() instead of ``keys()`` followed by ``.sort()``: the latter
    # only works on Python 2, where keys() returns a list.
    for ykey in sorted(layout):
        row = layout[ykey]
        vals = [row[xkey] for xkey in sorted(row)]
        vals.append('\n')
        pagevals.append(''.join(vals))
    pagevals.append('\n')
    self.pages.append(''.join(pagevals))
def end_page(self, page):
    """Finish the current page and append its rendered text to ``self.pages``.

    After delegating to ``PDFPageAggregator.end_page``, every object in the
    page is passed to ``self._parse_obj``.  Each non-``None`` result is a
    ``(ykey, xkey, val)`` triple; the values are grouped by ``ykey`` (one
    output line per key) and ordered by ``xkey`` within a line.  When
    ``self.pagenum`` is true the page text is preceded by a
    ``'Page <id>'`` header line.

    Args:
        page: Forwarded to ``PDFPageAggregator.end_page``.  The object that
            is actually rendered is ``self.cur_item`` as it stands after
            that call.
    """
    PDFPageAggregator.end_page(self, page)
    page = self.cur_item

    # Only the last bbox component is needed; unpack all four so a
    # malformed bbox still fails loudly, as before.
    _, _, _, y1 = page.bbox
    # The offset is advanced *before* parsing, in case _parse_obj reads it.
    self.yoffset += y1

    pagevals = []
    if self.pagenum:
        # Previously this header was appended and then discarded when
        # ``pagevals`` was re-bound to a fresh list further down.
        pagevals.append('Page %s\n' % page.id)

    # layout maps ykey -> {xkey -> val}; a later object with the same
    # (ykey, xkey) pair overwrites an earlier one.
    layout = {}
    for child in page.objs:
        childdata = self._parse_obj(child)
        if childdata is not None:
            ykey, xkey, val = childdata
            layout.setdefault(ykey, {})[xkey] = val
    self.yoffset += self.pagepad

    # sorted() instead of ``keys()`` followed by ``.sort()``: the latter
    # only works on Python 2, where keys() returns a list.
    for ykey in sorted(layout):
        row = layout[ykey]
        vals = [row[xkey] for xkey in sorted(row)]
        vals.append('\n')
        pagevals.append(''.join(vals))
    pagevals.append('\n')
    self.pages.append(''.join(pagevals))
def __init__(self, rsrc, outfp, codec='ascii'):
    """Initialise the converter on top of ``PDFPageAggregator``.

    Args:
        rsrc: Forwarded unchanged to ``PDFPageAggregator.__init__``.
        outfp: Stored as ``self.outfp``; presumably the output file
            object (confirm against the methods that use it).
        codec: Stored as ``self.codec``.
    """
    PDFPageAggregator.__init__(self, rsrc)
    self.codec = codec
    self.outfp = outfp