Example #1
0
 def __init__(self, rsrc, codec='utf-8', pagenum=True, pagepad=50, scale=1):
     """Set up the aggregator and the per-document text state.

     ``rsrc`` is forwarded unchanged to ``PDFPageAggregator.__init__``.
     The remaining arguments are stored verbatim on the instance:

     * ``codec``   -- kept as ``self.codec``.
     * ``pagenum`` -- kept as ``self.pagenum``.
     * ``pagepad`` -- kept as ``self.pagepad``; also the initial value of
       the running ``self.yoffset``.
     * ``scale``   -- kept as ``self.scale``.
     """
     PDFPageAggregator.__init__(self, rsrc)

     # Output configuration.
     self.codec = codec
     self.scale = scale
     self.pagenum = pagenum

     # The running vertical offset starts at one page-padding unit.
     self.pagepad = pagepad
     self.yoffset = self.pagepad

     # Accumulated output; starts empty.
     self.pages = []
Example #2
0
    def end_page(self, page):
        """Flatten the finished page into text and append it to ``self.pages``.

        After delegating to ``PDFPageAggregator.end_page`` the analysed page
        is read from ``self.cur_item``.  Every object in ``page.objs`` is
        passed to ``self._parse_obj``; each non-``None`` result is unpacked
        as ``(ykey, xkey, val)`` and stored in a two-level mapping.  The
        mapping is then emitted one ``ykey`` at a time (ascending), with the
        values of each row concatenated in ascending ``xkey`` order and
        terminated by a newline.  A later object with the same
        ``(ykey, xkey)`` pair overwrites an earlier one.

        When ``self.pagenum`` is true the page text is prefixed with a
        ``'Page <id>'`` line.  ``self.yoffset`` is advanced by the top edge
        of the page bounding box before the objects are parsed and by
        ``self.pagepad`` afterwards.

        Args:
            page: The page being finished; forwarded to the base class and
                then replaced by ``self.cur_item``.
        """
        PDFPageAggregator.end_page(self, page)
        page = self.cur_item

        # Only the last bbox component is needed; the unpacking still
        # insists on a 4-tuple, as before.
        _, _, _, y1 = page.bbox
        # Advance the offset *before* parsing children, preserving the
        # original ordering in case _parse_obj reads self.yoffset.
        self.yoffset += y1

        pagevals = []
        if self.pagenum:
            # Previously this header was lost because the list was
            # re-created before the rows were emitted.
            pagevals.append('Page %s\n' % page.id)

        rows = {}
        for child in page.objs:
            childdata = self._parse_obj(child)
            if childdata is not None:
                ykey, xkey, val = childdata
                rows.setdefault(ykey, {})[xkey] = val

        self.yoffset += self.pagepad

        # sorted() works on both Python 2 lists and Python 3 dict views,
        # unlike calling .sort() on the result of dict.keys().
        for ykey in sorted(rows):
            row = rows[ykey]
            cells = [row[xkey] for xkey in sorted(row)]
            cells.append('\n')
            pagevals.append(''.join(cells))

        pagevals.append('\n')
        self.pages.append(''.join(pagevals))
Example #3
0
 def __init__(self, rsrc, outfp, codec='ascii'):
   """Initialise the aggregator and remember the output target.

   ``rsrc`` is handed straight to ``PDFPageAggregator.__init__``.
   ``outfp`` and ``codec`` are stored unchanged as ``self.outfp`` and
   ``self.codec``; nothing is written to ``outfp`` here.
   """
   PDFPageAggregator.__init__(self, rsrc)
   self.codec = codec
   self.outfp = outfp