def render(item):
    """Recursively write *item* and everything nested in it as XML.

    Output goes to ``self.outfp``; ``self`` is taken from the enclosing
    scope rather than passed as a parameter.  Container items (page,
    figure, textline, textbox) emit an opening tag, render each child,
    then emit the closing tag.  The type checks run in a fixed order,
    which matters if any of the layout classes derive from one another
    -- TODO confirm against the class definitions before reordering.
    """

    def emit(fragment):
        # Single place through which raw markup reaches the output stream.
        self.outfp.write(fragment)

    def emit_with_children(opening, closing):
        # Wrap the rendering of every child of `item` in a tag pair.
        emit(opening)
        for child in item:
            render(child)
        emit(closing)

    if isinstance(item, LTPage):
        emit_with_children(
            '<page id="%s" bbox="%s" rotate="%d">\n'
            % (item.id, strbbox(item.bbox), item.rotate),
            '</page>\n',
        )
        return

    # A line whose `direction` is falsy is not handled here; it falls
    # through to the remaining checks below.
    if isinstance(item, LTLine) and item.direction:
        emit('<line linewidth="%d" direction="%s" bbox="%s" />\n'
             % (item.linewidth, item.direction, strbbox(item.bbox)))
        return

    if isinstance(item, LTRect):
        emit('<rect linewidth="%d" bbox="%s" />\n'
             % (item.linewidth, strbbox(item.bbox)))
        return

    if isinstance(item, LTPolygon):
        emit('<polygon linewidth="%d" bbox="%s" pts="%s"/>\n'
             % (item.linewidth, strbbox(item.bbox), item.get_pts()))
        return

    if isinstance(item, LTFigure):
        emit_with_children(
            '<figure id="%s" bbox="%s">\n' % (item.id, strbbox(item.bbox)),
            '</figure>\n',
        )
        return

    if isinstance(item, LTTextLine):
        emit_with_children(
            '<textline bbox="%s">\n' % strbbox(item.bbox),
            '</textline>\n',
        )
        return

    if isinstance(item, LTTextBox):
        emit_with_children(
            '<textbox id="%s" bbox="%s">\n' % (item.id, strbbox(item.bbox)),
            '</textbox>\n',
        )
        return

    if isinstance(item, LTTextItem):
        emit('<text font="%s" vertical="%s" bbox="%s" fontsize="%.3f">'
             % (enc(item.font.fontname), item.is_vertical(),
                strbbox(item.bbox), item.fontsize))
        # The text payload goes through self.write, not the raw stream.
        self.write(item.text)
        emit('</text>\n')
        return

    if isinstance(item, LTText):
        # NOTE(review): item.text is interpolated as-is here (neither enc()
        # nor self.write is applied) -- confirm that is intended.
        emit('<text>%s</text>\n' % item.text)
        return

    if isinstance(item, LTImage):
        name_attr = ''
        if self.outdir:
            # A truthy return value from write_image becomes the name
            # attribute; otherwise the attribute is omitted.
            saved_name = self.write_image(item)
            if saved_name:
                name_attr = 'name="%s" ' % enc(saved_name)
        emit('<image %stype="%s" width="%d" height="%d" />\n'
             % (name_attr, item.type, item.width, item.height))
        return

    # No branch matched: treated as a programming error.
    assert 0, item
def __repr__(self):
    """Return a short text representation, or a detailed one in debug mode.

    When ``self.debug`` is falsy only the text is shown.  Otherwise the
    matrix, font, font size, bounding box and advance are included too.
    """
    if not self.debug:
        return '<text %r>' % self.text

    # self.matrix is formatted with six %.1f fields and self.adv with two,
    # so both must supply exactly that many values.
    matrix_text = '[%.1f, %.1f, %.1f, %.1f, (%.1f, %.1f)]' % self.matrix
    font = self.font
    fontsize = self.fontsize
    bbox_text = strbbox(self.bbox)
    adv_text = '(%.1f, %.1f)' % self.adv
    template = '<text matrix=%s font=%r fontsize=%.1f bbox=%s adv=%s text=%r>'
    return template % (matrix_text, font, fontsize, bbox_text, adv_text,
                       self.text)
def begin_page(self, page, ctm):
    """Write the opening ``<page>`` tag for *page* to ``self.outfp``.

    The tag carries ``self.pageno`` as its id, ``page.mediabox`` formatted
    by ``strbbox`` and ``page.rotate``.  No trailing newline is written.
    *ctm* is accepted but not used in this method.
    """
    emit = self.outfp.write
    attributes = (self.pageno, strbbox(page.mediabox), page.rotate)
    emit('<page id="%s" bbox="%s" rotate="%d">' % attributes)
def __repr__(self):
    """Return a debug string with this object's id, bounding box and rotation."""
    fields = (self.id, strbbox(self.bbox), self.rotate)
    return '<page id=%r bbox=%s rotate=%r>' % fields
def __repr__(self):
    """Return a debug string: bounding box, direction and a text preview.

    Only the first 20 characters of ``self.get_text()`` are shown.
    """
    bbox_text = strbbox(self.bbox)
    direction = self.direction
    preview = self.get_text()[:20]
    return '<textbox %s(%s) %r...>' % (bbox_text, direction, preview)
def __repr__(self):
    """Return a debug string with this object's bounding box and direction."""
    bbox_text = strbbox(self.bbox)
    return '<textline %s(%s)>' % (bbox_text, self.direction)
def __repr__(self):
    """Return a debug string with this object's id, bounding box and matrix."""
    fields = (self.id, strbbox(self.bbox), self.matrix)
    return '<figure id=%r bbox=%s matrix=%r>' % fields
def __repr__(self):
    """Return a debug string containing this object's bounding box."""
    bbox_text = strbbox(self.bbox)
    return '<group %s>' % bbox_text
def __repr__(self):
    """Return a debug string containing this object's bounding box."""
    bbox_text = strbbox(self.bbox)
    return '<item bbox=%s>' % bbox_text