def render(item):
    """Write *item*, and recursively its children, to ``self.outfp`` as XML.

    ``self`` and ``show_group`` are not parameters; they are resolved from
    the enclosing scope.

    Container items (page, figure, text line, text box) are written as an
    opening tag, their rendered children, and a closing tag. Leaf items
    (line, rect, curve, char, text, image) are written as a single element.

    NOTE(review): the order of the ``isinstance`` checks is kept exactly as
    in the original. Presumably some of these layout classes derive from
    others (an item can be both ``LTTextBox`` and ``LTTextBoxVertical``, as
    the text-box branch shows), so do not reorder the branches without
    checking the class hierarchy.

    Raises:
        AssertionError: if *item* matches none of the handled layout types.
    """
    if isinstance(item, LTPage):
        self.outfp.write('<page id="%s" bbox="%s" rotate="%d">\n' %
                         (item.pageid, bbox2str(item.bbox), item.rotate))
        for child in item:
            render(child)
        # The <layout> section is only emitted when group information exists.
        if item.groups is not None:
            self.outfp.write('<layout>\n')
            for group in item.groups:
                show_group(group)
            self.outfp.write('</layout>\n')
        self.outfp.write('</page>\n')
    elif isinstance(item, LTLine):
        self.outfp.write('<line linewidth="%d" bbox="%s" />\n' %
                         (item.linewidth, bbox2str(item.bbox)))
    elif isinstance(item, LTRect):
        self.outfp.write('<rect linewidth="%d" bbox="%s" />\n' %
                         (item.linewidth, bbox2str(item.bbox)))
    elif isinstance(item, LTCurve):
        self.outfp.write('<curve linewidth="%d" bbox="%s" pts="%s"/>\n' %
                         (item.linewidth, bbox2str(item.bbox), item.get_pts()))
    elif isinstance(item, LTFigure):
        self.outfp.write('<figure name="%s" bbox="%s">\n' %
                         (item.name, bbox2str(item.bbox)))
        for child in item:
            render(child)
        self.outfp.write('</figure>\n')
    elif isinstance(item, LTTextLine):
        self.outfp.write('<textline bbox="%s">\n' % bbox2str(item.bbox))
        for child in item:
            render(child)
        self.outfp.write('</textline>\n')
    elif isinstance(item, LTTextBox):
        # Vertical text boxes carry an extra wmode attribute.
        wmode = ''
        if isinstance(item, LTTextBoxVertical):
            wmode = ' wmode="vertical"'
        self.outfp.write('<textbox id="%d" bbox="%s"%s>\n' %
                         (item.index, bbox2str(item.bbox), wmode))
        for child in item:
            render(child)
        self.outfp.write('</textbox>\n')
    elif isinstance(item, LTChar):
        self.outfp.write('<text font="%s" bbox="%s" size="%.3f">' %
                         (enc(item.fontname), bbox2str(item.bbox), item.size))
        self.write_text(item.get_text())
        self.outfp.write('</text>\n')
    elif isinstance(item, LTText):
        # Send the text through write_text, exactly as the LTChar branch
        # does, rather than interpolating it raw into the element: raw
        # interpolation lets markup characters in the text break the XML.
        self.outfp.write('<text>')
        self.write_text(item.get_text())
        self.outfp.write('</text>\n')
    elif isinstance(item, LTImage):
        if self.outdir:
            # The name returned by write_image becomes the src attribute.
            name = self.write_image(item)
            self.outfp.write('<image src="%s" width="%d" height="%d" />\n' %
                             (enc(name), item.width, item.height))
        else:
            self.outfp.write('<image width="%d" height="%d" />\n' %
                             (item.width, item.height))
    else:
        # Explicit raise instead of ``assert 0, item``: an assert statement
        # is removed under ``python -O``, which would silently skip the item.
        raise AssertionError(item)
    return
def begin_tag(self, tag, props=None):
    """Write the opening form of *tag* to ``self.outfp`` and push it.

    Args:
        tag: object whose ``name`` attribute is used as the element name.
        props: optional dict of attributes. Its items are emitted in sorted
            order as `` key="value"`` pairs; values are passed through
            ``str`` first. Anything that is not a dict is ignored.

    The tag is appended to ``self._stack`` after it has been written.
    """
    attrs = ''
    if isinstance(props, dict):
        pieces = []
        for key, value in sorted(props.items()):
            pieces.append(' %s="%s"' % (enc(key), enc(str(value))))
        attrs = ''.join(pieces)
    opening = '<%s%s>' % (enc(tag.name), attrs)
    self.outfp.write(opening)
    self._stack.append(tag)
    return
def place_image(self, item, borderwidth, x, y, w, h):
    """Write an absolutely positioned ``<img>`` element for *item*.

    Nothing is written when ``self.outdir`` is ``None``. Otherwise *item* is
    handed to ``self.write_image`` and the returned name is used as the
    ``src`` attribute.

    Args:
        item: image object passed to ``self.write_image``.
        borderwidth: value for the ``border`` attribute.
        x, y: position; ``x`` is scaled by ``self.scale`` and ``y`` is
            subtracted from ``self._yoffset`` before scaling.
        w, h: width and height, each multiplied by ``self.scale``.
    """
    if self.outdir is None:
        return
    name = self.write_image(item)
    template = ('<img src="%s" border="%d" style="position:absolute; left:%dpx; top:%dpx;" '
                'width="%d" height="%d" />\n')
    left = x * self.scale
    top = (self._yoffset - y) * self.scale
    width = w * self.scale
    height = h * self.scale
    self.write(template % (enc(name), borderwidth, left, top, width, height))
    return
def render_string(self, textstate, seq):
    """Decode the string operands in *seq* and write the text to ``self.outfp``.

    Args:
        textstate: object whose ``font`` attribute provides ``decode`` and
            ``to_unichr``.
        seq: iterable of operands; entries that are not ``str`` instances
            are skipped.

    Each ``str`` entry is decoded by the font into character ids, and every
    id is converted with ``font.to_unichr``. Ids for which that call raises
    ``PDFUnicodeNotDefined`` are dropped. The collected text is passed
    through ``enc`` with ``self.codec`` and written in a single call.
    """
    font = textstate.font
    # Collect the pieces and join once at the end rather than growing a
    # string with ``+=`` inside the nested loop.
    pieces = []
    for obj in seq:
        if not isinstance(obj, str):
            continue
        for cid in font.decode(obj):
            try:
                pieces.append(font.to_unichr(cid))
            except PDFUnicodeNotDefined:
                # Deliberate best-effort: skip ids the font cannot convert.
                continue
    self.outfp.write(enc(''.join(pieces), self.codec))
    return
def end_tag(self):
    """Pop the most recently pushed tag and write its closing form.

    Asserts that ``self._stack`` is non-empty, removes its last entry and
    writes ``</name>`` for that entry's ``name`` to ``self.outfp``.
    """
    assert self._stack
    closing = self._stack.pop()
    escaped_name = enc(closing.name)
    self.outfp.write('</%s>' % (escaped_name,))
    return
def write_text(self, text):
    """Write *text* to ``self.outfp`` after passing it through ``enc``.

    ``enc`` is called with ``self.codec`` as its second argument.
    """
    encoded = enc(text, self.codec)
    self.outfp.write(encoded)
    return