def render(item):
    """Recursively feed a layout item and its children to ``self.doc``.

    ``self`` is taken from the enclosing scope. Each layout type is mapped to
    one ``self.doc`` hook; containers recurse into their children.

    The ``isinstance`` checks are order-sensitive: the more specific types
    (``LTLine``/``LTRect`` before ``LTCurve``, ``LTTextLine`` before
    ``LTTextBox``, ``LTChar`` before ``LTText``) are tested first, exactly as
    in the original chain.

    Raises:
        AssertionError: if ``item`` is none of the handled layout types.
    """
    def sort_x(a, b):
        # Old-style comparator: orders by increasing x0. The difference is
        # truncated to an int, so items less than one unit apart compare equal.
        return int(a.x0 - b.x0)

    def sort_y(a, b):
        # Old-style comparator: orders by decreasing y0, with the same
        # truncation as sort_x.
        return int(b.y0 - a.y0)

    def sort_children(objs, comparator):
        # ``sorted(..., cmp=...)`` no longer exists on Python 3; cmp_to_key
        # yields the same ordering and is also available on Python 2.7.
        # Imported here so only the (rare) sorting branches pay for it.
        from functools import cmp_to_key
        return sorted(objs, key=cmp_to_key(comparator))

    if isinstance(item, LTPage):
        self.doc.handle_page()
        item._objs = sort_children(item._objs, sort_y)
        for child in item:
            render(child)
    elif isinstance(item, (LTLine, LTRect)):
        # Rectangles are reported through the same hook as lines.
        self.doc.handle_hline(item)
    elif isinstance(item, LTCurve):
        # Any other curve produces no output (the old XML dump here was
        # already disabled behind ``if False``). The branch must stay so that
        # curves do not fall through to the "unhandled" assertion below.
        pass
    elif isinstance(item, LTFigure):
        # Figures emit nothing themselves; only their children are rendered.
        for child in item:
            render(child)
    elif isinstance(item, LTTextLine):
        # NOTE(review): the flattened source does not show whether the child
        # loop is nested under this non-blank check; it is assumed to be, so
        # whitespace-only lines are skipped entirely. Confirm against history.
        if get_text(item).strip():
            self.doc.new_chunk(item.bbox)
            for child in item:
                render(child)
    elif isinstance(item, LTTextBox):
        # Major change: the box's children are sorted by x before rendering.
        item._objs = sort_children(item._objs, sort_x)
        self.doc.new_line(item.bbox)
        for child in item:
            render(child)
    elif isinstance(item, LTChar):
        self.doc.write_text(item.get_text(), item)
    elif isinstance(item, LTText):
        # Non-LTChar text is written one character at a time.
        for t in item.get_text():
            self.doc.write_text(t, item)
    elif isinstance(item, LTImage):
        self.doc.handle_image(item)
    else:
        # Debug aid kept from the original, now as a function call so the
        # module parses on Python 3.
        print(isinstance(item, LTPage), type(item))
        assert 0, item
    return
def render(item):
    """Recursively write an HTML-like rendering of a layout item to ``self.outfp``.

    ``self`` is taken from the enclosing scope. Pages become anchor tags,
    text boxes become ``<div><p>`` blocks, text lines become ``<span>``
    markers, and graphics become ``<line>``/``<rect>``/``<curve>``/``<img>``
    elements carrying their bounding boxes.

    The ``isinstance`` checks are order-sensitive and kept in the original
    order (specific types before the more general ones).

    Raises:
        AssertionError: if ``item`` is none of the handled layout types.
    """
    if isinstance(item, LTPage):
        self.outfp.write('<a id="page_%s" data-bbox="%s" data-rotate="%d"></a>\n' %
                         (item.pageid, bbox2str(item.bbox), item.rotate))
        for child in item:
            render(child)
    elif isinstance(item, LTLine):
        self.outfp.write('<line linewidth="%d" bbox="%s" />\n' %
                         (item.linewidth, bbox2str(item.bbox)))
    elif isinstance(item, LTRect):
        self.outfp.write('<rect linewidth="%d" bbox="%s" />\n' %
                         (item.linewidth, bbox2str(item.bbox)))
    elif isinstance(item, LTCurve):
        self.outfp.write('<curve linewidth="%d" bbox="%s" pts="%s"/>\n' %
                         (item.linewidth, bbox2str(item.bbox), item.get_pts()))
    elif isinstance(item, LTFigure):
        self.outfp.write('<figure name="%s" bbox="%s">\n' %
                         (item.name, bbox2str(item.bbox)))
        for child in item:
            render(child)
        self.outfp.write('</figure>\n')
    elif isinstance(item, LTTextLine):
        # The span is a self-closed position marker; the line's characters
        # follow it rather than being wrapped by it.
        self.outfp.write('<span data-bbox="%s"/>\n' % bbox2str(item.bbox))
        for child in item:
            render(child)
    elif isinstance(item, LTTextBox):
        # The value is interpolated *inside* data-wmode="...", so it must be
        # the bare mode name. The previous value (' wmode="vertical"')
        # produced nested quotes and therefore a malformed attribute.
        wmode = ''
        if isinstance(item, LTTextBoxVertical):
            wmode = 'vertical'
        self.outfp.write('<div id="%d" data-bbox="%s" data-wmode="%s"><p>\n' %
                         (item.index, bbox2str(item.bbox), wmode))
        for child in item:
            render(child)
        self.outfp.write('</p></div>\n')
    elif isinstance(item, LTChar):
        # Only the character's text is written; no per-character element
        # wraps it.
        self.write_text(item.get_text())
    elif isinstance(item, LTText):
        # Written directly to the stream, bypassing self.write_text.
        self.outfp.write(item.get_text())
    elif isinstance(item, LTImage):
        if self.imagewriter is not None:
            # Export the image and reference the exported name.
            name = self.imagewriter.export_image(item)
            self.outfp.write('<img src="%s" width="%d" height="%d" />\n' %
                             (enc(name), item.width, item.height))
        else:
            self.outfp.write('<img width="%d" height="%d" />\n' %
                             (item.width, item.height))
    else:
        assert 0, item
    return
def __repr__(self):
    """Return a debug string: class name, bounding box, then type and font."""
    template = '<%s %s Type=%r Font=%r>'
    values = (
        self.__class__.__name__,
        bbox2str(self.bbox),
        self.type,
        self.font,
    )
    return template % values
def __repr__(self) -> str:
    """Return a debug string: class name, page id, bounding box and rotation."""
    template = '<%s(%r) %s rotate=%r>'
    values = (
        self.__class__.__name__,
        self.pageid,
        bbox2str(self.bbox),
        self.rotate,
    )
    return template % values
def render(item):
    """Recursively emit XML for a layout item and mirror it into the Rpa model.

    ``self`` is taken from the enclosing scope. Every item is serialised via
    ``self.write``. In addition, pages, figures and text boxes are recorded
    as ``RpaPdfPage`` / ``RpaFigure`` / ``RpaTextBox`` objects: figures and
    text boxes are added to ``self.current_page``, and each finished page is
    added to ``self.rpa_pdf_document``.

    The ``isinstance`` checks are order-sensitive and kept in the original
    order (specific types before the more general ones).

    Raises:
        AssertionError: if ``item`` is none of the handled layout types.
    """
    if isinstance(item, LTPage):
        s = '<page id="%s" bbox="%s" rotate="%d">\n' % (
            item.pageid,
            bbox2str(item.bbox),
            item.rotate,
        )
        self.current_page = RpaPdfPage(item.pageid, item.bbox, item.rotate)
        self.write(s)
        for child in item:
            render(child)
        if item.groups is not None:
            self.write("<layout>\n")
            for group in item.groups:
                show_group(group)
            self.write("</layout>\n")
        self.write("</page>\n")
        self.rpa_pdf_document.add_page(self.current_page)
    elif isinstance(item, LTLine):
        s = '<line linewidth="%d" bbox="%s" />\n' % (
            item.linewidth,
            bbox2str(item.bbox),
        )
        self.write(s)
    elif isinstance(item, LTRect):
        s = '<rect linewidth="%d" bbox="%s" />\n' % (
            item.linewidth,
            bbox2str(item.bbox),
        )
        self.write(s)
    elif isinstance(item, LTCurve):
        s = '<curve linewidth="%d" bbox="%s" pts="%s"/>\n' % (
            item.linewidth,
            bbox2str(item.bbox),
            item.get_pts(),
        )
        self.write(s)
    elif isinstance(item, LTFigure):
        s = '<figure name="%s" bbox="%s">\n' % (item.name, bbox2str(item.bbox))
        self.write(s)
        # Figures can nest. Rendering a nested figure replaces self.figure,
        # so keep this figure in a local and remember the enclosing one.
        # Previously the nested figure reset self.figure to None, which made
        # the outer loop fail on its next child (or add None to the page).
        enclosing_figure = getattr(self, "figure", None)
        figure = RpaFigure(item.name, item.bbox)
        self.figure = figure
        for child in item:
            # Kept per child as in the original; note that the figure item
            # itself (not the child) is what gets passed.
            figure.set_item(item)
            render(child)
            # A nested figure restores self.figure on its way out, but make
            # sure later siblings (e.g. images) see this figure regardless.
            self.figure = figure
        self.write("</figure>\n")
        self.current_page.add_content(figure)
        # Top-level figures restore None here, matching the old behaviour.
        self.figure = enclosing_figure
    elif isinstance(item, LTTextLine):
        self.write('<textline bbox="%s">\n' % bbox2str(item.bbox))
        for child in item:
            render(child)
        self.write("</textline>\n")
    elif isinstance(item, LTTextBox):
        wmode = ""
        if isinstance(item, LTTextBoxVertical):
            # Spliced verbatim after the bbox attribute of the <textbox> tag.
            wmode = ' wmode="vertical"'
        s = '<textbox id="%d" bbox="%s"%s>\n' % (
            item.index,
            bbox2str(item.bbox),
            wmode,
        )
        box = RpaTextBox(item.index, item.bbox, wmode)
        self.write(s)
        box.set_item(item)
        self.current_page.add_content(box)
        for child in item:
            render(child)
        self.write("</textbox>\n")
    elif isinstance(item, LTChar):
        s = ('<text font="%s" bbox="%s" colourspace="%s" '
             'ncolour="%s" size="%.3f">' % (
                 enc(item.fontname),
                 bbox2str(item.bbox),
                 item.ncs.name,
                 item.graphicstate.ncolor,
                 item.size,
             ))
        self.write(s)
        self.write_text(item.get_text())
        self.write("</text>\n")
    elif isinstance(item, LTText):
        self.write("<text>%s</text>\n" % item.get_text())
    elif isinstance(item, LTImage):
        # When inside a figure, hand the image to that figure as well.
        if self.figure:
            self.figure.set_item(item)
        if self.imagewriter is not None:
            # Export the image and reference the exported name.
            name = self.imagewriter.export_image(item)
            self.write('<image src="%s" width="%d" height="%d" />\n' %
                       (enc(name), item.width, item.height))
        else:
            self.write('<image width="%d" height="%d" />\n' %
                       (item.width, item.height))
    else:
        assert False, str(("Unhandled", item))