Example #1
0
        def render(item):
            """Recursively dispatch a layout item to the matching ``self.doc`` handler.

            Containers (page, figure, text box, text line) are walked depth-first;
            leaf items are forwarded to ``self.doc``.  A page's children are
            reordered by descending ``y0`` and a text box's children by ascending
            ``x0`` before they are visited.

            :param item: a layout object (one of the ``LT*`` types tested below).
            :raises AssertionError: if ``item`` is none of the handled types.
            """
            # Imported locally so the block stays self-contained; cmp_to_key is
            # available on Python 2.7 and 3.2+, unlike the removed ``cmp=`` keyword.
            from functools import cmp_to_key

            def sort_x(a, b):
                # Ascending x0.  int() truncates toward zero, so objects whose x0
                # differ by less than 1 compare equal and keep their input order
                # (sorted() is stable).
                return int(a.x0 - b.x0)

            def sort_y(a, b):
                # Descending y0, with the same sub-unit tolerance as sort_x.
                return int(b.y0 - a.y0)

            if isinstance(item, LTPage):
                self.doc.handle_page()
                item._objs = sorted(item._objs, key=cmp_to_key(sort_y))
                for child in item:
                    render(child)

            elif isinstance(item, LTLine):
                self.doc.handle_hline(item)

            elif isinstance(item, LTRect):
                # Rectangles are routed to the same handler as lines.
                self.doc.handle_hline(item)

            elif isinstance(item, LTCurve):
                # Curves produce no output, but must be matched here so they do
                # not fall through to the "unknown item" error below.
                pass

            elif isinstance(item, LTFigure):
                # Figures emit nothing themselves; only their children are rendered.
                for child in item:
                    render(child)

            elif isinstance(item, LTTextLine):
                # Lines containing only whitespace are skipped entirely.
                if get_text(item).strip():
                    self.doc.new_chunk(item.bbox)
                    for child in item:
                        render(child)

            elif isinstance(item, LTTextBox):
                # Children of a text box are visited in x order.
                item._objs = sorted(item._objs, key=cmp_to_key(sort_x))
                self.doc.new_line(item.bbox)
                for child in item:
                    render(child)

            elif isinstance(item, LTChar):
                self.doc.write_text(item.get_text(), item)

            elif isinstance(item, LTText):
                # Non-LTChar text is written one character at a time.
                for t in item.get_text():
                    self.doc.write_text(t, item)

            elif isinstance(item, LTImage):
                self.doc.handle_image(item)

            else:
                # Same diagnostic text as before, in a form valid on Python 2 and 3.
                print("%s %s" % (isinstance(item, LTPage), type(item)))
                # Explicit raise so the check is not stripped under ``python -O``.
                raise AssertionError(item)
Example #2
0
 def render(item):
     """Write an HTML fragment for ``item`` to ``self.outfp``, recursing into children.

     Each layout type maps to one tag carrying the item's bounding box (via
     ``bbox2str``) in an attribute; container types render their children
     after the opening tag.

     :param item: a layout object (one of the ``LT*`` types tested below).
     :raises AssertionError: if ``item`` is none of the handled types.
     """
     if isinstance(item, LTPage):
         self.outfp.write('<a id="page_%s" data-bbox="%s" data-rotate="%d"></a>\n' %
                          (item.pageid, bbox2str(item.bbox), item.rotate))
         for child in item:
             render(child)
     elif isinstance(item, LTLine):
         self.outfp.write('<line linewidth="%d" bbox="%s" />\n' %
                          (item.linewidth, bbox2str(item.bbox)))
     elif isinstance(item, LTRect):
         self.outfp.write('<rect linewidth="%d" bbox="%s" />\n' %
                          (item.linewidth, bbox2str(item.bbox)))
     elif isinstance(item, LTCurve):
         self.outfp.write('<curve linewidth="%d" bbox="%s" pts="%s"/>\n' %
                          (item.linewidth, bbox2str(item.bbox), item.get_pts()))
     elif isinstance(item, LTFigure):
         self.outfp.write('<figure name="%s" bbox="%s">\n' %
                          (item.name, bbox2str(item.bbox)))
         for child in item:
             render(child)
         self.outfp.write('</figure>\n')
     elif isinstance(item, LTTextLine):
         # The span only marks the line's bbox; the characters follow it.
         self.outfp.write('<span data-bbox="%s"/>\n' % bbox2str(item.bbox))
         for child in item:
             render(child)
     elif isinstance(item, LTTextBox):
         # Bare attribute value.  The previous value (' wmode="vertical"') was
         # interpolated inside data-wmode="...", producing nested quotes and a
         # malformed tag for vertical boxes.
         wmode = 'vertical' if isinstance(item, LTTextBoxVertical) else ''
         self.outfp.write('<div id="%d" data-bbox="%s" data-wmode="%s"><p>\n' %
                          (item.index, bbox2str(item.bbox), wmode))
         for child in item:
             render(child)
         self.outfp.write('</p></div>\n')
     elif isinstance(item, LTChar):
         # Characters are emitted as text only, without a per-character tag.
         self.write_text(item.get_text())
     elif isinstance(item, LTText):
         self.outfp.write(item.get_text())
     elif isinstance(item, LTImage):
         if self.imagewriter is not None:
             # Export the image and reference the exported name.
             name = self.imagewriter.export_image(item)
             self.outfp.write('<img src="%s" width="%d" height="%d" />\n' %
                              (enc(name), item.width, item.height))
         else:
             self.outfp.write('<img width="%d" height="%d" />\n' %
                              (item.width, item.height))
     else:
         # Explicit raise so the check is not stripped under ``python -O``.
         raise AssertionError(item)
Example #3
0
 def __repr__(self):
     """Return a debug string: class name, bbox (via ``bbox2str``), type and font."""
     cls_name = self.__class__.__name__
     box = bbox2str(self.bbox)
     return '<%s %s Type=%r Font=%r>' % (cls_name, box, self.type, self.font)
Example #4
0
 def __repr__(self) -> str:
     """Return a debug string: class name, page id, bbox (via ``bbox2str``) and rotation."""
     template = '<%s(%r) %s rotate=%r>'
     values = (
         self.__class__.__name__,
         self.pageid,
         bbox2str(self.bbox),
         self.rotate,
     )
     return template % values
Example #5
0
        def render(item):
            """Emit XML for ``item`` via ``self.write`` and mirror it into the Rpa model.

            Besides writing one XML element per layout item (recursing into
            container children), this records pages, figures and text boxes as
            ``RpaPdfPage`` / ``RpaFigure`` / ``RpaTextBox`` objects: figures and
            text boxes are added to ``self.current_page``, and each finished page
            is added to ``self.rpa_pdf_document``.

            :param item: a layout object (one of the ``LT*`` types tested below).
            :raises AssertionError: if ``item`` is none of the handled types.
            """
            if isinstance(item, LTPage):
                s = '<page id="%s" bbox="%s" rotate="%d">\n' % (
                    item.pageid,
                    bbox2str(item.bbox),
                    item.rotate,
                )
                self.current_page = RpaPdfPage(item.pageid, item.bbox,
                                               item.rotate)

                self.write(s)
                for child in item:
                    render(child)
                if item.groups is not None:
                    self.write("<layout>\n")
                    for group in item.groups:
                        show_group(group)
                    self.write("</layout>\n")
                self.write("</page>\n")
                self.rpa_pdf_document.add_page(self.current_page)
            elif isinstance(item, LTLine):
                s = '<line linewidth="%d" bbox="%s" />\n' % (
                    item.linewidth,
                    bbox2str(item.bbox),
                )
                self.write(s)
            elif isinstance(item, LTRect):
                s = '<rect linewidth="%d" bbox="%s" />\n' % (
                    item.linewidth,
                    bbox2str(item.bbox),
                )
                self.write(s)
            elif isinstance(item, LTCurve):
                s = '<curve linewidth="%d" bbox="%s" pts="%s"/>\n' % (
                    item.linewidth,
                    bbox2str(item.bbox),
                    item.get_pts(),
                )
                self.write(s)
            elif isinstance(item, LTFigure):
                s = '<figure name="%s" bbox="%s">\n' % (item.name,
                                                        bbox2str(item.bbox))
                self.write(s)
                # Keep this figure in a local and remember the enclosing one.
                # Previously a nested LTFigure reset self.figure to None, so the
                # outer figure's next child failed on None.set_item(...) or None
                # was added to the page.
                enclosing_figure = getattr(self, "figure", None)
                figure = RpaFigure(item.name, item.bbox)
                self.figure = figure
                for child in item:
                    # Called before every child, as before; the LTImage branch
                    # may have pointed the figure at an image in between.
                    # NOTE(review): set_item semantics are not visible here.
                    figure.set_item(item)
                    render(child)
                self.write("</figure>\n")
                self.current_page.add_content(figure)
                # None for a top-level figure, the outer figure when nested.
                self.figure = enclosing_figure
            elif isinstance(item, LTTextLine):
                self.write('<textline bbox="%s">\n' % bbox2str(item.bbox))
                for child in item:
                    render(child)
                self.write("</textline>\n")
            elif isinstance(item, LTTextBox):
                wmode = ""

                if isinstance(item, LTTextBoxVertical):
                    # Spliced in as a whole extra attribute, hence the leading space.
                    wmode = ' wmode="vertical"'
                s = '<textbox id="%d" bbox="%s"%s>\n' % (
                    item.index,
                    bbox2str(item.bbox),
                    wmode,
                )
                box = RpaTextBox(item.index, item.bbox, wmode)
                self.write(s)
                box.set_item(item)
                self.current_page.add_content(box)
                for child in item:
                    render(child)
                self.write("</textbox>\n")
            elif isinstance(item, LTChar):
                s = ('<text font="%s" bbox="%s" colourspace="%s" '
                     'ncolour="%s" size="%.3f">' % (
                         enc(item.fontname),
                         bbox2str(item.bbox),
                         item.ncs.name,
                         item.graphicstate.ncolor,
                         item.size,
                     ))
                self.write(s)
                self.write_text(item.get_text())
                self.write("</text>\n")
            elif isinstance(item, LTText):
                self.write("<text>%s</text>\n" % item.get_text())
            elif isinstance(item, LTImage):
                # Point the figure currently being rendered (if any) at this image.
                if self.figure:
                    self.figure.set_item(item)
                if self.imagewriter is not None:
                    name = self.imagewriter.export_image(item)
                    self.write('<image src="%s" width="%d" height="%d" />\n' %
                               (enc(name), item.width, item.height))
                else:
                    self.write('<image width="%d" height="%d" />\n' %
                               (item.width, item.height))
            else:
                # Explicit raise so the check is not stripped under ``python -O``.
                raise AssertionError(str(("Unhandled", item)))