def render(item):
    """Walk a layout item recursively, emitting only selected text boxes.

    Text found while walking is appended to ``self.text``.  Once an
    ``LTTextBox`` has been walked, the buffered text is written through
    ``TextConverter.write_text`` (followed by a newline) only if it matches
    one of the patterns below; the buffer is then cleared.  ``LTImage``
    items are passed to ``self.imagewriter`` when one is configured.

    ``self`` is not a parameter: it is resolved from the surrounding scope.

    :param item: layout object to walk.
    """
    # Accepted shapes of buffered text:
    #   1. starts with a bracketed number followed by a space ("[12] ")
    #   2. contains a bracketed number + space preceded by a non-space char
    #   3. contains a capitalised word, ", ", one letter and a period
    # NOTE(review): presumably these target bibliography entries
    # ("[3] ...", "Surname, I.") -- confirm intent with the author.
    wanted = (
        r'^\[[0-9]+\] ',
        r'[^ ]\[[0-9]+\] ',
        r'[A-Z][a-z]+, [a-zA-Z]\.',
    )

    # Step 1: descend into containers, or collect text from a text leaf.
    # An item that is both a container and text is only descended into.
    if isinstance(item, LTContainer):
        for child in item:
            render(child)
    elif isinstance(item, LTText):
        self.text += item.get_text()

    # Step 2: a finished text box decides the fate of the buffer.
    if isinstance(item, LTTextBox):
        buffered = self.text
        # any() stops at the first matching pattern, like the chained "or".
        if any(re.search(pattern, buffered) for pattern in wanted):
            TextConverter.write_text(self, buffered)
            TextConverter.write_text(self, '\n')
        self.text = ''
        return

    # Step 3: images are exported only when an image writer is available.
    if isinstance(item, LTImage) and self.imagewriter is not None:
        self.imagewriter.export_image(item)
def receive_layout(self, ltpage):
    """Write the selected text of one analysed page to the output.

    The page tree is walked recursively.  Text is accumulated in
    ``self.text``; each time an ``LTTextBox`` has been walked, the buffer is
    written (plus a newline) only if it matches one of ``wanted`` below, and
    is then cleared.  ``LTImage`` items are exported through
    ``self.imagewriter`` when it is set.  If ``self.showpageno`` is true a
    ``Page <pageid>`` line is written first, and a form feed always ends the
    page.

    All output goes through ``TextConverter.write_text`` called explicitly
    on ``self``.

    :param ltpage: page layout object; its ``pageid`` is used for the
        optional page header.
    :returns: None
    """
    # Accepted shapes of buffered text:
    #   1. starts with a bracketed number followed by a space ("[12] ")
    #   2. contains a bracketed number + space preceded by a non-space char
    #   3. contains a capitalised word, ", ", one letter and a period
    # NOTE(review): presumably these target bibliography entries
    # ("[3] ...", "Surname, I.") -- confirm intent with the author.
    wanted = (
        r'^\[[0-9]+\] ',
        r'[^ ]\[[0-9]+\] ',
        r'[A-Z][a-z]+, [a-zA-Z]\.',
    )

    def emit(chunk):
        # Always use the TextConverter implementation, never an override.
        TextConverter.write_text(self, chunk)

    def walk(node):
        # Descend into containers, or collect text from a text leaf.
        # A node that is both a container and text is only descended into.
        if isinstance(node, LTContainer):
            for child in node:
                walk(child)
        elif isinstance(node, LTText):
            self.text += node.get_text()

        # A finished text box decides the fate of the buffer.
        if isinstance(node, LTTextBox):
            buffered = self.text
            # any() stops at the first match, like the chained "or".
            if any(re.search(pattern, buffered) for pattern in wanted):
                emit(buffered)
                emit('\n')
            self.text = ''
            return

        # Images are exported only when an image writer is available.
        if isinstance(node, LTImage) and self.imagewriter is not None:
            self.imagewriter.export_image(node)

    if self.showpageno:
        emit('Page %s\n' % ltpage.pageid)
    walk(ltpage)
    # Form feed marks the end of the page in the text output.
    emit('\f')