def run(self, domDoc):
    """
    Convert every page of a DS document into its own PageXml document.

    domDoc: DOM object handed to XMLDSDocument.loadFromDom().

    Returns a list of (pageXmlDoc, imageFilename) tuples, one per page, in
    the order given by XMLDSDocument.getPages(). The second item is the raw
    'imageFilename' attribute of the DS page, not its basename.

    Side effect: self.pageXmlNS is overwritten for each page with the
    namespace of the freshly created PageXml root element.
    """
    ODoc = XMLDSDocument()
    ODoc.loadFromDom(domDoc)

    lPageXmlDoc = []
    for page in ODoc.getPages():
        # Best effort: a page without a usable 'imageFilename' still gets
        # converted, under a placeholder name. Only ordinary errors are
        # swallowed; KeyboardInterrupt/SystemExit now propagate.
        try:
            filename = os.path.basename(page.getAttribute('imageFilename'))
        except Exception:
            filename = "fakename"

        # Page dimensions go through convertDot2Pixel with the instance dpi.
        pageXmlDoc, pageNode = PageXml.createPageXmlDocument(
            creatorName='NLE',
            filename=filename,
            imgW=convertDot2Pixel(self.dpi, page.getWidth()),
            imgH=convertDot2Pixel(self.dpi, page.getHeight()),
        )
        self.pageXmlNS = etree.QName(pageXmlDoc.getroot()).namespace

        if self.bRegionOnly:
            self.convertOnlyRegion(page, pageNode)
        else:
            self.convertDSPage(page, pageNode)

        lPageXmlDoc.append((pageXmlDoc, page.getAttribute('imageFilename')))

    return lPageXmlDoc
def findTemplate(self,doc):
    """
    Locate the first TableRegion of doc and return a new document whose
    root is the parent node of that table.

    The parent node is unlinked from doc (doc is modified) and every
    TableCell found in the new document gets its four border-visibility
    properties set to 'true'.

    Returns None when no TableRegion is found.
    """
    tables = PageXml.getChildByName(doc.getRootElement(), 'TableRegion')
    if tables == []:
        return None

    # Only the first table is considered.
    templatePage = tables[0].parent
    newDoc, _ = PageXml.createPageXmlDocument('XRCE', '', 0, 0)

    # Move the page out of the source document and make it the new root.
    templatePage.unlinkNode()
    newDoc.setRootElement(templatePage)
    # NOTE: the namespace still needs to be added to the new document.

    # Borders must be visible on every cell of the template.
    borderProps = (
        "leftBorderVisible",
        "rightBorderVisible",
        "topBorderVisible",
        "bottomBorderVisible",
    )
    for cell in PageXml.getChildByName(newDoc.getRootElement(), 'TableCell'):
        for propName in borderProps:
            cell.setProp(propName, 'true')

    return newDoc
def findTemplate(self, doc):
    """
    Build a new PageXml document containing a copy of the first
    TableRegion found in doc.

    The image attributes (filename, width, height) of the table's parent
    node are copied onto the page node of the new document, and that
    parent node is removed from doc (doc is modified).

    Returns None when no TableRegion is found.
    """
    from copy import deepcopy

    tables = PageXml.getChildByName(doc.getroot(), 'TableRegion')
    if tables == []:
        return None

    # Only the first table is considered.
    firstTable = tables[0]
    newDoc, fakepage = PageXml.createPageXmlDocument('NLE', '', 0, 0)

    # Carry the image description of the source page over to the new page.
    page = firstTable.getparent()
    for attrName in ("imageFilename", "imageWidth", "imageHeight"):
        fakepage.set(attrName, page.get(attrName))

    # Detach the source page from its document.
    page.getparent().remove(page)

    # Add a copy of the table to the new page.
    fakepage.append(deepcopy(firstTable))
    return newDoc
def test_CreationPageXmlDocument():
    """
    Smoke test: create a PageXml document for a 100x100 image and print
    whatever PageXml.createPageXmlDocument returns.
    """
    # The result is printed as returned, without unpacking it.
    created = PageXml.createPageXmlDocument(
        creatorName='HerveforTest',
        filename='hervefortest.jpg',
        imgW=100,
        imgH=100,
    )
    print(created)