Example #1
0
    def run(self, domDoc):
        """
            Convert every page of a DS XML DOM into its own PageXml document.

            domDoc: the DOM handed to XMLDSDocument.loadFromDom; its pages
                    are enumerated with getPages().

            Returns a list of (pageXmlDoc, imageFilename) tuples, one per
            page, in page order. The second item is the page's raw
            'imageFilename' attribute, not the basename stored in the
            PageXml document and not the "fakename" fallback.

            Reads self.dpi (dot -> pixel conversion of the page size) and
            self.bRegionOnly (selects convertOnlyRegion vs convertDSPage).
            Sets self.pageXmlNS to the namespace of the root of each
            PageXml document as it is created.
        """
        ODoc = XMLDSDocument()
        ODoc.loadFromDom(domDoc)
        lPageXmlDoc = []
        lPages = ODoc.getPages()
        for page in lPages:
            try:
                filename = os.path.basename(page.getAttribute('imageFilename'))
            except Exception:
                # Best effort: fall back to a placeholder name when the
                # attribute cannot be turned into a basename (presumably
                # because it is missing -- confirm against getAttribute).
                # Exception rather than a bare except so that
                # KeyboardInterrupt / SystemExit still propagate.
                filename = "fakename"
            pageXmlDoc, pageNode = PageXml.createPageXmlDocument(
                creatorName='NLE',
                filename=filename,
                imgW=convertDot2Pixel(self.dpi, page.getWidth()),
                imgH=convertDot2Pixel(self.dpi, page.getHeight()))
            # Remember the namespace of the freshly created document.
            self.pageXmlNS = etree.QName(pageXmlDoc.getroot()).namespace
            if self.bRegionOnly:
                self.convertOnlyRegion(page, pageNode)
            else:
                self.convertDSPage(page, pageNode)
            lPageXmlDoc.append(
                (pageXmlDoc, page.getAttribute('imageFilename')))

        return lPageXmlDoc
Example #2
0
    def findTemplate(self,doc):
        """
            Extract the page holding the first TableRegion as a new document.

            Returns None when doc contains no TableRegion. Otherwise the
            parent of the first TableRegion is unlinked, installed as the
            root element of a newly created PageXml document, and every
            TableCell under that root gets its four border-visibility
            properties set to 'true'. The new document is returned.
        """
        tables = PageXml.getChildByName(doc.getRootElement(),'TableRegion')
        if tables == []:
            return None
        # Shortcut: the direct parent of the first table is used as the page.
        templatePage = tables[0].parent
        templateDoc,_ = PageXml.createPageXmlDocument('XRCE', '', 0,0)
        templatePage.unlinkNode()
        templateDoc.setRootElement(templatePage)
        # NOTE: the namespace still needs to be added to the new root.
        # Borders must be visible on every cell of the template.
        borderProps = (
            "leftBorderVisible",
            "rightBorderVisible",
            "topBorderVisible",
            "bottomBorderVisible",
        )
        for tableCell in PageXml.getChildByName(templateDoc.getRootElement(),'TableCell'):
            for propName in borderProps:
                tableCell.setProp(propName,'true')

        return templateDoc
Example #3
0
    def findTemplate(self, doc):
        """
            Build a template document around the first TableRegion of doc.

            Returns None when doc contains no TableRegion. Otherwise a new
            PageXml document is created, its page node receives the
            imageFilename / imageWidth / imageHeight attributes of the
            table's parent, that parent is removed from its own parent in
            doc, and a deep copy of the table is appended to the new page
            node. The new document is returned.
        """
        from copy import deepcopy

        tables = PageXml.getChildByName(doc.getroot(), 'TableRegion')
        if tables == []:
            return None
        firstTable = tables[0]
        templateDoc, templatePage = PageXml.createPageXmlDocument('NLE', '', 0, 0)

        # Shortcut: the direct parent of the first table is used as the page.
        sourcePage = firstTable.getparent()
        for attrName in ("imageFilename", "imageWidth", "imageHeight"):
            templatePage.set(attrName, sourcePage.get(attrName))
        # Detach the source page from the input document.
        sourcePage.getparent().remove(sourcePage)

        # Only a copy of the table goes into the template page.
        templatePage.append(deepcopy(firstTable))
        return templateDoc
Example #4
0
def test_CreationPageXmlDocument():
    """Check that a PageXml document can be created from scratch.

    The result of createPageXmlDocument is unpacked as a
    (document, page node) pair, which is how the other callers of this
    function consume it; the unpacking itself fails if the shape differs.
    """
    doc, pageNode = PageXml.createPageXmlDocument(creatorName='HerveforTest',
                                                  filename='hervefortest.jpg',
                                                  imgW=100,
                                                  imgH=100)
    # Identity checks only: element-like objects may be falsy when empty.
    assert doc is not None
    assert pageNode is not None
    print(doc)