Exemple #1
0
 def __init__(self, filePath, mediaPath):
     """Store the document path and attach an image tracker.

     filePath  -- kept on the instance as ``filePath``.
     mediaPath -- assigned to the new tracker's ``destPath``.
     """
     self.filePath = filePath
     tracker = ImageTracker()
     tracker.destPath = mediaPath
     self.imageTracker = tracker
Exemple #2
0
class OdtModel(object):
    """Collect the text paths found in the nested lists of an ODT document.

    ``getPaths`` is the entry point: it reads ``content.xml`` from the zip
    archive at ``filePath``, walks the lists found under
    ``office:body/office:text`` and returns one path (a list of item texts,
    outermost item first) for every leaf list item.
    """

    def __init__(self, filePath, mediaPath):
        """Remember the document path and create the image tracker.

        filePath  -- path later handed to ``readContent``.
        mediaPath -- stored as the image tracker's ``destPath``.
        """
        self.filePath = filePath
        self.imageTracker = ImageTracker()
        self.imageTracker.destPath = mediaPath

    def getPaths(self, textMode=False):
        """Return the root-to-leaf text paths of the document's lists.

        textMode -- when true, every path entry is the builder's
                    ``plainText``; otherwise its ``formattedText``.

        Returns an empty list when the content could not be read.  The
        result is also kept in ``self.paths``.
        """
        self.paths = []
        self.isTextMode = textMode
        content = self.readContent(self.filePath)
        if content is None:
            print('No content was read.')
            return []

        # From this point on the data is assumed to be well formed: the
        # expected namespaces, office:body and office:text must be present.
        xml, ns = parse_and_get_ns(StringIO.StringIO(content))
        xml.nsmap = ns
        self.setTagNames(xml)

        body = xml.find(self.names['body'])
        text = body.find(self.names['text'])

        self.setStyles(xml)
        self.traverseTree(text)

        self.imageTracker.cleanup()
        return self.paths

    def readContent(self, path):
        """Return the raw ``content.xml`` of the archive at ``path``.

        After a successful read the open archive is handed to the image
        tracker's ``extractImages``.  Returns ``None`` when ``path`` is not
        a zip file, or when the archive cannot be opened or has no readable
        ``content.xml``.  The archive is always closed before returning,
        including when ``extractImages`` raises.
        """
        if not is_zipfile(path):
            return None

        # Opening is kept separate from reading so that a failure here never
        # reaches a close() call on an archive that was never created.
        try:
            z = ZipFile(path)
        except Exception:
            print('failed to open zip file or read \'content.xml\'')
            return None

        try:
            try:
                content = z.read('content.xml')
            except Exception:
                print('failed to open zip file or read \'content.xml\'')
                return None
            self.imageTracker.extractImages(z)
        finally:
            z.close()

        return content

    def getLists(self, parent):
        """Return the direct ``text:list`` children of ``parent``."""
        return self.getTagsByName(parent, self.names['list'])

    def getListItems(self, parent):
        """Return the direct ``text:list-item`` children of ``parent``."""
        return self.getTagsByName(parent, self.names['list-item'])

    def getTagsByName(self, parent, tagName):
        """Return the direct children of ``parent`` whose tag is ``tagName``.

        Only immediate children are inspected; document order is kept.
        """
        return [child for child in parent if child.tag == tagName]

    def isLeaf(self, element):
        """Return True when ``element`` has no direct list child."""
        return not self.getLists(element)

    def getText(self, element):
        """Return the text of a list item.

        Only the first ``text:p`` child is rendered; it is parsed by the
        builder and either the plain or the formatted text is returned,
        depending on ``self.isTextMode``.  An item without any ``text:p``
        child yields the placeholder ``'{{empty}}'``.
        """
        paragraphs = self.getTagsByName(element, self.names['p'])
        if not paragraphs:
            return '{{empty}}'

        self.builder.parse(paragraphs[0])
        if self.isTextMode:
            return self.builder.plainText
        return self.builder.formattedText

    def traverse(self, path, element):
        """Walk one list item recursively and return its ``Node``.

        path    -- texts of the ancestors; extended in place with this
                   item's text.
        element -- the list item to visit.

        When the item is a leaf, ``path`` is appended to ``self.paths``.
        Otherwise the items of its first nested list are visited, each with
        its own copy of ``path``; any further nested lists are ignored.
        """
        text = self.getText(element)
        path.append(text)
        node = Node(text)

        nestedLists = self.getLists(element)
        if nestedLists:
            for child in self.getListItems(nestedLists[0]):
                node.children.append(self.traverse(path[:], child))
        else:
            self.paths.append(path)
        return node

    def traverseTree(self, root):
        """Traverse every item of every list directly under ``root``.

        The resulting nodes are stored in ``self.trees`` (not read anywhere
        else in this class); the paths end up in ``self.paths``.
        """
        self.trees = []
        lists = self.getLists(root)
        if not lists:
            print('No lists found in this document.')

        for lst in lists:
            for item in self.getListItems(lst):
                self.trees.append(self.traverse([], item))

    def _buildNames(self, nsmap):
        """Return the key -> ``{namespace}local-name`` table for ``nsmap``.

        ``nsmap`` maps the prefixes ``office``, ``text``, ``draw``,
        ``xlink``, ``style`` and ``fo`` to namespace URIs; a missing prefix
        raises ``KeyError``.
        """
        office = nsmap['office']
        text = nsmap['text']
        draw = nsmap['draw']
        xlink = nsmap['xlink']
        style = nsmap['style']
        fo = nsmap['fo']

        # Every key below doubles as the local name of the tag/attribute.
        localsByNamespace = (
            (office, ('body', 'text')),
            (text, ('list', 'list-item', 'p', 's', 'line-break', 'tab',
                    'span', 'style-name')),
            (draw, ('frame', 'image')),
            (xlink, ('href',)),
            (style, ('name', 'text-underline-style', 'style',
                     'text-properties', 'use-window-font-color')),
            (fo, ('font-weight', 'font-style', 'font-size',
                  'background-color', 'color')),
        )
        names = {}
        for uri, localNames in localsByNamespace:
            for local in localNames:
                names[local] = '{%s}%s' % (uri, local)

        # The only key that differs from its local name.
        names['auto-styles'] = '{%s}automatic-styles' % office
        return names

    def setTagNames(self, xml):
        """Create the text builder and the qualified-name lookup table.

        ``xml.nsmap`` supplies the namespace URIs.  The table is stored in
        ``self.names`` and shared with the builder, which also receives the
        image tracker.
        """
        self.builder = TextBuilder()
        self.builder.imageTracker = self.imageTracker

        names = self._buildNames(xml.nsmap)
        self.names = names
        self.builder.names = names

    def _applyTextProperties(self, style, attrs):
        """Set the supported text properties found in ``attrs`` on ``style``.

        ``attrs`` is the attribute mapping of a ``style:text-properties``
        element.  Attributes that are absent, or whose value is not one of
        the recognised ones, leave ``style`` untouched.
        """
        names = self.names

        weight = attrs.get(names['font-weight'])
        if weight == 'bold':
            style.bold = True
        elif weight == 'normal':
            style.resetBold = True

        underline = attrs.get(names['text-underline-style'])
        if underline == 'solid':
            style.underline = True
        elif underline == 'none':
            style.resetUnderline = True

        slant = attrs.get(names['font-style'])
        if slant == 'italic':
            style.italics = True
        elif slant == 'normal':
            style.resetItalics = True

        # An explicit colour wins over the "use window font colour" flag.
        color = attrs.get(names['color'])
        if color is not None:
            style.color = color
        elif attrs.get(names['use-window-font-color']) == 'true':
            style.color = 'default'

        background = attrs.get(names['background-color'])
        if background is not None:
            style.bgColor = background

        size = attrs.get(names['font-size'])
        if size is not None:
            style.size = size

    def setStyles(self, xml):
        """Interpret the automatic styles and hand them to the builder.

        One ``Style`` is created per child of the ``automatic-styles``
        element, named after its ``style:name`` attribute and filled from
        its ``style:text-properties`` child when there is one.  Every style
        is kept, including those that set nothing.  A document without an
        ``automatic-styles`` element yields an empty style list.
        """
        styles = xml.find(self.names['auto-styles'])
        if styles is None:
            # find() returns None when the element is absent.
            styles = []

        interpretedStyles = []
        for style in styles:
            s = Style()
            s.name = style.attrib[self.names['name']]

            # Compare against None explicitly: the element may be present
            # but have no children.
            props = style.find(self.names['text-properties'])
            if props is not None:
                self._applyTextProperties(s, props.attrib)

            interpretedStyles.append(s)
        self.builder.styles = interpretedStyles