Ejemplo n.º 1
0
 def retrieveCP(self, cp):
     """Return the encoded text stored at character position cp.

     cp is translated by __cpToOffset() into a byte offset and a "compressed"
     flag.  A compressed character is the single byte at that offset and is
     handed to globals.encodeName() unchanged.  Otherwise the two bytes at
     that offset are decoded as UTF-16 and encoded with lowOnly=True.

     Bytes that are not valid UTF-16 on their own (for example one half of a
     surrogate pair, or a single trailing byte) do not abort the dump: a
     <todo> element is printed and the data is decoded again with the
     "replace" error handler.
     """
     pos, compressed = self.__cpToOffset(cp)
     if compressed:
         return globals.encodeName(self.bytes[pos])

     raw = self.bytes[pos:pos + 2]
     try:
         text = raw.decode('utf-16')
     except UnicodeDecodeError:
         # Report from the slice itself: it holds a single byte when pos is
         # the last offset of the data, so indexing pos + 1 is not safe.
         hexBytes = '-'.join(hex(b) for b in bytearray(raw))
         reason = 'could not decode bytes in position %d-%d (%s)' % (
             pos, pos + len(raw) - 1, hexBytes)
         print('<todo what="%s::retrieveCP(): %s"/>' % (
             self.__class__.__name__, reason))
         text = raw.decode('utf-16', 'replace')
     return globals.encodeName(text, lowOnly=True)
Ejemplo n.º 2
0
 def retrieveCP(self, cp):
     """Return the encoded text stored at character position cp.

     cp is translated by __cpToOffset() into a byte offset and a "compressed"
     flag.  A compressed character is the single byte at that offset and is
     handed to globals.encodeName() unchanged.  Otherwise the two bytes at
     that offset are decoded as UTF-16 and encoded with lowOnly=True.

     When the bytes cannot be decoded, a <todo> element describing them is
     printed and they are decoded again with the "replace" error handler.
     """
     pos, compressed = self.__cpToOffset(cp)
     if compressed:
         return globals.encodeName(self.bytes[pos])

     raw = self.bytes[pos:pos + 2]
     try:
         # Only the decode can legitimately fail here; keep the try minimal.
         text = raw.decode('utf-16')
     except UnicodeDecodeError:
         # Build the report from the slice instead of indexing pos + 1: the
         # slice holds a single byte when pos is the last offset of the data,
         # and indexing past it would raise IndexError inside this handler.
         hexBytes = '-'.join(hex(b) for b in bytearray(raw))
         reason = 'could not decode bytes in position %d-%d (%s)' % (
             pos, pos + len(raw) - 1, hexBytes)
         print('<todo what="WordDocumentStream::retrieveCP(): %s"/>' % reason)
         text = raw.decode('utf-16', errors="replace")
     return globals.encodeName(text, lowOnly=True)
Ejemplo n.º 3
0
 def dump(self):
     """Print this CodePageString as an XML element named after self.name.

     Reads Size with readuInt32(), then up to Size bytes with readuInt8(),
     stopping early at the first zero byte.  The bytes are decoded with the
     codec matching the code page returned by
     self.parent.parent.getCodePage() and printed as a <Characters> element.
     A code page without a known codec is reported with a <todo> element.
     """
     print('<%s type="CodePageString">' % self.name)
     # presumably printAndSet() also stores the value as self.Size, which is
     # read just below -- confirm against its definition
     self.printAndSet("Size", self.readuInt32())
     rawChars = []
     for dummy in range(self.Size):
         c = self.readuInt8()
         if c == 0:
             break
         rawChars.append(c)
     codepage = self.parent.parent.getCodePage()
     if (codepage is not None) and (codepage < 0):
         codepage += 2 ** 16  # signed -> unsigned
     # http://msdn.microsoft.com/en-us/goglobal/bb964654
     # Windows code pages 1252 and 1250 are distinct charsets from
     # ISO 8859-1 / ISO 8859-2 (Python's "latin1" / "latin2"), so they are
     # mapped to the dedicated cp125x codecs.
     encodings = {
         1252: "cp1252",
         1250: "cp1250",
         # http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx
         65001: "utf-8",
     }
     encoding = encodings.get(codepage)
     if encoding is not None:
         # "replace" keeps the dump going on bytes the codec does not define.
         text = "".join(map(chr, rawChars)).decode(encoding, "replace")
         print('<Characters value="%s"/>' % globals.encodeName(
             text, lowOnly=True).encode('utf-8'))
     else:
         print('<todo what="CodePageString::dump: unhandled codepage %s"/>' % codepage)
     print('</%s>' % self.name)
Ejemplo n.º 4
0
 def dump(self):
     """Print this CodePageString as an XML element named after self.name.

     Reads Size with readuInt32(), then up to Size bytes with readuInt8(),
     stopping early at the first zero byte.  The bytes are decoded with the
     codec matching the code page returned by
     self.parent.parent.getCodePage() and printed as a <Characters> element.
     A code page without a known codec is reported with a <todo> element.
     """
     print('<%s type="CodePageString">' % self.name)
     # presumably printAndSet() also stores the value as self.Size, which is
     # read just below -- confirm against its definition
     self.printAndSet("Size", self.readuInt32())
     charCodes = []
     for dummy in range(self.Size):
         c = self.readuInt8()
         if c == 0:
             break
         charCodes.append(c)
     codepage = self.parent.parent.getCodePage()
     if (codepage is not None) and (codepage < 0):
         codepage += 2 ** 16  # signed -> unsigned
     # http://msdn.microsoft.com/en-us/goglobal/bb964654
     # Windows code pages 1252 and 1250 are distinct charsets from
     # ISO 8859-1 / ISO 8859-2 (Python's "latin1" / "latin2"), so they are
     # mapped to the dedicated cp125x codecs.
     encodings = {
         1252: "cp1252",
         1250: "cp1250",
         # http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx
         65001: "utf-8",
     }
     encoding = encodings.get(codepage)
     if encoding is None:
         print('<todo what="CodePageString::dump: unhandled codepage %s"/>' % codepage)
     else:
         # "replace" keeps the dump going on bytes the codec does not define.
         decoded = "".join(map(chr, charCodes)).decode(encoding, "replace")
         print('<Characters value="%s"/>' % globals.encodeName(
             decoded, lowOnly=True).encode('utf-8'))
     print('</%s>' % self.name)
Ejemplo n.º 5
0
 def retrieveCP(self, cp):
     """Return the encoded text stored at character position cp.

     cp is translated by __cpToOffset() into a byte offset and a "compressed"
     flag.  A compressed character is the single byte at that offset and is
     handed to globals.encodeName() unchanged.  Otherwise the two bytes at
     that offset are decoded as UTF-16 and encoded with lowOnly=True.

     When the bytes cannot be decoded, a <todo> element describing them is
     printed and they are decoded again with the "replace" error handler.
     """
     pos, compressed = self.__cpToOffset(cp)
     if compressed:
         return globals.encodeName(self.bytes[pos])

     raw = self.bytes[pos:pos + 2]
     try:
         # Only the decode can legitimately fail here; keep the try minimal.
         text = raw.decode('utf-16')
     except UnicodeDecodeError:
         # Build the report from the slice instead of indexing pos + 1: the
         # slice holds a single byte when pos is the last offset of the data,
         # and indexing past it would raise IndexError inside this handler.
         hexBytes = '-'.join(hex(b) for b in bytearray(raw))
         reason = 'could not decode bytes in position %d-%d (%s)' % (
             pos, pos + len(raw) - 1, hexBytes)
         print('<todo what="WordDocumentStream::retrieveCP(): %s"/>' % reason)
         text = raw.decode('utf-16', errors="replace")
     return globals.encodeName(text, lowOnly=True)
Ejemplo n.º 6
0
 def __printDirHeader (self, direntry, byteLen):
     """Print a banner for a directory entry.

     The banner is an empty line, a rule of "=" characters, one line with the
     encoded entry name, and a rule of "-" characters.  Storages are labelled
     "(storage)"; every other entry is labelled as a stream together with
     byteLen.  Both rules are globals.OutputWidth characters wide.
     """
     encodedName = globals.encodeName(direntry.Name)
     width = globals.OutputWidth
     print("")
     print("=" * width)
     if not direntry.isStorage():
         summary = "%s (stream, size: %d bytes)" % (encodedName, byteLen)
     else:
         summary = "%s (storage)" % encodedName
     print(summary)
     print("-" * width)
Ejemplo n.º 7
0
 def __printDirHeader(self, direntry, byteLen):
     """Print a banner for a directory entry.

     Output is an empty line, a "=" rule, a line describing the entry and a
     "-" rule; the rules are globals.OutputWidth characters wide.  The
     description is the encoded entry name followed by "(storage)" when
     direntry.isStorage() is true, or by the stream size (byteLen) otherwise.
     """
     name = globals.encodeName(direntry.Name)
     print("")
     print("=" * globals.OutputWidth)
     description = (
         "%s (storage)" % name
         if direntry.isStorage()
         else "%s (stream, size: %d bytes)" % (name, byteLen)
     )
     print(description)
     print("-" * globals.OutputWidth)
Ejemplo n.º 8
0
    def __addSiblings( self, entries, parent, child ):
        """Add the siblings referenced by child to parent.Nodes, recursively.

        child.Entry.DirIDLeft and child.Entry.DirIDRight are treated as
        indexes into entries; an id that is not greater than zero is skipped.
        Each referenced entry is wrapped in a DirNode whose HierachicalName
        is the parent's name plus the encoded entry name, with a trailing '/'
        when the entry's own DirIDRoot is greater than zero.  The new node's
        siblings are added first; then a left sibling is inserted at the
        front of parent.Nodes and a right sibling is appended at the end.
        """
        # (attribute holding the sibling id, whether the node goes in front);
        # the left side is handled completely before the right one is read.
        for idAttr, goesFirst in ( ( 'DirIDLeft', True ), ( 'DirIDRight', False ) ):
            siblingId = getattr( child.Entry, idAttr )
            if siblingId > 0:
                sibling = DirNode( entries[ siblingId ] )
                sibling.HierachicalName = parent.HierachicalName + globals.encodeName( sibling.Entry.Name )
                if sibling.Entry.DirIDRoot > 0:
                    sibling.HierachicalName = sibling.HierachicalName + '/'

                self.__addSiblings( entries, parent, sibling )
                if goesFirst:
                    parent.Nodes.insert( 0, sibling )
                else:
                    parent.Nodes.append( sibling )
Ejemplo n.º 9
0
    def createDOM(self, wb):
        """Build and return the "external-sheet-cache" DOM element.

        The element's "url" attribute is the encoded docURL without its first
        character.  For every entry of self.__sheets whose second item is not
        None, that item's createDOM(wb) result is given a "name" attribute
        (the entry's first item) and appended as a child.
        """
        nd = node.Element("external-sheet-cache")
        # 1st char is always 0x1.
        nd.setAttr("url", globals.encodeName(self.docURL[1:]))
        for sheet in self.__sheets:
            sheetName, sheetCache = sheet[0], sheet[1]
            # Identity test: an object with a custom __eq__ must not be
            # mistaken for a missing cache.
            if sheetCache is None:
                continue
            elem = sheetCache.createDOM(wb)
            elem.setAttr("name", sheetName)
            nd.appendChild(elem)

        return nd
Ejemplo n.º 10
0
    def createDOM(self, wb):
        """Build and return the "external-sheet-cache" DOM element.

        The element's "url" attribute is the encoded docURL without its first
        character.  For every entry of self.__sheets whose second item is not
        None, that item's createDOM(wb) result is given a "name" attribute
        (the entry's first item) and appended as a child.
        """
        nd = node.Element("external-sheet-cache")
        # 1st char is always 0x1.
        nd.setAttr("url", globals.encodeName(self.docURL[1:]))
        for sheet in self.__sheets:
            cache = sheet[1]
            # Identity test: an object with a custom __eq__ must not be
            # mistaken for a missing cache.
            if cache is None:
                continue
            elem = cache.createDOM(wb)
            elem.setAttr("name", sheet[0])
            nd.appendChild(elem)

        return nd
Ejemplo n.º 11
0
    def __buildTreeImpl(self, entries, parent ):
        """Populate parent.Nodes from entries and recurse into the new nodes.

        When parent.Entry.DirIDRoot is greater than zero, the entry at that
        index is wrapped in a DirNode, named after the parent's
        HierachicalName plus its own encoded name (with a trailing '/' when
        its own DirIDRoot is greater than zero), its siblings are added via
        __addSiblings() and the node itself is appended to parent.Nodes.
        Afterwards every node in parent.Nodes whose DirIDRoot is greater than
        zero is processed the same way.
        """
        firstChildId = parent.Entry.DirIDRoot
        if firstChildId > 0:
            firstChild = DirNode( entries[ firstChildId ] )
            encodedName = globals.encodeName( firstChild.Entry.Name )
            firstChild.HierachicalName = parent.HierachicalName + encodedName
            if firstChild.Entry.DirIDRoot > 0:
                firstChild.HierachicalName = firstChild.HierachicalName + '/'

            # siblings are attached before the node itself is appended
            self.__addSiblings( entries, parent, firstChild )
            parent.Nodes.append( firstChild )

        for subNode in parent.Nodes:
            if subNode.Entry.DirIDRoot > 0:
                self.__buildTreeImpl( entries, subNode )
Ejemplo n.º 12
0
 def __getString(self, limit):
     """Read a string of two-byte units starting at self.pos.

     Units are fetched with getuInt8() at explicit positions until limit
     units have been collected (when limit is not None) or a unit made of two
     zero bytes is found; that terminating unit is skipped but not stored.

     Returns a tuple (text, pos): text is the collected data passed through
     globals.getUTF8FromUTF16(), globals.encodeName() and self.quoteAttr();
     pos is the position just after the last byte read.  This method does not
     assign self.pos itself.
     """
     collected = []
     offset = self.pos
     unitsRead = 0
     while limit is None or unitsRead != limit:
         first = self.getuInt8(pos=offset)
         second = self.getuInt8(pos=offset + 1)
         offset += 2
         if first == 0 and second == 0:
             break
         collected.extend((first, second))
         unitsRead += 1
     rawString = "".join(map(chr, collected))
     encoded = globals.encodeName(globals.getUTF8FromUTF16(rawString))
     return (self.quoteAttr(encoded), offset)
Ejemplo n.º 13
0
 def __getString(self, limit):
     """Read a string of two-byte units starting at self.pos.

     Pairs of bytes are fetched with getuInt8() at explicit positions.
     Reading stops once limit pairs were stored (limit=None means no limit)
     or when a pair of two zero bytes is read; that pair is consumed but not
     stored.

     Returns (text, pos), where text is the stored data converted with
     globals.getUTF8FromUTF16(), then globals.encodeName(), then
     self.quoteAttr(), and pos is the position following the last byte read.
     """
     byteValues = []
     cursor = self.pos
     pairs = 0
     while True:
         if limit is not None and pairs == limit:
             break
         pair = (self.getuInt8(pos=cursor), self.getuInt8(pos=cursor + 1))
         cursor += 2
         if pair == (0, 0):
             break
         byteValues.extend(pair)
         pairs += 1
     utf16Data = "".join(chr(value) for value in byteValues)
     utf8Data = globals.getUTF8FromUTF16(utf16Data)
     return (self.quoteAttr(globals.encodeName(utf8Data)), cursor)
Ejemplo n.º 14
0
    def dump(self):
        """Print this dictionary entry as a <dictionaryEntry> XML element.

        Reads PropertyIdentifier and Length with readuInt32(), then up to
        Length bytes with readuInt8(), stopping at the first zero byte.  The
        bytes are decoded as latin1 and printed as a <Name> element.  Finally
        the parent's pos is set to this object's pos.
        """
        print('<dictionaryEntry offset="%s" index="%s">' % (self.pos, self.index))
        self.printAndSet("PropertyIdentifier", self.readuInt32())
        # presumably printAndSet() stores the value as self.Length, which is
        # read just below -- confirm against its definition
        self.printAndSet("Length", self.readuInt32())

        nameBytes = []
        for _ in range(self.Length):
            value = self.readuInt8()
            if value == 0:
                break
            nameBytes.append(value)
        # TODO support non-latin1
        encoding = "latin1"
        rawName = "".join(chr(code) for code in nameBytes)
        encodedName = globals.encodeName(rawName.decode(encoding), lowOnly=True)
        print('<Name value="%s"/>' % encodedName.encode('utf-8'))

        print('</dictionaryEntry>')
        self.parent.pos = self.pos
Ejemplo n.º 15
0
 def dump(self):
     """Print a <stream> element carrying this object's name and size.

     The name is passed through globals.encodeName() and self.quoteAttr()
     before being placed in the "name" attribute.
     """
     quotedName = self.quoteAttr(globals.encodeName(self.name))
     print('<stream name="%s" size="%s"/>' % (quotedName, self.size))
Ejemplo n.º 16
0
 def dump(self):
     """Print a <stream> element with the encoded, quoted name and the size."""
     encoded = globals.encodeName(self.name)
     attrs = (self.quoteAttr(encoded), self.size)
     print('<stream name="%s" size="%s"/>' % attrs)
Ejemplo n.º 17
0
 def __printDirHeader(self, dirname, byteLen):
     """Print a banner for a directory stream.

     Output is an empty line, a rule of 68 "=" characters, a line with the
     encoded dirname and byteLen, and a rule of 68 "-" characters.
     """
     encoded = globals.encodeName(dirname)
     width = 68
     print("")
     print("=" * width)
     print("%s (size: %d bytes)" % (encoded, byteLen))
     print("-" * width)
Ejemplo n.º 18
0
 def retrieveCP(self, cp):
     """Return the encoded text stored at character position cp.

     cp is translated by __cpToOffset() into a byte offset and a "compressed"
     flag.  A compressed character is the single byte at that offset and is
     handed to globals.encodeName() unchanged.  Otherwise the two bytes at
     that offset are decoded as UTF-16 and encoded with lowOnly=True.

     Bytes that are not valid UTF-16 on their own (for example one half of a
     surrogate pair, or a single trailing byte) do not abort the dump: a
     <todo> element is printed and the data is decoded again with the
     "replace" error handler.
     """
     pos, compressed = self.__cpToOffset(cp)
     if compressed:
         return globals.encodeName(self.bytes[pos])

     raw = self.bytes[pos:pos + 2]
     try:
         text = raw.decode('utf-16')
     except UnicodeDecodeError:
         # Report from the slice itself: it holds a single byte when pos is
         # the last offset of the data, so indexing pos + 1 is not safe.
         hexBytes = '-'.join(hex(b) for b in bytearray(raw))
         reason = 'could not decode bytes in position %d-%d (%s)' % (
             pos, pos + len(raw) - 1, hexBytes)
         print('<todo what="%s::retrieveCP(): %s"/>' % (
             self.__class__.__name__, reason))
         text = raw.decode('utf-16', 'replace')
     return globals.encodeName(text, lowOnly=True)