def retrieveCP(self, cp):
    """Return the encoded text of the character at character position cp.

    self.__cpToOffset() translates cp into a byte offset plus a
    'compressed' flag.  A compressed character is the single byte at that
    offset; otherwise the two bytes at the offset are decoded as UTF-16.
    Either way the result is passed through globals.encodeName().

    Two bytes taken on their own are not always decodable (for example an
    unpaired surrogate code unit).  In that case a <todo> element is
    printed and the bytes are decoded with errors="replace" instead of
    aborting the whole dump with a UnicodeDecodeError.
    """
    pos, compressed = self.__cpToOffset(cp)
    if compressed:
        return globals.encodeName(self.bytes[pos])

    raw = self.bytes[pos:pos + 2]
    try:
        text = raw.decode('utf-16')
    except UnicodeDecodeError:
        # Build the hex dump from the slice, so a truncated (one byte)
        # slice at the end of the stream cannot raise an IndexError here.
        hexBytes = '-'.join([hex(ord(b)) for b in raw])
        reason = 'could not decode bytes in position %d-%d (%s)' % (pos, pos + 1, hexBytes)
        print('<todo what="WordDocumentStream::retrieveCP(): %s"/>' % reason)
        text = raw.decode('utf-16', 'replace')
    return globals.encodeName(text, lowOnly=True)
def retrieveCP(self, cp):
    """Return the encoded text of the character at character position cp.

    The byte offset and the 'compressed' flag come from
    self.__cpToOffset().  Compressed characters are a single byte,
    uncompressed ones are two bytes decoded as UTF-16.  When the strict
    decode fails, a <todo> element describing the offending bytes is
    printed and the decode is retried with errors="replace".
    """
    offset, isCompressed = self.__cpToOffset(cp)
    if isCompressed:
        return globals.encodeName(self.bytes[offset])

    try:
        decoded = self.bytes[offset:offset + 2].decode('utf-16')
        return globals.encodeName(decoded, lowOnly=True)
    except UnicodeDecodeError:
        firstHex = hex(ord(self.bytes[offset]))
        secondHex = hex(ord(self.bytes[offset + 1]))
        reason = 'could not decode bytes in position %d-%d (%s-%s)' % (
            offset, offset + 1, firstHex, secondHex)
        print('<todo what="WordDocumentStream::retrieveCP(): %s"/>' % reason)
        replaced = self.bytes[offset:offset + 2].decode('utf-16', errors="replace")
        return globals.encodeName(replaced, lowOnly=True)
def dump(self):
    """Print this CodePageString as an XML element on stdout.

    Reads a size via readuInt32(), then up to self.Size bytes (stopping at
    the first zero byte).  The bytes are decoded according to the code
    page returned by self.parent.parent.getCodePage() and printed as a
    <Characters> element; code pages without a known codec produce a
    <todo> element instead.
    """
    print('<%s type="CodePageString">' % self.name)
    self.printAndSet("Size", self.readuInt32())
    raw = []
    for dummy in range(self.Size):
        c = self.readuInt8()
        if c == 0:
            break
        raw.append(c)
    codepage = self.parent.parent.getCodePage()
    if (codepage is not None) and (codepage < 0):
        codepage += 2 ** 16  # signed -> unsigned
    # http://msdn.microsoft.com/en-us/goglobal/bb964654
    # 1252 and 1250 are Windows code pages; they are not the same as
    # ISO-8859-1/-2 ("latin1"/"latin2"), which differ e.g. in the
    # 0x80-0x9F range, so the dedicated cpNNNN codecs are used.
    # 65001: http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx
    knownEncodings = {
        1252: "cp1252",
        1250: "cp1250",
        65001: "utf-8",
    }
    encoding = knownEncodings.get(codepage, "")
    if encoding:
        data = "".join(map(chr, raw))
        try:
            text = data.decode(encoding)
        except UnicodeDecodeError:
            # Some byte values are undefined in these code pages; keep
            # dumping and mark the undecodable bytes instead of aborting.
            print('<todo what="CodePageString::dump: could not decode string as %s"/>' % encoding)
            text = data.decode(encoding, "replace")
        print('<Characters value="%s"/>' % globals.encodeName(text, lowOnly=True).encode('utf-8'))
    else:
        print('<todo what="CodePageString::dump: unhandled codepage %s"/>' % codepage)
    print('</%s>' % self.name)
def dump(self):
    """Print this CodePageString as an XML element on stdout.

    A size is read with readuInt32(), followed by at most self.Size bytes
    (reading stops at the first zero byte).  The collected bytes are
    decoded using the code page from self.parent.parent.getCodePage() and
    emitted as a <Characters> element.  An unknown code page yields a
    <todo> element instead.
    """
    print('<%s type="CodePageString">' % self.name)
    self.printAndSet("Size", self.readuInt32())
    raw = []
    for dummy in range(self.Size):
        c = self.readuInt8()
        if c == 0:
            break
        raw.append(c)

    codepage = self.parent.parent.getCodePage()
    if (codepage is not None) and (codepage < 0):
        codepage += 2 ** 16  # signed -> unsigned

    # http://msdn.microsoft.com/en-us/goglobal/bb964654
    # Windows code pages 1252/1250 are distinct from ISO-8859-1/-2
    # ("latin1"/"latin2"); use Python's matching cpNNNN codecs.
    if codepage == 1252:
        encoding = "cp1252"
    elif codepage == 1250:
        encoding = "cp1250"
    elif codepage == 65001:
        # http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx
        encoding = "utf-8"
    else:
        encoding = ""

    if encoding:
        data = "".join(map(chr, raw))
        try:
            text = data.decode(encoding)
        except UnicodeDecodeError:
            # The cpNNNN codecs reject undefined byte values; report it
            # and substitute replacement characters rather than crash.
            print('<todo what="CodePageString::dump: could not decode string as %s"/>' % encoding)
            text = data.decode(encoding, "replace")
        print('<Characters value="%s"/>' % globals.encodeName(text, lowOnly=True).encode('utf-8'))
    else:
        print('<todo what="CodePageString::dump: unhandled codepage %s"/>' % codepage)
    print('</%s>' % self.name)
def retrieveCP(self, cp):
    """Look up character position cp and return its encoded text.

    self.__cpToOffset() supplies the byte offset and whether the
    character is stored compressed (one byte) or as two bytes that are
    decoded as UTF-16.  A failed strict decode is reported through a
    <todo> element and retried with errors="replace".
    """
    start, compressed = self.__cpToOffset(cp)
    end = start + 2

    if not compressed:
        try:
            return globals.encodeName(self.bytes[start:end].decode('utf-16'), lowOnly=True)
        except UnicodeDecodeError:
            details = (start, start + 1,
                       hex(ord(self.bytes[start])),
                       hex(ord(self.bytes[start + 1])))
            reason = 'could not decode bytes in position %d-%d (%s-%s)' % details
            print('<todo what="WordDocumentStream::retrieveCP(): %s"/>' % reason)
            lenient = self.bytes[start:end].decode('utf-16', errors="replace")
            return globals.encodeName(lenient, lowOnly=True)

    return globals.encodeName(self.bytes[start])
def __printDirHeader(self, direntry, byteLen):
    """Print a banner for a directory entry.

    The banner is an empty line, a rule of '=' characters, one line with
    the encoded entry name (marked as storage, or as stream together with
    byteLen), and a rule of '-' characters.  Rules are
    globals.OutputWidth characters wide.
    """
    label = globals.encodeName(direntry.Name)
    print("")
    print("=" * globals.OutputWidth)
    if direntry.isStorage():
        caption = "%s (storage)" % label
    else:
        caption = "%s (stream, size: %d bytes)" % (label, byteLen)
    print(caption)
    print("-" * globals.OutputWidth)
def __printDirHeader(self, direntry, byteLen):
    """Write the header block that introduces one directory entry.

    Output: a blank line, a '=' rule, a description line (encoded name
    plus either "(storage)" or the stream size in bytes) and a '-' rule.
    Both rules are globals.OutputWidth characters long.
    """
    encodedName = globals.encodeName(direntry.Name)
    print("")
    print("=" * globals.OutputWidth)
    description = (
        "%s (storage)" % encodedName
        if direntry.isStorage()
        else "%s (stream, size: %d bytes)" % (encodedName, byteLen)
    )
    print(description)
    print("-" * globals.OutputWidth)
def __addSiblings(self, entries, parent, child):
    """Attach the left and right siblings of child to parent.Nodes.

    For each sibling id (DirIDLeft, then DirIDRight) greater than zero a
    DirNode is created from entries, given a hierarchical name derived
    from parent, processed recursively for its own siblings, and then
    added to parent.Nodes: left siblings are inserted at the front, right
    siblings appended at the end.
    """
    def makeNode(dirID):
        # Build the node and its name; entries that have a root id of
        # their own get a trailing '/'.
        node = DirNode(entries[dirID])
        node.HierachicalName = parent.HierachicalName + globals.encodeName(node.Entry.Name)
        if node.Entry.DirIDRoot > 0:
            node.HierachicalName += '/'
        return node

    # add left siblings
    leftID = child.Entry.DirIDLeft
    if leftID > 0:
        leftNode = makeNode(leftID)
        self.__addSiblings(entries, parent, leftNode)
        parent.Nodes.insert(0, leftNode)

    # add children to the right
    rightID = child.Entry.DirIDRight
    if rightID > 0:
        rightNode = makeNode(rightID)
        self.__addSiblings(entries, parent, rightNode)
        parent.Nodes.append(rightNode)
def createDOM(self, wb):
    """Build and return the "external-sheet-cache" DOM element.

    The element gets a "url" attribute made from self.docURL without its
    first character, and one child per entry of self.__sheets whose
    second item is set; each child is created by that item's
    createDOM(wb) and named after the entry's first item.
    """
    root = node.Element("external-sheet-cache")
    # 1st char is always 0x1.
    root.setAttr("url", globals.encodeName(self.docURL[1:]))
    for sheet in self.__sheets:
        cache = sheet[1]
        if cache == None:
            continue
        sheetElem = cache.createDOM(wb)
        sheetElem.setAttr("name", sheet[0])
        root.appendChild(sheetElem)
    return root
def createDOM(self, wb):
    """Return an "external-sheet-cache" element describing this object.

    The "url" attribute is the encoded self.docURL minus its leading
    character.  Every entry in self.__sheets that carries a second item
    contributes a child element, produced by that item's createDOM(wb)
    and tagged with the entry's first item as "name".
    """
    cacheElem = node.Element("external-sheet-cache")
    # 1st char is always 0x1.
    url = globals.encodeName(self.docURL[1:])
    cacheElem.setAttr("url", url)
    for sheet in self.__sheets:
        if not (sheet[1] == None):
            child = sheet[1].createDOM(wb)
            child.setAttr("name", sheet[0])
            cacheElem.appendChild(child)
    return cacheElem
def __buildTreeImpl(self, entries, parent):
    """Recursively populate parent.Nodes from the directory entries.

    If parent's entry has a root id greater than zero, a DirNode for that
    entry is created, named relative to parent (with a trailing '/' when
    it has a root id of its own), its siblings are added through
    self.__addSiblings(), and the node is appended to parent.Nodes.
    Afterwards every node in parent.Nodes that has a root id greater than
    zero is processed the same way.
    """
    rootID = parent.Entry.DirIDRoot
    if rootID > 0:
        rootNode = DirNode(entries[rootID])
        rootNode.HierachicalName = parent.HierachicalName + globals.encodeName(rootNode.Entry.Name)
        if rootNode.Entry.DirIDRoot > 0:
            rootNode.HierachicalName += '/'

        self.__addSiblings(entries, parent, rootNode)
        parent.Nodes.append(rootNode)

    for subNode in parent.Nodes:
        if subNode.Entry.DirIDRoot <= 0:
            continue
        self.__buildTreeImpl(entries, subNode)
def __getString(self, limit):
    """Read a string of two-byte units starting at self.pos.

    Byte pairs are fetched with getuInt8() until a pair of two zero bytes
    is found or, when limit is not None, until limit pairs were collected.
    The collected bytes are converted with globals.getUTF8FromUTF16(),
    encoded with globals.encodeName() and quoted with self.quoteAttr().

    Returns a (quoted string, position) tuple, where position is the
    offset just past the last byte that was read (including a terminating
    zero pair, if one was consumed).  self.pos itself is not modified.
    """
    collected = []
    cursor = self.pos
    pairs = 0
    while limit is None or pairs != limit:
        first = self.getuInt8(pos=cursor)
        cursor += 1
        second = self.getuInt8(pos=cursor)
        cursor += 1
        if first == 0 and second == 0:
            break
        collected.extend((first, second))
        pairs += 1

    rawString = "".join([chr(value) for value in collected])
    text = globals.encodeName(globals.getUTF8FromUTF16(rawString))
    return (self.quoteAttr(text), cursor)
def dump(self):
    """Print this dictionary entry as a <dictionaryEntry> XML element.

    Reads the property identifier and the name length, then up to
    self.Length name bytes (stopping at the first zero byte), and prints
    the name decoded as latin1.  Finally the parent's position is
    advanced to this record's current position.
    """
    print('<dictionaryEntry offset="%s" index="%s">' % (self.pos, self.index))
    self.printAndSet("PropertyIdentifier", self.readuInt32())
    self.printAndSet("Length", self.readuInt32())

    nameBytes = []
    for dummy in range(self.Length):
        value = self.readuInt8()
        if value == 0:
            break
        nameBytes.append(value)

    # TODO support non-latin1
    encoding = "latin1"
    rawName = "".join([chr(value) for value in nameBytes])
    encodedName = globals.encodeName(rawName.decode(encoding), lowOnly=True)
    print('<Name value="%s"/>' % encodedName.encode('utf-8'))
    print('</dictionaryEntry>')
    self.parent.pos = self.pos
def dump(self):
    """Print a self-closing <stream> element with this stream's name and size.

    The name is passed through globals.encodeName() and self.quoteAttr()
    before being written.
    """
    quotedName = self.quoteAttr(globals.encodeName(self.name))
    print('<stream name="%s" size="%s"/>' % (quotedName, self.size))
def dump(self):
    """Emit one <stream name="..." size="..."/> line for this stream.

    self.name is encoded with globals.encodeName() and then quoted with
    self.quoteAttr(); self.size is written as is.
    """
    encodedName = globals.encodeName(self.name)
    attributes = (self.quoteAttr(encodedName), self.size)
    print('<stream name="%s" size="%s"/>' % attributes)
def __printDirHeader(self, dirname, byteLen):
    """Print a banner with the encoded directory name and its size.

    The banner is an empty line, a 68 character '=' rule, the line
    "<name> (size: <byteLen> bytes)" and a 68 character '-' rule.
    """
    encodedName = globals.encodeName(dirname)
    ruleWidth = 68
    print("")
    print("=" * ruleWidth)
    print("%s (size: %d bytes)" % (encodedName, byteLen))
    print("-" * ruleWidth)
def retrieveCP(self, cp):
    """Return the encoded text of the character at character position cp.

    The character position is mapped to a byte offset and a 'compressed'
    flag by self.__cpToOffset().  Compressed characters are the single
    byte at the offset; all others are the two bytes at the offset,
    decoded as UTF-16.  The text is returned via globals.encodeName().

    A two-byte slice can be undecodable by itself (for example half of a
    surrogate pair).  Instead of letting the UnicodeDecodeError abort the
    dump, a <todo> element is printed and the slice is decoded with
    errors="replace".
    """
    pos, compressed = self.__cpToOffset(cp)
    if compressed:
        return globals.encodeName(self.bytes[pos])

    codeUnit = self.bytes[pos:pos + 2]
    try:
        text = codeUnit.decode('utf-16')
    except UnicodeDecodeError:
        # Describe the bytes from the slice itself: it may be shorter than
        # two bytes at the very end of the stream.
        hexBytes = '-'.join([hex(ord(b)) for b in codeUnit])
        reason = 'could not decode bytes in position %d-%d (%s)' % (pos, pos + 1, hexBytes)
        print('<todo what="WordDocumentStream::retrieveCP(): %s"/>' % reason)
        text = codeUnit.decode('utf-16', 'replace')
    return globals.encodeName(text, lowOnly=True)