def __init__(self, parent, name, description=None, strip=None, nbytes=None, truncate=None):
    """Initialise a string field whose size comes from a 16-bit prefix.

    The first two bytes at the field's address are read with the parent's
    endianness; that value is multiplied by the two-byte character size
    to obtain the content size in bytes.  The charset is set to UTF-16-LE.

    ``strip`` and ``truncate`` are stored on the instance.  ``nbytes`` is
    accepted but not used by this constructor.
    """
    prefix_bytes = 2
    bytes_per_char = 2

    Bytes.__init__(self, parent, name, 1, description)
    self._format = "WidePascalString16"
    self._strip = strip
    self._truncate = truncate
    self._character_size = bytes_per_char
    self._charset = "UTF-16-LE"
    self._content_offset = prefix_bytes

    # Read the prefix, then scale it by the character size.
    prefix_value = self._parent.stream.readBits(
        self.absolute_address,
        self._content_offset * 8,
        self._parent.endian)
    self._content_size = self._character_size * prefix_value

    # Total field size in bits: content plus the prefix itself.
    total_bytes = self._content_size + self.content_offset
    self._size = total_bytes * 8
def getFieldType(self):
    """Return the base field type followed by ``<charset[,strip=...]>``.

    When a strip option is set, it is appended either as the printable
    quoted strip string or as ``strip=True`` for non-string values.
    """
    details = self.charset
    strip = self._strip
    if strip:
        if isinstance(strip, (str, unicode)):
            suffix = ",strip=%s" % makePrintable(strip, "ASCII", quote="'")
        else:
            suffix = ",strip=True"
        details = details + suffix
    base_type = Bytes.getFieldType(self)
    return "%s<%s>" % (base_type, details)
def __init__(
    self,
    parent,
    name,
    length,
    decompressor,
    description=None,
    parser=None,
    filename=None,
    mime_type=None,
    parser_class=None,
):
    """Create a ``length``-byte field and set up its input stream.

    If ``filename`` is given and is not unicode it is converted with
    ``makePrintable`` (ISO-8859-1); when no description is supplied one
    is built from the filename and the human-readable size.  The
    remaining arguments are forwarded to ``setupInputStream``.
    """
    if filename:
        already_unicode = isinstance(filename, unicode)
        if not already_unicode:
            filename = makePrintable(filename, "ISO-8859-1")
        # Only synthesise a description when the caller gave none.
        description = description or 'File "%s" (%s)' % (filename, humanFilesize(length))

    Bytes.__init__(self, parent, name, length, description)
    self.setupInputStream(decompressor, parser, filename, mime_type, parser_class)
def _createInputStream(self, **args):
    """Create the input stream, adding ``mime`` and ``filename`` tags.

    The MIME type is read from ``../../FileMimeType/string``; if that
    field is missing the tag is simply omitted.  A filename tag is added
    when ``_getFilename()`` returns a non-empty value.
    """
    stream_tags = args.setdefault("tags", [])

    try:
        mime = self["../../FileMimeType/string"].value
    except MissingField:
        pass
    else:
        stream_tags.append(("mime", mime))

    name = self._getFilename()
    if name:
        stream_tags.append(("filename", name))

    return Bytes._createInputStream(self, **args)
def __init__(self, parent, name, length, description=None,
             parser=None, filename=None, mime_type=None, parser_class=None):
    """Create a ``length``-byte field and register a tagged sub-stream factory.

    If ``filename`` is given and is not unicode it is converted with
    ``makePrintable`` (ISO-8859-1); when no description is supplied one
    is built from the filename and the human-readable size.  The factory
    passed to ``setSubIStream`` appends ``class``, ``id``, ``mime`` and
    ``filename`` tags for whichever of the optional arguments were given.
    """
    if filename:
        already_unicode = isinstance(filename, unicode)
        if not already_unicode:
            filename = makePrintable(filename, "ISO-8859-1")
        # Only synthesise a description when the caller gave none.
        description = description or 'File "%s" (%s)' % (filename, humanFilesize(length))

    Bytes.__init__(self, parent, name, length, description)

    def createInputStream(cis, **args):
        # Append to the caller's tag list, creating it when absent.
        stream_tags = args.setdefault("tags", [])
        if parser_class:
            stream_tags.append(("class", parser_class))
        if parser is not None:
            stream_tags.append(("id", parser.PARSER_TAGS["id"]))
        if mime_type:
            stream_tags.append(("mime", mime_type))
        if filename:
            stream_tags.append(("filename", filename))
        return cis(**args)

    self.setSubIStream(createInputStream)
def createFields(self):
    """Yield the bzip2 header fields followed by the remaining payload.

    Fields produced: ``id``, ``blocksize``, ``blockheader``, then either
    ``id2``/``id3`` (block header 0x17) or ``id2`` (block header 0x31),
    then ``crc32`` and, if any bytes remain, a ``file`` field covering
    the rest of the input.

    Raises:
        ParserError: on an unknown block header or a wrong second
            identifier.
        NotImplementedError: when the total size is unknown.
    """
    yield String(self, "id", 3, "Identifier (BZh)", charset="ASCII")
    yield Character(self, "blocksize", "Block size (KB of memory needed to uncompress)")

    yield UInt8(self, "blockheader", "Block header")
    if self["blockheader"].value == 0x17:
        yield String(self, "id2", 4, "Identifier2 (re8P)", charset="ASCII")
        yield UInt8(self, "id3", "Identifier3 (0x90)")
    elif self["blockheader"].value == 0x31:
        yield String(self, "id2", 5, "Identifier 2 (AY&SY)", charset="ASCII")
        if self["id2"].value != "AY&SY":
            raise ParserError("Invalid identifier 2 (AY&SY)!")
    else:
        raise ParserError("Invalid block header!")
    yield textHandler(UInt32(self, "crc32", "CRC32"), hexadecimal)

    if self._size is None:
        # TODO: is it possible to handle piped input?
        raise NotImplementedError

    # Floor division keeps the byte count an integer regardless of the
    # division semantics in effect.
    size = (self._size - self.current_size) // 8
    if size:
        # Derive the uncompressed filename from a ".bz2" filename tag.
        for tag, filename in self.stream.tags:
            if tag == "filename" and filename.endswith(".bz2"):
                filename = filename[:-4]
                break
        else:
            filename = None
        data = Bytes(self, "file", size)
        if has_deflate:
            CompressedField(self, Bunzip2)

            def createInputStream(**args):
                if filename:
                    args.setdefault("tags", []).append(("filename", filename))
                return self._createInputStream(**args)
            data._createInputStream = createInputStream
        yield data
def createFields(self):
    """Yield the header fields in on-disk order.

    The sequence is: class identifier, minor and major version, byte
    order mark, big/small block shifts, reserved bytes, directory sector
    count, big block depot count and start, transaction signature, mini
    stream threshold, small block depot start and count, and DIFAT start
    and count.
    """
    yield GUID(self, "clsid", "16 bytes GUID used by some apps")
    yield UInt16(self, "ver_min", "Minor version")
    # Description corrected: this field is the major version.
    yield UInt16(self, "ver_maj", "Major version")
    yield Bytes(self, "endian", 2, "Endian (0xFFFE for Intel)")
    yield UInt16(self, "bb_shift", "Log, base 2, of the big block size")
    yield UInt16(self, "sb_shift", "Log, base 2, of the small block size")
    yield NullBytes(self, "reserved[]", 6, "(reserved)")
    yield UInt32(
        self, "csectdir",
        "Number of SECTs in directory chain for 4 KB sectors (version 4)")
    yield UInt32(self, "bb_count", "Number of Big Block Depot blocks")
    yield SECT(self, "bb_start", "Root start block")
    yield NullBytes(self, "transaction", 4, "Signature used for transactions (must be zero)")
    yield UInt32(self, "threshold", "Maximum size for a mini stream (typically 4096 bytes)")
    yield SECT(self, "sb_start", "Small Block Depot start block")
    yield UInt32(self, "sb_count")
    yield SECT(self, "db_start", "First block of DIFAT")
    yield UInt32(self, "db_count", "Number of SECTs in DIFAT")
def createFields(self):
    """Yield the magic, trailer, offset table and every listed object.

    After the 8-byte magic the parser seeks to 32 bytes before the end
    of the input to read the trailer, then to the offset table named by
    the trailer.  For each offset in the table it seeks backwards, or
    yields a ``padding[]`` raw field to cover a gap, before yielding the
    object at that offset.
    """
    yield Bytes(self, "magic", 8, "File magic (bplist00)")
    if self.size:
        self.seekByte(self.size//8-32, True)
    else:
        # FIXME: UNTESTED
        while True:
            try:
                self.seekByte(1024)
            except Exception:
                # Best effort: stop at the first failed seek, but do not
                # swallow KeyboardInterrupt or SystemExit.
                break
        self.seekByte(self.size//8-32)
    yield BPListTrailer(self, "trailer")
    self.seekByte(self['trailer/offsetTableOffset'].value)
    yield BPListOffsetTable(self, "offset_table")
    for i in self.array("offset_table/offset"):
        if self.current_size > i.value*8:
            self.seekByte(i.value)
        elif self.current_size < i.value*8:
            # try to detect files with gaps or unparsed content
            yield RawBytes(self, "padding[]", i.value-self.current_size//8)
        yield BPListObject(self, "object[]")
def createFields(self):
    """Yield the fixed header fields in on-disk order.

    The layout is described as a table of ``(field class, arguments)``
    pairs.  Each field is constructed only when it is about to be
    yielded, one at a time, exactly as a sequence of ``yield``
    statements would do.
    """
    layout = (
        (String, ('HeaderString', 16)),
        (UInt16, ('PageSize',)),
        (UInt8, ('WriteVersion',)),
        (UInt8, ('ReadVersion',)),
        (UInt8, ('ReservedSpace',)),
        (UInt8, ('MaxEmbeddedPayloadFraction',)),
        (UInt8, ('MinEmbeddedPayloadFraction',)),
        (UInt8, ('LeafPayloadFraction',)),
        (UInt32, ('FileChangeCounter',)),
        (UInt32, ('SizeInPages',)),
        (UInt32, ('FirstFreelistPage',)),
        (UInt32, ('FreelistTotal',)),
        (UInt32, ('SchemaCookie',)),
        (UInt32, ('SchemaFormatNumber',)),
        (UInt32, ('DefaultPageCacheSize',)),
        (UInt32, ('MagicPageNumber',)),
        (UInt32, ('TextEncoding',)),
        (UInt32, ('UserVersion',)),
        (UInt32, ('IncrementalVacuumMode',)),
        (Bytes, ('ReservedForExpansion', 24)),
        (UInt32, ('VersionValidFor',)),
        (UInt32, ('SqliteVersion',)),
    )
    for field_class, field_args in layout:
        yield field_class(self, *field_args)
def createFields(self):
    """Yield the lead, the two property sets and the trailing payload.

    The payload is exposed as a ``content`` sub-file: parsed as bzip2
    when at least three bytes remain and they equal ``"BZh"``, otherwise
    as gzip.

    Raises:
        NotImplementedError: when the total size is unknown.
    """
    yield Bytes(self, "signature", 4, r"RPM file signature (\xED\xAB\xEE\xDB)")
    yield UInt8(self, "major_ver", "Major version")
    yield UInt8(self, "minor_ver", "Minor version")
    yield Enum(UInt16(self, "type", "RPM type"), RpmFile.TYPE_NAME)
    yield UInt16(self, "architecture", "Architecture")
    yield String(self, "name", 66, "Archive name", strip="\0", charset="ASCII")
    yield UInt16(self, "os", "OS")
    yield UInt16(self, "signature_type", "Type of signature")
    yield NullBytes(self, "reserved", 16, "Reserved")
    yield PropertySet(self, "checksum", "Checksum (signature)")
    yield PropertySet(self, "header", "Header")

    if self._size is None:
        # TODO: is it possible to handle piped input?
        raise NotImplementedError

    remaining = (self._size - self.current_size) // 8
    if not remaining:
        return

    # Peek at the next three bytes to pick the payload parser.
    looks_like_bzip2 = (
        3 <= remaining
        and self.stream.readBytes(self.current_size, 3) == "BZh")
    if looks_like_bzip2:
        yield SubFile(self, "content", remaining, "bzip2 content", parser=Bzip2Parser)
    else:
        yield SubFile(self, "content", remaining, "gzip content", parser=GzipParser)
def createFields(self):
    """Yield the signature, the table of contents and each table's content.

    Table-of-contents entries are yielded in file order, then visited in
    ascending ``offset`` order.  For each entry the parser seeks to its
    offset (yielding any padding field that produces), truncates the
    declared size to what is left in the input (with a warning), and
    yields a ``Properties``, ``GlyphNames`` or raw ``data[]`` field
    depending on the entry type.  Entries whose size is zero are skipped.
    """
    yield Bytes(self, "signature", 4, r'File signature ("\1pcf")')
    yield UInt32(self, "nb_toc")

    toc_entries = []
    for _ in xrange(self["nb_toc"].value):
        toc = TOC(self, "toc[]")
        yield toc
        toc_entries.append(toc)

    for entry in sorted(toc_entries, key=lambda item: item["offset"].value):
        size = entry["size"].value

        gap = self.seekByte(entry["offset"].value)
        if gap:
            yield gap

        available = (self.size - self.current_size) // 8
        if available < size:
            self.warning("Truncate content of %s to %s bytes (was %s)" % (entry.path, available, size))
            size = available
        if not size:
            continue

        entry_type = entry["type"].value
        if entry_type == 1:
            yield Properties(self, "properties", entry, "Properties", size=size * 8)
        elif entry_type == 128:
            yield GlyphNames(self, "glyph_names", entry, "Glyph names", size=size * 8)
        else:
            yield RawBytes(self, "data[]", size, "Content of %s" % entry.path)
def __init__(self, parent, name):
    """Create the field as a fixed-size run of 16 bytes."""
    fixed_length = 16
    Bytes.__init__(self, parent, name, fixed_length)
def parseData(self):
    """Yield one ``data`` field covering all whole bytes that remain.

    Nothing is yielded when fewer than eight bits are left.
    """
    remaining_bits = self.size - self.current_size
    remaining_bytes = remaining_bits // 8
    if not remaining_bytes:
        return
    yield Bytes(self, "data", remaining_bytes)
def __init__(self, parent, name, format, description=None,
             strip=None, charset=None, nbytes=None, truncate=None):
    """Initialise a string field and compute its size, offset and charset.

    Parameters:
        parent, name, description: forwarded to ``Bytes.__init__``.
        format: one of ``self.VALID_FORMATS``; selects how the string
            length is determined (fixed, suffix-terminated, or prefixed).
        strip, truncate: stored on the instance for later use.
        charset: empty/None or a member of ``self.CHARSET_8BIT`` (one
            byte per character), or a key of ``self.UTF_CHARSET``.
        nbytes: content size in bytes; only valid with the "fixed"
            format and must be within 1..0xffff.

    Sets ``_content_size`` (bytes), ``_content_offset`` (bytes),
    ``_size`` (bits), ``_charset``, ``_character_size`` and ``_length``
    (characters, or None when the character size is unknown).

    Raises:
        FieldError: invalid charset, invalid fixed size, suffix not
            found in the stream, or invalid BOM.
    """
    Bytes.__init__(self, parent, name, 1, description)

    # Is format valid?
    assert format in self.VALID_FORMATS

    # Store options
    self._format = format
    self._strip = strip
    self._truncate = truncate

    # Check charset and compute character size in bytes
    # (or None when it's not possible to guess character size)
    if not charset or charset in self.CHARSET_8BIT:
        self._character_size = 1   # one byte per character
    elif charset in self.UTF_CHARSET:
        self._character_size = None
    else:
        raise FieldError("Invalid charset for %s: \"%s\"" % (self.path, charset))
    self._charset = charset

    # It is a fixed string?
    if nbytes is not None:
        assert self._format == "fixed"
        # Arbitrary limits, just to catch some bugs...
        if not (1 <= nbytes <= 0xffff):
            raise FieldError("Invalid string size for %s: %s" % (self.path, nbytes))
        self._content_size = nbytes   # content length in bytes
        self._size = nbytes * 8
        self._content_offset = 0
    else:
        # Format with a suffix: Find the end of the string
        if self._format in self.SUFFIX_FORMAT:
            self._content_offset = 0

            # Choose the suffix
            suffix = self.suffix_str

            # Find the suffix
            length = self._parent.stream.searchBytesLength(
                suffix, False, self.absolute_address)
            if length is None:
                raise FieldError("Unable to find end of string %s (format %s)!"
                    % (self.path, self._format))
            if 1 < len(suffix):
                # Fix length for little endian bug with UTF-xx charset:
                #   u"abc" -> "a\0b\0c\0\0\0" (UTF-16-LE)
                #   search returns length=5, whereas real length is 6
                length = alignValue(length, len(suffix))

            # Compute sizes
            self._content_size = length   # in bytes
            self._size = (length + len(suffix)) * 8

        # Format with a prefix: Read prefixed length in bytes
        else:
            assert self._format in self.PASCAL_FORMATS

            # Get the prefix size
            prefix_size = self.PASCAL_FORMATS[self._format]
            self._content_offset = prefix_size

            # Read the prefix and compute sizes
            value = self._parent.stream.readBits(
                self.absolute_address, prefix_size*8, self._parent.endian)
            self._content_size = value   # in bytes
            self._size = (prefix_size + value) * 8

    # For UTF-16 and UTF-32, choose the right charset using BOM
    if self._charset in self.UTF_CHARSET:
        # Charset requires a BOM?
        bomsize, endian = self.UTF_CHARSET[self._charset]
        if endian == "BOM":
            # Read the BOM value
            # NOTE(review): the BOM is read at absolute_address, i.e. the
            # start of the field; for prefixed formats that is where the
            # length prefix sits -- confirm this is intended.
            nbytes = bomsize // 8
            bom = self._parent.stream.readBytes(self.absolute_address, nbytes)

            # Choose right charset using the BOM
            bom_endian = self.UTF_BOM[bomsize]
            if bom not in bom_endian:
                raise FieldError("String %s has invalid BOM (%s)!"
                    % (self.path, repr(bom)))
            self._charset = bom_endian[bom]
            # The BOM is not part of the content: shrink and shift it.
            self._content_size -= nbytes
            self._content_offset += nbytes

    # Compute length in character if possible
    if self._character_size:
        self._length = self._content_size // self._character_size
    else:
        self._length = None
def createFields(self):
    """Yield the fields of one object and install its XML renderer.

    Every object starts with a 4-bit ``marker_type``; the fields that
    follow depend on it.  Each branch also assigns ``self.xml``, a
    callable taking a ``prefix`` string and returning the XML text for
    this object (an empty string for types with no XML form).
    """
    yield Enum(Bits(self, "marker_type", 4),
               {0: "Simple",
                1: "Int",
                2: "Real",
                3: "Date",
                4: "Data",
                5: "ASCII String",
                6: "UTF-16-BE String",
                8: "UID",
                10: "Array",
                13: "Dict",})
    markertype = self['marker_type'].value
    if markertype == 0:
        # Simple (Null)
        yield Enum(Bits(self, "value", 4),
                   {0: "Null",
                    8: "False",
                    9: "True",
                    15: "Fill Byte",})
        if self['value'].display == "False":
            self.xml=lambda prefix:prefix + "<false/>"
        elif self['value'].display == "True":
            self.xml=lambda prefix:prefix + "<true/>"
        else:
            self.xml=lambda prefix:prefix + ""

    elif markertype == 1:
        # Int
        yield Bits(self, "size", 4, "log2 of number of bytes")
        size=self['size'].value
        # 8-bit (size=0), 16-bit (size=1) and 32-bit (size=2) numbers are unsigned
        # 64-bit (size=3) numbers are signed
        yield GenericInteger(self, "value", (size>=3), (2**size)*8)
        self.xml=lambda prefix:prefix + "<integer>%s</integer>"%self['value'].value

    elif markertype == 2:
        # Real
        yield Bits(self, "size", 4, "log2 of number of bytes")
        if self['size'].value == 2: # 2**2 = 4 byte float
            yield Float32(self, "value")
        elif self['size'].value == 3: # 2**3 = 8 byte float
            yield Float64(self, "value")
        else:
            # FIXME: What is the format of the real?
            yield Bits(self, "value", (2**self['size'].value)*8)
        self.xml=lambda prefix:prefix + "<real>%s</real>"%self['value'].value

    elif markertype == 3:
        # Date
        yield Bits(self, "extra", 4, "Extra value, should be 3")
        # The stored float is added, as seconds, to 2001-01-01.
        cvt_time=lambda v:datetime(2001,1,1) + timedelta(seconds=v)
        yield displayHandler(Float64(self, "value"),lambda x:humanDatetime(cvt_time(x)))
        self.xml=lambda prefix:prefix + "<date>%s</date>"%(cvt_time(self['value'].value).isoformat())

    elif markertype == 4:
        # Data
        yield BPListSize(self, "size")
        if self['size'].value:
            yield Bytes(self, "value", self['size'].value)
            self.xml=lambda prefix:prefix + "<data>\n%s\n%s</data>"%(self['value'].value.encode('base64').strip(),prefix)
        else:
            self.xml=lambda prefix:prefix + '<data></data>'

    elif markertype == 5:
        # ASCII String
        yield BPListSize(self, "size")
        if self['size'].value:
            yield String(self, "value", self['size'].value, charset="ASCII")
            self.xml=lambda prefix:prefix + "<string>%s</string>"%(self['value'].value.encode('iso-8859-1'))
        else:
            self.xml=lambda prefix:prefix + '<string></string>'

    elif markertype == 6:
        # UTF-16-BE String
        yield BPListSize(self, "size")
        if self['size'].value:
            # "size" is multiplied by two to get the field length in bytes.
            yield String(self, "value", self['size'].value*2, charset="UTF-16-BE")
            self.xml=lambda prefix:prefix + "<string>%s</string>"%(self['value'].value.encode('utf-8'))
        else:
            self.xml=lambda prefix:prefix + '<string></string>'

    elif markertype == 8:
        # UID
        yield Bits(self, "size", 4, "Number of bytes minus 1")
        yield GenericInteger(self, "value", False, (self['size'].value + 1)*8)
        self.xml=lambda prefix:prefix + "" # no equivalent?

    elif markertype == 10:
        # Array
        yield BPListSize(self, "size")
        size = self['size'].value
        # NOTE(review): when size is 0 no "value" field is produced;
        # verify how users of self.xml handle an empty array.
        if size:
            yield BPListArray(self, "value", size)
            self.xml=lambda prefix:self['value'].createXML(prefix)

    elif markertype == 13:
        # Dict
        yield BPListSize(self, "size")
        yield BPListDict(self, "value", self['size'].value)
        self.xml=lambda prefix:self['value'].createXML(prefix)

    else:
        # Unknown marker type: consume the low 4 bits, no XML output.
        yield Bits(self, "value", 4)
        self.xml=lambda prefix:''
def readBitString(self, content_size):
    """Yield the ``padding_size`` byte, then the remaining bytes as ``value``.

    The ``value`` field is only produced when ``content_size`` is
    greater than one.
    """
    yield UInt8(self, "padding_size", description="Number of unused bits")
    if content_size <= 1:
        return
    payload_bytes = content_size - 1
    yield Bytes(self, "value", payload_bytes)
def __init__(self, parent):
    """Create a ``file`` field whose length is the parent's ``size`` value."""
    length = parent['size'].value
    Bytes.__init__(self, parent, 'file', length, None)
def createDisplay(self):
    """Return a ``<padding pattern=...>`` summary, or the plain bytes display.

    The summary is used only when ``_display_pattern`` is set.
    """
    if not self._display_pattern:
        return Bytes.createDisplay(self)
    quoted_pattern = makePrintable(self.pattern, "ASCII", quote="'")
    return u"<padding pattern=%s>" % quoted_pattern
def createRawDisplay(self):
    """Return the raw display, delegating to ``Bytes.createDisplay``."""
    raw_text = Bytes.createDisplay(self)
    return raw_text
def createDisplay(self):
    """Describe the padding by its pattern when one is flagged for display.

    Falls back to ``Bytes.createDisplay`` when ``_display_pattern`` is
    not set.
    """
    show_pattern = self._display_pattern
    if show_pattern:
        printable = makePrintable(self.pattern, "ASCII", quote="'")
        text = u"<padding pattern=%s>" % printable
    else:
        text = Bytes.createDisplay(self)
    return text
def createFields(self):
    """Yield the magic, the architecture count and one entry per architecture."""
    yield Bytes(self, "magic", 4, "Mach-O signature")
    yield UInt32(self, "nfat_arch", "Number of architectures in this fat file")
    arch_count = self['nfat_arch'].value
    for _ in xrange(arch_count):
        yield MachoFatArch(self, 'arch[]')
def createFields(self):
    """Yield a single ``data`` field spanning the whole field set.

    If the size is not yet known it is first obtained from
    ``_getSize()`` and stored in ``_size`` (in bits).
    """
    if self._size is None:
        self._size = self._getSize()
    # Floor division keeps the byte count an integer regardless of the
    # division semantics in effect.
    yield Bytes(self, "data", self._size // 8)
def createFields(self):
    """Yield one ``strip[]`` field per pair returned by ``getStrips``.

    For each pair the parser first seeks to the absolute byte offset,
    then yields a field of the given byte length.
    """
    for strip_offset, strip_length in getStrips(self._ifd):
        self.seekByte(strip_offset, relative=False)
        yield Bytes(self, "strip[]", strip_length)
def createFields(self):
    """Yield the 8-byte identifier, then chunks until end of input."""
    yield Bytes(self, "id", 8, r"PNG identifier ('\x89PNG\r\n\x1A\n')")
    while True:
        if self.eof:
            break
        yield Chunk(self, "chunk[]")
def readBinary(self, content_size):
    """Yield a single ``value`` field of ``content_size`` bytes."""
    value_field = Bytes(self, "value", content_size)
    yield value_field
def createFields(self):
    """Yield the signature (named via ``STR_MAGIC``), timestamp and content."""
    signature = Bytes(self, "signature", 4, "Python file signature and version")
    yield Enum(signature, self.STR_MAGIC)
    yield TimestampUnix32(self, "timestamp", "Timestamp")
    yield Object(self, "content")
def createFields(self):
    """Yield the ``data`` field, then padding up to the declared size.

    ``datasize`` and ``_size`` are treated as bit counts: the data field
    length is ``datasize`` divided by eight, and any bits left between
    the current position and ``_size`` are covered by a padding field.
    """
    # Floor division keeps the byte count an integer regardless of the
    # division semantics in effect.
    yield Bytes(self, "data", self.datasize // 8)
    padding = self._size - self.current_size
    if padding:
        yield createPaddingField(self, padding)
def readOctetString(self, content_size):
    """Yield a single ``value`` field of ``content_size`` bytes."""
    octets = Bytes(self, "value", content_size)
    yield octets
def createDisplay(self):
    """Return ``"<null>"`` when the pattern display is enabled.

    Otherwise fall back to ``Bytes.createDisplay``.
    """
    if not self._display_pattern:
        return Bytes.createDisplay(self)
    return "<null>"
def __init__(self, parent, name, nbytes, description="Padding", pattern=None):
    """
    Create a padding field of ``nbytes`` bytes.

    ``pattern`` is None or the repeated string; it is stored on the
    instance, and the result of ``checkPattern()`` decides whether the
    pattern is used for display.
    """
    pattern_is_valid = pattern is None or isinstance(pattern, str)
    assert pattern_is_valid
    Bytes.__init__(self, parent, name, nbytes, description)
    self.pattern = pattern
    self._display_pattern = self.checkPattern()
def createFields(self):
    """Yield the 3-byte sync, the tag, and optionally the parsed content.

    The ``content`` field is produced only when a parser is set and the
    tag value differs from 0xb7.
    """
    yield Bytes(self, "sync", 3)
    yield textHandler(UInt8(self, "tag"), hexadecimal)
    if not self.parser:
        return
    if self['tag'].value != 0xb7:
        yield self.parser(self, "content")