def parse(reader: BinaryReader, is_local: bool) -> 'ZXHPkWareUnix':
    """Build a ``ZXHPkWareUnix`` header from the bytes left in *reader*.

    The fixed part is four fields read with the struct format ``'IIHH'``
    (access time, modification time, UID, GID). Whatever follows is
    interpreted either as a pair of device numbers or as a link target.

    Args:
        reader: Reader positioned at the start of the header payload.
        is_local: Passed through unchanged to the resulting header.

    Returns:
        A ``ZXHPkWareUnix`` with no warnings and no unconsumed data.
    """
    atime_secs, mtime_secs, uid, gid = reader.read_struct('IIHH')
    trailing = reader.read_remainder()

    # KLUDGE: normally we'd need to know the type of the file to know whether
    # the trailing bytes hold the link target (for sym/hardlinks) or the
    # major/minor device numbers (for char/block devices). We use a trick
    # instead: the major number is almost certainly 16 bits at best, so device
    # data will almost definitely contain a \x00 byte, whereas a link target
    # will not.
    looks_like_device = (len(trailing) == 8) and (b'\x00' in trailing)
    if looks_like_device:
        device = BinaryReader(trailing, big_endian=False).read_struct('II')
        link_target = None
    else:
        device = None
        link_target = trailing

    return ZXHPkWareUnix(
        is_local,
        (),
        None,
        atime=iso_from_unix_time(atime_secs),
        mtime=iso_from_unix_time(mtime_secs),
        uid=uid,
        gid=gid,
        device=device,
        link_target=link_target,
    )
def parse(tag: int, value: bytes) -> 'NTFSInfoTag':
    """Decode one NTFS info tag from its tag number and raw payload.

    Args:
        tag: Numeric tag type.
        value: Raw payload bytes of the tag.

    Returns:
        An ``NTFSInfoTimestampsTag`` for tag type 1 (three 64-bit values, each
        converted with ``iso_from_ntfs_time``), otherwise an
        ``NTFSInfoUnhandledTag`` wrapping the untouched tag and payload.
    """
    reader = BinaryReader(value, big_endian=False)
    if tag != 1:
        return NTFSInfoUnhandledTag(tag, value)

    raw_times = reader.read_struct('QQQ', 'timestamps')
    return NTFSInfoTimestampsTag(*map(iso_from_ntfs_time, raw_times))
def parse(reader: BinaryReader) -> 'IZUnicodeCommentData':
    """Decode a versioned Unicode comment payload.

    Args:
        reader: Reader positioned at the version byte.

    Returns:
        ``IZUnicodeCommentDataV1`` (UTF-8 decoded comment plus the CRC32 stored
        in the payload) for version 1; for any other version an
        ``IZUnicodeCommentDataUnsupported`` holding the version and the
        remaining raw bytes.
    """
    version = reader.read_uint8('version')
    if version != 1:
        return IZUnicodeCommentDataUnsupported(version, reader.read_remainder())

    declared_crc32 = reader.read_uint32('CRC32')
    text = reader.read_remainder().decode('utf-8')
    return IZUnicodeCommentDataV1(text, declared_crc32)
def parse(reader: BinaryReader) -> 'IZUnicodePathData':
    """Decode a versioned Unicode path payload.

    Args:
        reader: Reader positioned at the version byte.

    Returns:
        ``IZUnicodePathDataV1`` (UTF-8 decoded path plus the CRC32 stored in
        the payload) for version 1; for any other version an
        ``IZUnicodePathDataUnsupported`` holding the version and the remaining
        raw bytes.
    """
    version = reader.read_uint8('version')
    if version != 1:
        return IZUnicodePathDataUnsupported(version, reader.read_remainder())

    declared_crc32 = reader.read_uint32('CRC32')
    decoded_path = reader.read_remainder().decode('utf-8')
    return IZUnicodePathDataV1(decoded_path, declared_crc32)
def parse(reader: BinaryReader, is_local: bool) -> 'ZXHPkWareNTFS':
    """Build a ``ZXHPkWareNTFS`` header from a reserved word and a TLV list.

    Args:
        reader: Reader positioned at the 32-bit reserved field.
        is_local: Passed through unchanged to the resulting header.

    Returns:
        A ``ZXHPkWareNTFS`` carrying every parsed tag. If any tag came back as
        ``NTFSInfoUnhandledTag``, a single warning lists the distinct
        unhandled tag types.
    """
    reserved = reader.read_uint32('reserved field')

    # Each tag has a 2-byte type and a 2-byte length prefix.
    tags = tuple(
        NTFSInfoTag.parse(tag_id, payload)
        for tag_id, payload in reader.iter_tlv(type_bytes=2, length_bytes=2)
    )

    unhandled_ids = {
        parsed.tag for parsed in tags if isinstance(parsed, NTFSInfoUnhandledTag)
    }
    if unhandled_ids:
        listed = ', '.join(str(tag_id) for tag_id in unhandled_ids)
        warnings = (f"Unhandled tag(s) of type {listed}", )
    else:
        warnings = ()

    return ZXHPkWareNTFS(is_local, warnings, None, tags, reserved)
def parse(reader: BinaryReader, is_local: bool) -> 'ZXHInfoZipUnixV2':
    """Build a ``ZXHInfoZipUnixV2`` header.

    Args:
        reader: Reader positioned at the header payload.
        is_local: When true, two 16-bit values (UID, GID) are read from
            *reader*; otherwise nothing is read and both are ``None``.

    Returns:
        A ``ZXHInfoZipUnixV2`` with no warnings and no unconsumed data.
    """
    uid = gid = None
    if is_local:
        uid, gid = reader.read_struct('HH', 'UID/GID')
    return ZXHInfoZipUnixV2(is_local, (), None, uid=uid, gid=gid)
def parse(reader: BinaryReader, is_local: bool) -> 'ZXHExtendedTimestamps':
    """Build a ``ZXHExtendedTimestamps`` header from a flags byte and timestamps.

    Bit 0 of the flags byte gates the modification time. Bits 1 and 2 gate the
    access and creation times, but those two are only read when *is_local* is
    true. Each timestamp is a 32-bit value converted with
    ``iso_from_unix_time``; a timestamp that is not read is ``None``.

    Args:
        reader: Reader positioned at the flags byte.
        is_local: Passed through to the header; also enables atime/ctime.

    Returns:
        A ``ZXHExtendedTimestamps`` with no warnings and no unconsumed data.
    """
    flags = reader.read_uint8('flags')

    mtime = atime = ctime = None
    if flags & 0b001:
        mtime = iso_from_unix_time(reader.read_uint32('mtime'))
    if is_local:
        if flags & 0b010:
            atime = iso_from_unix_time(reader.read_uint32('atime'))
        if flags & 0b100:
            ctime = iso_from_unix_time(reader.read_uint32('ctime'))

    return ZXHExtendedTimestamps(is_local, (), None, mtime=mtime, atime=atime, ctime=ctime)
def parse(reader: BinaryReader, is_local: bool) -> 'ZXHInfoZipUnixV1':
    """Build a ``ZXHInfoZipUnixV1`` header.

    Two 32-bit timestamps (access time, then modification time) are always
    read. A UID/GID pair of 16-bit values is read only if data remains.

    Args:
        reader: Reader positioned at the header payload.
        is_local: Passed through unchanged to the resulting header.

    Returns:
        A ``ZXHInfoZipUnixV1`` with no warnings and no unconsumed data.
    """
    raw_atime, raw_mtime = reader.read_struct('II', 'timestamps')

    uid = gid = None
    if not reader.eof():
        uid, gid = reader.read_struct('HH', 'UID/GID')

    mtime = iso_from_unix_time(raw_mtime)
    atime = iso_from_unix_time(raw_atime)
    return ZXHInfoZipUnixV1(is_local, (), None, mtime=mtime, atime=atime, uid=uid, gid=gid)
def parse(reader: BinaryReader) -> 'NTSecurityDescriptorData':
    """Decode the data portion of an NT security descriptor payload.

    Args:
        reader: Reader positioned at the version byte.

    Returns:
        * ``NTSecurityDescriptorDataCompressed`` if decompression fails for
          any reason (the payload is kept in compressed form);
        * ``NTSecurityDescriptorDataV0`` with the decoded descriptor when the
          version is 0;
        * ``NTSecurityDescriptorDataDecompressed`` for any other version.
    """
    # The CRC field is read to advance the reader but is not verified here.
    version, compress_type, _crc = reader.read_struct('BHI')
    payload = reader.read_remainder()

    try:
        raw_data = decompress_now(payload, compress_type)
    except Exception:
        # Best effort: keep the undecoded payload instead of failing the parse.
        return NTSecurityDescriptorDataCompressed(version, compress_type, payload)

    if version != 0:
        return NTSecurityDescriptorDataDecompressed(version, compress_type, raw_data)

    descriptor = decode_nt_security_descriptor(raw_data)
    return NTSecurityDescriptorDataV0(compress_type, raw_data, descriptor)
def _parse_zip_extra_data(data: bytes, is_local: bool) -> List['ZipExtraHeader']:
    """Split a ZIP extra field into its individual headers and parse each one.

    Args:
        data: Raw extra-field bytes.
        is_local: Forwarded to each header parser; also selects the wording
            ("local" / "central") of the error message.

    Returns:
        The parsed headers, in the order they appear in *data*.

    Raises:
        MalformedZipExtraDataError: If anything goes wrong while iterating or
            parsing; the original exception is chained as the cause.
    """
    reader = BinaryReader(data, big_endian=False)
    try:
        return [
            ZipExtraHeader.parse_from_tlv(header_id, payload, is_local)
            for header_id, payload in reader.iter_tlv(
                type_bytes=2, length_bytes=2, meaning='ZIP extra headers')
        ]
    except Exception as exc:
        raise MalformedZipExtraDataError(
            f"Malformed binary data for ZIP {'local' if is_local else 'central'} extra field"
        ) from exc
def parse_from_tlv(header_id: int, data: bytes, is_local: bool) -> 'ZipExtraHeader':
    """Parse one extra header given its ID and payload.

    Args:
        header_id: Header ID used to look up the parser class.
        data: Payload bytes of the header.
        is_local: Forwarded to the parser class and the result.

    Returns:
        ``ZXHUnrecognized`` (holding the whole payload) when no parser class
        is registered for *header_id*; otherwise the parser's result. If the
        parser leaves bytes unread, the result is copied with an extra
        warning and the leftover bytes stored as ``unconsumed_data``.
    """
    reader = BinaryReader(data, big_endian=False)
    header_class = ZipExtraHeader.get_header_class_for_magic(header_id)
    if header_class is None:
        return ZXHUnrecognized(header_id, is_local, (), None, reader.read_remainder())

    parsed = header_class.parse(reader, is_local)
    if reader.eof():
        return parsed

    extended_warnings = (*parsed.warnings, "Header was not fully consumed")
    leftover = reader.read_remainder()
    return replace(parsed, warnings=extended_warnings, unconsumed_data=leftover)
def parse(reader: BinaryReader, is_local: bool) -> 'ZXHZip64':
    """Build a ``ZXHZip64`` header from the bytes left in *reader*.

    Due to the way this header works (subfields may be included or omitted
    depending on other fields in the local/central directory record), it
    cannot be decoded completely here: much more context is needed. So the
    result only holds a list of unlabelled 64-bit sizes and, if present, the
    32-bit disk start number.

    Args:
        reader: Reader positioned at the header payload.
        is_local: Passed through unchanged to the resulting header.

    Returns:
        A ``ZXHZip64`` with no warnings and no unconsumed data.

    Raises:
        MalformedZipExtraDataError: If the payload is longer than 28 bytes or
            its length is not a multiple of 4.
    """
    total_bytes = reader.bytes_remaining()
    size_is_valid = (total_bytes <= 28) and (total_bytes % 4 == 0)
    if not size_is_valid:
        raise MalformedZipExtraDataError(
            f"ZIP64 extra header size should be a multiple of 4 between 0 and 28, but it is {total_bytes}"
        )

    # Whole 8-byte groups are sizes; a trailing 4-byte group is the disk number.
    n_64bit_values, leftover_bytes = divmod(total_bytes, 8)

    sizes = ()
    if n_64bit_values > 0:
        sizes = reader.read_struct(f'{n_64bit_values}Q', '64-bit sizes')

    disk_start_no = None
    if leftover_bytes != 0:
        disk_start_no = reader.read_uint32('disk start no.')

    return ZXHZip64(is_local, (), None, sizes, disk_start_no)
def parse(reader: BinaryReader, is_local: bool) -> 'ZXHNTSecurityDescriptor':
    """Build a ``ZXHNTSecurityDescriptor`` header.

    The 32-bit descriptor size is always read. The descriptor data itself is
    parsed only when *is_local* is true; otherwise the data is ``None``.

    Args:
        reader: Reader positioned at the descriptor size.
        is_local: Passed through to the header; also enables data parsing.

    Returns:
        A ``ZXHNTSecurityDescriptor``. It carries one warning when the data
        could not be decompressed, or when it was decompressed but its format
        version is not one this code decodes.
    """
    descriptor_size = reader.read_uint32('descriptor size')
    if not is_local:
        return ZXHNTSecurityDescriptor(is_local, (), None, descriptor_size, None)

    data = NTSecurityDescriptorData.parse(reader)
    if isinstance(data, NTSecurityDescriptorDataCompressed):
        warnings = ("Failed to decompress descriptor", )
    elif type(data) is NTSecurityDescriptorDataDecompressed:
        # Exact-class match on purpose: only the plain "decompressed" result
        # (not something derived from it) means the version is unknown.
        warnings = ("Don't know how to decode this format version", )
    else:
        warnings = ()

    return ZXHNTSecurityDescriptor(is_local, warnings, None, descriptor_size, data)
def _read_archive(self):
    """Read every entry from ``self._fileobj`` and store them on the instance.

    Entries are parsed one after another until the data runs out or a zero
    byte is found where the next entry would start. From that point on the
    rest of the file must consist solely of zero bytes; their count is added
    to ``self._trailing_zeros``. The parsed entries end up in
    ``self._entries`` as a tuple.

    Raises:
        NotAGZipFileError: If the data does not start with the GZip magic.
        GZipFileCorruptError: If non-zero bytes follow the zero padding.
    """
    reader = BinaryReader(self._fileobj, big_endian=False)
    if not reader.peek_magic(GZIP_MAGIC):
        raise NotAGZipFileError(reader.name())

    parsed_entries = []
    while reader.bytes_remaining() > 0:
        if reader.peek(1) != b'\x00':
            parsed_entries.append(GZEntry.read_from_binary(reader))
            continue

        # Zero padding: consume it in chunks of at most 1000000 bytes, making
        # sure that every byte really is zero.
        while reader.bytes_remaining() > 0:
            chunk = reader.read_at_most(1000000)
            if len(chunk) > chunk.count(b'\x00'):
                raise GZipFileCorruptError(reader.name()) from \
                    BadGZipFileError("There is data after the entries, but it is not all zeroes")
            self._trailing_zeros += len(chunk)
        break

    self._entries = tuple(parsed_entries)
def parse(reader: BinaryReader) -> 'IZUnixV3Data':
    """Decode a versioned UID/GID payload with variable-width integers.

    Args:
        reader: Reader positioned at the version byte.

    Returns:
        ``IZUnixV3DataV1`` for version 1, where the UID and the GID are each
        stored as a one-byte size followed by an unsigned integer of that
        many bytes; for any other version an ``IZUnixV3DataUnsupported``
        holding the version and the remaining raw bytes.
    """
    version = reader.read_uint8('version')
    if version != 1:
        return IZUnixV3DataUnsupported(version, reader.read_remainder())

    uid_size = reader.read_uint8('UID size')
    raw_uid = reader.read_fixed_size_int(uid_size, signed=False, meaning='UID')
    uid = PosixUID(raw_uid)

    gid_size = reader.read_uint8('GID size')
    raw_gid = reader.read_fixed_size_int(gid_size, signed=False, meaning='GID')
    gid = PosixGID(raw_gid)

    return IZUnixV3DataV1(uid, gid)
def _init_reader(raw_data: Union[bytes, BinaryReader]) -> BinaryReader:
    """Return a ``BinaryReader`` for *raw_data*.

    An existing ``BinaryReader`` is returned as-is (same object, same
    position); anything else is wrapped in a new little-endian reader.
    """
    if not isinstance(raw_data, BinaryReader):
        return BinaryReader(raw_data, big_endian=False)
    return raw_data
def decode_nt_security_descriptor(raw_data: Union[bytes, BinaryReader]) -> NTSecurityDescriptor:
    """Decode a self-relative, revision 1 NT security descriptor.

    Args:
        raw_data: Either the raw descriptor bytes, or a ``BinaryReader``
            already positioned at the first byte of the descriptor. All
            offsets stored in the descriptor header are resolved relative to
            that starting position.

    Returns:
        The decoded ``NTSecurityDescriptor`` (owner, group, DACL, SACL and the
        RM-control-valid flag).

    Raises:
        NotImplementedError: If the descriptor revision is not 1.
        ValueError: If the descriptor is not in self-relative form.
    """
    # Use the normalised reader for everything. It must NOT be re-created from
    # raw_data afterwards: that would re-wrap a caller-supplied reader and
    # invalidate base_pos, which belongs to this reader.
    reader = _init_reader(raw_data)
    base_pos = reader.tell()

    revision = reader.read_uint8("NT Security Descriptor revision")
    if revision != 1:
        raise NotImplementedError(
            f"NT Security Descriptor has revision {revision}, can only parse revision 1"
        )

    _pad_byte, ctrl_flags, owner_ptr, group_ptr, sacl_ptr, dacl_ptr = \
        reader.read_struct('BHIIII', 'NT Security Descriptor header')
    ctrl_flags = NTSdControlFlags(ctrl_flags)
    if NTSdControlFlags.SELF_RELATIVE not in ctrl_flags:
        raise ValueError("Can only parse self-relative NT Security Descriptors!")

    def _parse_trustee_info(offset: int, default_flag: NTSdControlFlags) -> NTSdTrusteeInfo:
        """Decode the SID at *offset* (an offset of 0 means "no SID")."""
        sid = None
        if offset != 0:
            reader.seek(base_pos + offset)
            sid = decode_nt_security_id(reader)
        return NTSdTrusteeInfo(
            sid=sid,
            defaulted=(default_flag in ctrl_flags),
        )

    def _parse_acl_info(offset: int, is_dacl: bool) -> NTSdACLInfo:
        """Decode the ACL at *offset* together with its DACL/SACL control flags."""
        is_present = (NTSdControlFlags.DACL_PRESENT
                      if is_dacl else NTSdControlFlags.SACL_PRESENT) in ctrl_flags
        defaulted_flag = NTSdControlFlags.DACL_DEFAULTED if is_dacl else NTSdControlFlags.SACL_DEFAULTED
        auto_inh_flag = \
            NTSdControlFlags.DACL_AUTO_INHERITED if is_dacl else NTSdControlFlags.SACL_AUTO_INHERITED
        auto_inh_req_flag = \
            NTSdControlFlags.DACL_AUTO_INHERIT_REQ if is_dacl else NTSdControlFlags.SACL_AUTO_INHERIT_REQ
        protected_flag = NTSdControlFlags.DACL_PROTECTED if is_dacl else NTSdControlFlags.SACL_PROTECTED

        entries = ()
        if offset != 0:
            reader.seek(base_pos + offset)
            entries = tuple(decode_nt_acl(reader))
        return NTSdACLInfo(
            present=is_present,
            # Flagged as present but with no offset: reported as a "null" ACL.
            null=is_present and (offset == 0),
            entries=entries,
            defaulted=is_present and (defaulted_flag in ctrl_flags),
            auto_inherited=auto_inh_flag in ctrl_flags,
            auto_inherit_req=auto_inh_req_flag in ctrl_flags,
            protected=protected_flag in ctrl_flags,
        )

    return NTSecurityDescriptor(
        owner=_parse_trustee_info(owner_ptr, NTSdControlFlags.OWNER_DEFAULTED),
        group=_parse_trustee_info(group_ptr, NTSdControlFlags.GROUP_DEFAULTED),
        dacl=_parse_acl_info(dacl_ptr, True),
        sacl=_parse_acl_info(sacl_ptr, False),
        rm_control_valid=(NTSdControlFlags.RM_CONTROL_VALID in ctrl_flags),
    )
def read_from_binary(reader: BinaryReader) -> 'GZEntry':
    """Read one GZip entry (header, deflate stream and footer) from *reader*.

    The compressed stream is decompressed in bounded chunks purely to find
    where it ends and to verify the CRC32 and size stored in the footer; the
    decompressed data itself is discarded. On success the reader is left
    positioned right after the entry footer.

    Args:
        reader: Reader positioned at the GZip magic of the entry.

    Returns:
        A ``GZEntry`` describing the entry's metadata and offsets.

    Raises:
        NotImplementedError: If the compression method is not Deflate, or if
            reserved flag bits are set.
        GZipFileCorruptError: If the entry is truncated or malformed, the
            deflate stream is invalid, or the CRC32 / size check fails. The
            underlying error is chained as the cause.
    """
    entry_start_offset = reader.tell()
    raw_extra_field = None
    raw_filename = None
    raw_comment = None
    try:
        reader.expect_magic(GZIP_MAGIC, 'GZip magic')
        compression_method, flags, timestamp, compress_flags, host_os = \
            reader.read_struct('BBIBB', 'GZip entry header')
        if compression_method != GZCompressionMethod.DEFLATE:
            raise NotImplementedError(
                f"Found a GZ entry with compression method {compression_method}. Only Deflate entries are "
                f"supported (don't know how to parse past other types)")
        if flags & GZEntryFlags.RESERVED_MASK:
            raise NotImplementedError(
                f"Found a GZ entry with flags: {flags:08b}. As per the spec, cannot parse the entry when reserved "
                f"bits are set, as they may indicate the presence of extra members we don't know how to parse."
            )

        # Optional header members, in the order they are read from the file.
        if flags & GZEntryFlags.HAS_EXTRA:
            raw_extra_field = reader.read_length_prefixed_bytes(
                'extra field', length_bytes=2)
        if flags & GZEntryFlags.HAS_NAME:
            raw_filename = reader.read_null_terminated_bytes(
                'entry original name')
        if flags & GZEntryFlags.HAS_COMMENT:
            raw_comment = reader.read_null_terminated_bytes(
                'entry comment')
        if flags & GZEntryFlags.HAS_HEADER_CRC:
            # The header CRC is skipped rather than verified; damage will show
            # up soon enough when decompressing.
            reader.skip_bytes(2)

        data_start_offset = reader.tell()
        uncompressed_length = 0
        crc32 = zlib.crc32(b'')
        BUF_SIZE = 64000000
        # Negative wbits: raw deflate stream, without a zlib/gzip wrapper.
        decompressor = zlib.decompressobj(-zlib.MAX_WBITS)
        compressed_data = b''
        while not decompressor.eof:
            # Only fetch more input once the previous chunk is used up.
            if len(compressed_data) == 0:
                compressed_data = reader.read_at_most(BUF_SIZE)
                if len(compressed_data) == 0:
                    raise BadGZipFileError(
                        "File ends in the middle of a compressed block")
            # Cap the output per call so memory use stays bounded.
            decompressed_data = decompressor.decompress(
                compressed_data, BUF_SIZE)
            uncompressed_length += len(decompressed_data)
            crc32 = zlib.crc32(decompressed_data, crc32)
            compressed_data = decompressor.unconsumed_tail

        # Bytes read past the end of the deflate stream belong to the footer
        # (and whatever follows it), so rewind over them.
        reader.seek(-len(decompressor.unused_data), os.SEEK_CUR)
        declared_crc32, declared_size = reader.read_struct(
            'II', 'GZip entry footer')
        crc32 &= 0xffffffff
        if crc32 != declared_crc32:
            raise BadGZipFileError(
                f"Entry CRC failed (declared: 0x{declared_crc32:08x}, actual: 0x{crc32:08x})"
            )
        # The footer stores the size modulo 2**32.
        if declared_size != uncompressed_length & 0xffffffff:
            raise BadGZipFileError(
                f"Entry size does not match the declared size in the lowest 32 bits (declared: {declared_size}, "
                f"actual: {uncompressed_length})")
    except BadGZipFileError as e:
        raise GZipFileCorruptError(reader.name()) from e
    except BinaryReaderFormatError as e:
        raise GZipFileCorruptError(reader.name()) from e
    except zlib.error as e:
        # An invalid deflate stream is file corruption too; report it the same
        # way as every other corruption instead of leaking zlib.error.
        raise GZipFileCorruptError(reader.name()) from e

    original_filename, raw_filename = _try_decode(raw_filename)
    comment, raw_comment = _try_decode(raw_comment)
    if compression_method == GZCompressionMethod.DEFLATE:
        compress_flags = GZDeflateCompressionFlags(compress_flags)

    return GZEntry(
        flags=GZEntryFlags(flags),
        compression_method=_as_enum(compression_method, GZCompressionMethod),
        compression_flags=compress_flags,
        # Everything between the data start and the 8-byte footer.
        compressed_length=reader.tell() - data_start_offset - 8,
        uncompressed_length=uncompressed_length,
        uncompressed_crc32=declared_crc32,
        entry_start_offset=entry_start_offset,
        data_start_offset=data_start_offset,
        host_os=_as_enum(host_os, GZHostOS) if host_os != 255 else None,
        unix_timestamp=timestamp if timestamp != 0 else None,
        raw_extra_field=raw_extra_field,
        original_filename=original_filename,
        raw_original_filename=raw_filename,
        comment=comment,
        raw_comment=raw_comment,
    )