def parse_entry_long(entry_bytes_: bytes, my_check: int) -> dict:
    """Decode a single long-name directory entry from its raw bytes.

    The entry is accepted only when all of the following hold (checked in
    this order, stopping at the first failure):

    * the byte at offset 12 is 0,
    * the 16-bit little-endian value at offsets 26-27 is 0,
    * the byte at offset 11 is 15,
    * the byte at offset 13 equals ``my_check``.

    (Presumably this is the FAT long-file-name entry layout - attribute,
    type, first cluster and checksum fields - confirm against the caller.)

    Args:
        entry_bytes_: raw bytes of the entry; offsets 0-31 are read.
        my_check: value the byte at offset 13 must match.

    Returns:
        An empty dict when any check fails, otherwise a dict with the keys
        ``order`` (byte 0), ``name1`` (bytes 1-10), ``name2`` (bytes 14-25),
        ``name3`` (bytes 28-31) and ``is_last`` (True when bit 0x40 of
        ``order`` is set).
    """
    sequence = Int8ul.parse(entry_bytes_[0:1])
    first_chunk = entry_bytes_[1:11]

    # Guard clauses: each one rejects the entry as soon as a field is wrong.
    if Int8ul.parse(entry_bytes_[12:13]) != 0:
        return {}
    if Int16ul.parse(entry_bytes_[26:28]) != 0:
        return {}
    if Int8ul.parse(entry_bytes_[11:12]) != 15:
        return {}
    if Int8ul.parse(entry_bytes_[13:14]) != my_check:
        return {}

    second_chunk = entry_bytes_[14:26]
    third_chunk = entry_bytes_[28:32]

    parsed = {
        'order': sequence,
        'name1': first_chunk,
        'name2': second_chunk,
        'name3': third_chunk,
        # Bit 0x40 of the order byte flags the final entry of a sequence.
        'is_last': bool(sequence & 0x40),
    }
    return parsed
def _parse(self):
    """
        Parse the raw IPL sample held in self._raw, hash its invariant code section and try to match that hash
        against the known signatures. When nothing matches, run the code heuristics on the invariant code.

    Raises:
        InvalidIPLError: if the header cannot be parsed, or if the sample is shorter than the 14 sectors that
            are hashed.
    """
    try:
        header = PascalUtf16(Int16ul).parse(self._raw)
    except (ConstructError, UnicodeDecodeError) as e:
        raise InvalidIPLError('Invalid IPL structure: {0}\n{1}'.format(
            e, hexdump(self._raw[:0x200])))

    # IPL's code section is usually contained in the first 9 sectors. The remaining sectors are filled with
    # padding but it appears that the last (15th) sector can sometimes hold data not related to the boot process
    # and we need to exclude that from hash calculation.
    invariantLength = 14 * 512
    # Slicing never raises IndexError, so a short sample has to be detected with an explicit length check.
    if len(self._raw) < invariantLength:
        raise InvalidIPLError(
            'Invalid sample size for IPL: {0} (should be 15 * 512-bytes sectors)'
            .format(len(self._raw)))
    invariantCode = self._raw[:invariantLength]

    expectedLoader = None

    # Starting with NT 6.2, IPL has a localized string that must be excluded from hash computation.
    # The difference between these two kinds of IPL can be told from the instruction located at 0x56 :
    # a Jump Short (EB) in case of IPL<6.2 or a Jump Near (E9) otherwise.
    # A one-byte slice is compared (rather than indexing) so the test works whether indexing the raw data yields
    # a 1-character string (Python 2) or an integer (Python 3).
    if header == 'BOOTMGR' and self._raw[0x56:0x57] == b'\xe9':
        # The offset of the localized string seems to be stored at 0x117 (just before the beginning of the
        # assembly code); it is read here as a 16-bit little-endian value. But the value seems to be an offset
        # relative to the start of the whole boot record (including the VBR) and not just the IPL.
        # Therefore we need to subtract 0x200 to get the offset inside the IPL.
        strOffset = Int16ul.parse(self._raw[0x117:0x119]) - 0x200
        # Exclude from hash calculation everything between the string offset and the beginning of code
        invariantCode = invariantCode[:strOffset] + invariantCode[0x119:]
        expectedLoader = 'NT6.2+ IPL'

    codeHash = hashlib.sha256(invariantCode)
    self._matchHash(codeHash, expectedLoader)

    # If no whitelisted signature matched, try some simple heuristics to flag this IPL as malicious.
    # Note that the self._checkCode method is only given the "stripped" code section to help the disassembling.
    # This will obviously lead to broken offsets, but it doesn't matter since the heuristics don't use them.
    if len(self._signature) == 0:
        self._checkCode(invariantCode)
def _getInvariantCode(self, vbrType, vbrStruct):
    """
        Helper method that finds all the sections of the boot code that can be hashed and compared to a whitelist.
        This means that localized strings and other variable parameters (BPB, etc...) are excluded.
        Currently, this method only supports NTFS and Bitlocker VBR.

    Args:
        vbrType: unicode string corresponding to the VBR type ('NTFS' or 'bitlocker')
        vbrStruct: construct.container of the VBR

    Returns:
        2-tuple (string of expected loader or None when it could not be determined, byte string made of the
        concatenated invariant sections of code)

    Raises:
        NotImplementedError: if vbrType is neither 'NTFS' nor 'bitlocker'
    """
    codeStart = 0
    codeEnd = None
    # Accumulate into a byte string: the pieces appended below are raw boot-sector bytes, and mixing them with a
    # text string fails on Python 3. On Python 2, b'' is the very same type as str().
    invariantCode = b''
    expectedLoader = None

    if vbrType == 'NTFS':
        # The first three bytes are a jump over the NTFS BPB to where the code really starts (0x54) and a NOP
        invariantCode += vbrStruct.JumpOverBPB
        codeStart = 0x54
        # NTFS VBR contains localized strings which must be excluded from the hash computation.
        # Before Windows 8, these strings are located at 4 different offsets which can be calculated by adding
        # 0x100 to the values respectively stored in bytes 0x1f8, 0x1f9, 0x1fa and 0x1fb.
        # Starting from Windows 8, these strings are located at 3 different offsets which are directly stored in
        # little endian words respectively at 0x1f6, 0x1f8 and 0x1fa
        # Since there is no easy way to tell which version of Windows we are dealing with beforehand, we first
        # assume it is a Windows < 8 by testing 0x1f8 against all the known first offsets. If all tests fail,
        # assume it is Windows >= 8 and check 0x1f6 against the only known first offset (to date)
        knownLegacyOffsets = {
            0x83: 'NT5.1/NT5.2 VBR',  # Windows NT5
            0x80: 'NT6.0 VBR',  # Windows NT6.0
            0x8c: 'NT6.1 VBR',  # Windows NT6.1
        }
        # A one-byte slice (not an index) is parsed so that the parser always receives a byte string.
        firstStrOffset = Int8ub.parse(self._raw[0x1f8:0x1f9])
        if firstStrOffset in knownLegacyOffsets:
            expectedLoader = knownLegacyOffsets[firstStrOffset]
            codeEnd = 0x100 + firstStrOffset
        else:
            # Windows NT6.2+
            firstStrOffset = Int16ul.parse(self._raw[0x1f6:0x1f8])
            if firstStrOffset == 0x18a:
                expectedLoader = 'NT6.2+ VBR'
                codeEnd = firstStrOffset

        if codeEnd is None:
            # None of the known layouts matched: record it and hash nothing beyond the initial jump
            # (codeEnd = 0 makes the final slice empty).
            self._suspiciousBehaviour.append(
                'Invalid string offset: {0:#x}'.format(firstStrOffset))
            self._logger.debug(
                'First localized string offset is wrong for a NTFS VBR: {0:#x}. '
                'It should be 0x83, 0x80, 0x8c or 0x18a.'.format(
                    firstStrOffset))
            codeEnd = 0
    elif vbrType == 'bitlocker':
        expectedLoader = 'NT6.1+ Bitlocker VBR'
        # The first three bytes are a jump over the NTFS BPB to where the code really starts (0x5A) and a NOP
        invariantCode += vbrStruct.JumpOverBPB
        # First section of code (_BITLOCKER_VBR_STRUCT.Code1)
        invariantCode += vbrStruct.Code1
        # In the second section of code, there are localized strings which must be excluded from hash computation.
        # Their offsets are stored in the last 3 bytes before the VBR signature (0x55aa).
        # For Windows 8, 8.1 and 10, the first string offset seems to always be 0x100 (ie. FirstStrOffset = 0x00)
        if vbrStruct.FirstStrOffset != 0:
            self._suspiciousBehaviour.append(
                'Invalid string offset: {0:#x}'.format(
                    vbrStruct.FirstStrOffset))
            self._logger.debug(
                'First localized string offset is wrong for a Bitlocker VBR. '
                'It should be 0x00) : {0:#x}'.format(
                    vbrStruct.FirstStrOffset))
        codeStart = 0xc8  # Offset of Code2
        codeEnd = 0x100 + vbrStruct.FirstStrOffset
    else:
        raise NotImplementedError(
            'VBR type "{0}" is not implemented yet'.format(vbrType))

    self._logger.debug(
        'Expecting {0}. Code starts at {1:#x} and ends at {2:#x}'.format(
            expectedLoader, codeStart, codeEnd))

    invariantCode += self._raw[codeStart:codeEnd]
    return expectedLoader, invariantCode
def uint16(self):
    """Read two bytes with ``self.read`` and return them decoded by ``Int16ul``.

    ``Int16ul`` is presumably construct's unsigned 16-bit little-endian
    integer - confirm against the file's imports.
    """
    two_bytes = self.read(2)
    value = Int16ul.parse(two_bytes)
    return value