class MasterBootRecord(BootRecord):
    """Parses and analyzes a Master Boot Record (MBR) boot sector.

    Validates the documented MBR layout, extracts the partition table, the
    disk signature and the bootloader code section, narrows the code down to
    an "invariant" portion, hashes it against a whitelist and, failing a
    match, applies simple disassembly heuristics to flag suspicious code.

    NOTE(review): relies on the BootRecord base class (defined elsewhere) to
    provide self._raw, self._signature, self._suspiciousBehaviour,
    self._partTable, self._matchHash and self._logger -- confirm against the
    base class.
    """

    # Declarative layout of a 512-byte MBR (construct 2.x API):
    # 440 bytes of bootloader code, a 4-byte disk signature, 2 bytes of
    # padding, four 16-byte partition table entries and the 0x55AA signature.
    _MBR_STRUCT = construct.Struct(
        "mbr",
        construct.HexDumpAdapter(construct.Bytes("bootloader_code", 440)),
        construct.Field('disk_signature', 4),
        construct.Padding(2),
        construct.Array(
            4,
            construct.Struct(
                "partitions",
                # Bootable flag: 0x80 (negative as a signed byte) = active.
                construct.SLInt8("state"),
                # CHS address of the first sector of the partition.
                construct.BitStruct(
                    "beginning",
                    construct.Octet("head"),
                    construct.Bits("sect", 6),
                    construct.Bits("cyl", 10),
                ),
                # Partition type byte; unknown values pass through unchanged.
                construct.Enum(
                    construct.UBInt8("type"),
                    Nothing=0x00,
                    FAT12=0x01,
                    XENIX_ROOT=0x02,
                    XENIX_USR=0x03,
                    FAT16_old=0x04,
                    Extended_DOS=0x05,
                    FAT16=0x06,
                    FAT32=0x0b,
                    FAT32_LBA=0x0c,
                    NTFS=0x07,
                    LINUX_SWAP=0x82,
                    LINUX_NATIVE=0x83,
                    PROTECTIVE_MBR=0xee,
                    _default_=construct.Pass,
                ),
                # CHS address of the last sector of the partition.
                construct.BitStruct(
                    "ending",
                    construct.Octet("head"),
                    construct.Bits("sect", 6),
                    construct.Bits("cyl", 10),
                ),
                construct.ULInt32(
                    "sector_offset"),  # offset from MBR in sectors
                construct.ULInt32("size"),  # in sectors
            )),
        construct.Const(construct.Bytes("signature", 2), '55aa'.decode('hex')),
    )

    def __init__(self, filePath, size, offset=None, whitelist=()):
        """Initializes the MBR boot record.

        Args:
            filePath: path of the file containing the boot record.
            size: size in bytes of the boot record.
            offset: optional offset of the boot record inside the file.
            whitelist: iterable of known-good code hashes.
        """
        self._type = 'MBR'
        super(MasterBootRecord, self).__init__(filePath, size, offset,
                                               whitelist)

    def _parse(self):
        """
        Main method in charge of parsing the MBR.
        It will try to parse the boot record according to documented known
        structure and extract the partition table disk signature and code
        section. It will then try to narrow down invariant code, hash it and
        match the hash against a whitelist. If no match was found, it will try
        some simple heuristics to detect malicious behaviours.

        Returns: nothing

        Raises:
            InvalidMBRError: if the raw data does not match the MBR layout.
        """
        try:
            mbr = self._MBR_STRUCT.parse(self._raw)
        except construct.core.ConstructError as e:
            raise InvalidMBRError('Invalid MBR structure: {0}\n{1}'.format(
                e, hexdump(self._raw)))

        self._parsePartTable(mbr.partitions)

        # Windows stores the disk signature at 0x1B8, other MBRs seem to
        # leave this area alone.
        self._diskSignature = mbr.disk_signature

        # If code section is null, check for protective MBR signature
        # (detected in partition table parsing). If found, then the machine
        # is likely using UEFI instead of BIOS to boot. If not, it could mean
        # that the sample being analyzed has been tampered by a bootkit.
        if mbr.bootloader_code.encode('hex') == 440 * '00':
            if 'Protective MBR' in self._signature:
                self._signature.append('UEFI (no legacy boot code)')
            else:
                self._suspiciousBehaviour.append('Code section is null')
        else:
            expectedLoader, invariantCode = self._getInvariantCode(
                mbr.bootloader_code)
            codeHash = hashlib.sha256(invariantCode)
            self._matchHash(codeHash, expectedLoader)
            if len(self._signature) == 0:
                # No whitelisted signature matched, try some simple heuristics
                # to flag this MBR as malicious.
                # Note that the self._checkCode method is only given the
                # "invariant" code section to help with the disassembling.
                # This will obviously leads to broken offsets, but it doesn't
                # matter since the heuristics don't use them.
                self._checkCode(invariantCode)

    def _parsePartTable(self, partitions):
        """
        Private method that parses the partition table of the MBR.
        Updates self._partTable list.

        Args:
            partitions: Construct.Container object of the partition table

        Returns: nothing
        """
        partNum = 0
        for part in partitions:
            partNum += 1
            # Assume a partition entry without size (in LBA) or type is
            # invalid, and do not include it in the listing.
            if part.size != 0 and part.type != 'Nothing':
                # part.state < 0 means the 0x80 "active" bit is set (the
                # state field is parsed as a signed byte).
                self._partTable.append((partNum, part.state < 0, part.type,
                                        part.sector_offset, part.size))
            else:
                self._logger.debug('Ignoring invalid partition: %s', part)
            # Early detection of protective MBR so that we don't try to make
            # sense of the MBR partition table.
            if part.type == 'PROTECTIVE_MBR' and partNum == 1:
                self._logger.debug(
                    'Protective MBR detected, MBR partition table should not be taken into account. '
                    'GPT partition table parser not implemented yet')
                self._signature.append('Protective MBR')

    def _getInvariantCode(self, rawCode):
        """
        Helper method that tries to narrow down "invariant code" which can be
        hashed and compared to well known signatures. Most MBRs have localized
        error strings which must be excluded from the hash computation because
        they may vary from a country to another.

        First, this method tries to detect what kind of MBR it is dealing
        with. Most of the time, it is enough to look for some known hardcoded
        strings that identify "well known" MBR (such as Truecrypt, GRUB2,
        etc...). Then, this method finds where the strings are and "removes"
        them (as in "does not include them"). Finding these strings can be
        achieved by quickly studying the assembly code and looking for how
        these strings are echoed on screen at boot time (using interrupt
        0x10). This research only needs to be done once for each type of MBR
        but requires an analyst to do it by static analysis. This script
        cannot take care of this. This method merely implements the results of
        such work.

        Currently supported MBR are:
            - Truecrypt
            - McAfee Endpoint Encryption (Safeboot)
            - GRUB2
            - Windows (XP to 10)

        Args:
            rawCode: str of the code section

        Returns:
            2-tuple (unicode string of expected loader, concatenated strings
            of invariant sections of code)
        """
        # By default, assume all the MBR code section will be hashed. It is
        # obviously wrong in most cases, but it allows for a "default case"
        # which will automatically matches no known hash in case something
        # goes wrong with the detection.
        codeStart = 0
        codeEnd = len(rawCode)
        expectedLoader = None
        invariantCode = str()

        # TrueCrypt (detected with the hardcoded string following the first
        # jump: " TrueCrypt Boot Loader")
        if rawCode[0x5:0x1b].encode('hex').upper(
        ) == '2054727565437279707420426F6F74204C6F61646572':
            # TrueCrypt uses hardcoded and not-localized error strings.
            # Therefore every TrueCrypt MBR should have the same code from
            # start to end.
            expectedLoader = 'TrueCrypt MBR'
        # MacAfee SafeBoot (detected with the hardcoded string following the
        # first jump: "Safeboot ")
        elif rawCode[0x3:0xc].encode('hex').upper() == '53616665426F6F7420':
            # Two versions have been seen but both start with a jump to the
            # same offset (0x26). There are some strings at the of the code
            # section but localization is unlikely so it will be assumed to be
            # hardcoded (until a localized version is found...). Therefore,
            # Safeboot code can be hashed from 0x26 to the end of code
            # section.
            invariantCode += rawCode[:0x3]  # Hash the first JMP
            codeStart = 0x26
            expectedLoader = 'Safeboot MBR'
        # GRUB (detected with the hardcoded string "GRUB " located at 0x188)
        elif rawCode[0x188:0x18d].encode('hex').upper() == '4752554220':
            # GRUB has some error strings but they are hardcoded and not
            # localized so they can be included in the hash computation.
            # However GRUB can be installed on a disk (MBR) as well as on a
            # partition (in a kind of VBR). But in both cases the code used is
            # the same. Since a BPB is needed for the latter case it is also
            # present in the MBR (but not needed). It therefore has to be
            # excluded from the hash computation.
            # GRUB is jumping over the BIOS Parameter Block located between
            # 0x3 and 0x5a. It should be followed by the kernel address
            # (word), kernel sector (dword), kernel sector high (dword) and
            # boot drive (byte). Therefore the code really starts at 0x65.
            # These values are hardcoded in boot.img and have little chance to
            # change anytime soon.
            codeStart = 0x65
            invariantCode += rawCode[:0x3]  # Hash the first JMP
            expectedLoader = 'GRUB2 MBR'
        # Windows MBR cannot be detected with hardcoded strings, so they fall
        # in the default case and further checks are then made based on the
        # hypothesis that this is indeed a Windows MBR.
        else:
            # Starting with NT5.0, the MBR contains localized strings which
            # must be excluded from the hash computation. These strings are
            # located after the code, at 3 different offsets which can be
            # calculated by adding 0x100 to the values respectively stored in
            # bytes 0x1b5, 0x1b6 and 0x1b7 (last bytes of the code section).
            # Eg: The first localized string is at:
            #     0x100 + the value saved at offset 0x1B5
            # Even though localized strings can be of different lengths, the
            # offset of the first one does not vary given one Windows version.
            # This can therefore be used to tell Windows versions apart.
            firstStrOffset = construct.UBInt8('FirstStringOffset').parse(
                rawCode[0x1b5])
            # Windows NT5
            if firstStrOffset == 0x2c:
                expectedLoader = 'NT5.1/NT5.2 MBR'
                codeEnd = 0x100 + firstStrOffset
            # Windows NT6.0
            elif firstStrOffset == 0x62:
                expectedLoader = 'NT6.0 MBR'
                codeEnd = 0x100 + firstStrOffset
            # Windows NT6.1+
            elif firstStrOffset == 0x63:
                expectedLoader = 'NT6.1+ MBR'
                codeEnd = 0x100 + firstStrOffset
            else:
                self._suspiciousBehaviour.append(
                    'Invalid string offset: {0:#x}'.format(firstStrOffset))
                self._logger.debug(
                    'First localized string offset is wrong for a windows MBR.'
                    'It should be 0x2c, 0x62 or 0x63) : {0:#x}'.format(
                        firstStrOffset))

        self._logger.debug(
            'Expecting {0}. Code starts at {1:#x} and ends at {2:#x}'.format(
                expectedLoader, codeStart, codeEnd))

        invariantCode += rawCode[codeStart:codeEnd]
        return expectedLoader, invariantCode

    def _checkCode(self, rawCode):
        """Disassembles the code section and applies simple heuristics.

        Flags a JMP/CALL seen before the first RET (legitimate loaders
        relocate themselves first) and any BIOS interrupt other than 0x10,
        0x13, 0x18 and 0x1a. Appends findings to self._suspiciousBehaviour.

        Args:
            rawCode: str of the (invariant) code section to disassemble.

        Returns: nothing
        """
        md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_16)
        md.detail = True

        checkJmp = True
        for i in md.disasm(rawCode, 0):
            # Check for JUMPs and CALLs before the first PUSH/RET.
            if checkJmp and len(i.groups) > 0:
                # Group check if available
                if hasattr(capstone.x86, 'X86_GRP_CALL') and hasattr(
                        capstone.x86, 'X86_GRP_RET'):
                    if capstone.x86.X86_GRP_CALL in i.groups or capstone.x86.X86_GRP_JUMP in i.groups:
                        self._suspiciousBehaviour.append(
                            'JMP or CALL before relocation')
                        checkJmp = False
                    elif capstone.x86.X86_GRP_RET in i.groups:
                        # Stop search after the first PUSH/RET
                        checkJmp = False
                # Manual check in case capstone version doesn't support CALL
                # and RET groups
                else:
                    if i.mnemonic[0] == 'j' or i.mnemonic == 'call':
                        self._suspiciousBehaviour.append(
                            'JMP or CALL before relocation')
                        checkJmp = False
                    elif i.mnemonic[:3] == 'ret':
                        # Stop search after the first PUSH/RET
                        checkJmp = False
            # Check for unknown interrupt (i.bytes[1] is the int operand).
            if i.mnemonic == 'int' and i.bytes[1] not in (0x10, 0x13, 0x18,
                                                          0x1a):
                self._suspiciousBehaviour.append(
                    'Unknown Interrupt : {0:#x}'.format(i.bytes[1]))
class AslParser(interface.BaseParser):
    """Parser for ASL log files.

    Fixes applied:
    - ReadAslEvent checked ``offset`` only after reading the dynamic data;
      a terminal offset of 0 made it call ``file_object.read()`` with a
      negative length, consuming the rest of the file for nothing. The
      check is now done first.
    - A warning message used the ``{0:d}`` format specifier on an exception
      object, which raises ValueError at logging time; it now uses ``{0:s}``.
    """

    NAME = 'asl_log'
    DESCRIPTION = u'Parser for ASL log files.'

    ASL_MAGIC = 'ASL DB\x00\x00\x00\x00\x00\x00'

    # If not right assigned, the value is "-1".
    ASL_NO_RIGHTS = 'ffffffff'

    # Priority level (criticity)
    ASL_MESSAGE_PRIORITY = {
        0: 'EMERGENCY',
        1: 'ALERT',
        2: 'CRITICAL',
        3: 'ERROR',
        4: 'WARNING',
        5: 'NOTICE',
        6: 'INFO',
        7: 'DEBUG'}

    # ASL File header.
    # magic: magic number that identify ASL files.
    # version: version of the file.
    # offset: first record in the file.
    # timestamp: epoch time when the first entry was written.
    # last_offset: last record in the file.
    ASL_HEADER_STRUCT = construct.Struct(
        'asl_header_struct',
        construct.String('magic', 12),
        construct.UBInt32('version'),
        construct.UBInt64('offset'),
        construct.UBInt64('timestamp'),
        construct.UBInt32('cache_size'),
        construct.UBInt64('last_offset'),
        construct.Padding(36))

    # The record structure is:
    # [HEAP][STRUCTURE][4xExtraField][2xExtraField]*[PreviousEntry]
    # Record static structure.
    # tam_entry: it contains the number of bytes from this file position
    #            until the end of the record, without counts itself.
    # next_offset: next record. If is equal to 0x00, it is the last record.
    # asl_message_id: integer that has the numeric identification of the
    #                 event.
    # timestamp: Epoch integer that has the time when the entry was created.
    # nanosecond: nanosecond to add to the timestamp.
    # level: level of priority.
    # pid: process identification that ask to save the record.
    # uid: user identification that has lunched the process.
    # gid: group identification that has lunched the process.
    # read_uid: identification id of a user. Only applied if is not -1
    #           (all FF). Only root and this user can read the entry.
    # read_gid: the same than read_uid, but for the group.
    ASL_RECORD_STRUCT = construct.Struct(
        'asl_record_struct',
        construct.Padding(2),
        construct.UBInt32('tam_entry'),
        construct.UBInt64('next_offset'),
        construct.UBInt64('asl_message_id'),
        construct.UBInt64('timestamp'),
        construct.UBInt32('nanosec'),
        construct.UBInt16('level'),
        construct.UBInt16('flags'),
        construct.UBInt32('pid'),
        construct.UBInt32('uid'),
        construct.UBInt32('gid'),
        construct.UBInt32('read_uid'),
        construct.UBInt32('read_gid'),
        construct.UBInt64('ref_pid'))

    ASL_RECORD_STRUCT_SIZE = ASL_RECORD_STRUCT.sizeof()

    # 8-byte fields, they can be:
    # - String: [Nibble = 1000 (8)][Nibble = Length][7 Bytes = String].
    # - Integer: integer that has the byte position in the file that points
    #            to an ASL_RECORD_DYN_VALUE struct. If the value of the
    #            integer is equal to 0, it means that it has not data (skip).

    # If the field is a String, we use this structure to decode each
    # integer byte in the corresponding character (ASCII Char).
    ASL_OCTET_STRING = construct.ExprAdapter(
        construct.Octet('string'),
        encoder=lambda obj, ctx: ord(obj),
        decoder=lambda obj, ctx: chr(obj))

    # Field string structure. If the first bit is 1, it means that it
    # is a String (1000) = 8, then the next nibble has the number of
    # characters. The last 7 bytes are the number of bytes.
    ASL_STRING = construct.BitStruct(
        'string',
        construct.Flag('type'),
        construct.Bits('filler', 3),
        construct.If(lambda ctx: ctx.type, construct.Nibble('string_length')),
        construct.If(lambda ctx: ctx.type,
                     construct.Array(7, ASL_OCTET_STRING)))

    # 8-byte pointer to a byte position in the file.
    ASL_POINTER = construct.UBInt64('pointer')

    # Dynamic data structure pointed by a pointer that contains a String:
    # [2 bytes padding][4 bytes lenght of String][String].
    ASL_RECORD_DYN_VALUE = construct.Struct(
        'asl_record_dyn_value',
        construct.Padding(2),
        construct.PascalString(
            'value', length_field=construct.UBInt32('length')))

    def Parse(self, parser_context, file_entry):
        """Extract entries from an ASL file.

        Args:
            parser_context: A parser context object (instance of
                            ParserContext).
            file_entry: A file entry object (instance of dfvfs.FileEntry).

        Raises:
            UnableToParseFile: if the header cannot be parsed or the magic
                               value does not match.
        """
        file_object = file_entry.GetFileObject()
        file_object.seek(0, os.SEEK_SET)

        try:
            header = self.ASL_HEADER_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            file_object.close()
            raise errors.UnableToParseFile(
                u'Unable to parse ASL Header with error: {0:s}.'.format(
                    exception))

        if header.magic != self.ASL_MAGIC:
            file_object.close()
            raise errors.UnableToParseFile(
                u'Not an ASL Header, unable to parse.')

        # Get the first and the last entry.
        offset = header.offset
        old_offset = header.offset
        last_offset_header = header.last_offset

        # If the ASL file has entries.
        if offset:
            event_object, offset = self.ReadAslEvent(file_object, offset)
            while event_object:
                parser_context.ProduceEvent(
                    event_object, parser_name=self.NAME,
                    file_entry=file_entry)

                # TODO: an anomaly object must be emitted once that is
                # implemented.
                # Sanity check, the last read element must be the same as
                # indicated by the header.
                if offset == 0 and old_offset != last_offset_header:
                    logging.warning(u'Parsing ended before the header ends.')
                old_offset = offset
                event_object, offset = self.ReadAslEvent(file_object, offset)

        file_object.close()

    def ReadAslEvent(self, file_object, offset):
        """Returns an AslEvent from a single ASL entry.

        Args:
            file_object: a file-like object that points to an ASL file.
            offset: offset where the static part of the entry starts.

        Returns:
            An event object constructed from a single ASL record, and the
            offset of the next record (a 2-tuple of (None, None) on failure
            or when there is no next record).
        """
        # Bug fix: a zero offset means there is no record to read; the
        # original code read (offset - tell()) bytes first, which for
        # offset == 0 is a negative length and makes read() consume the rest
        # of the file.
        if not offset:
            return None, None

        # The heap of the entry is saved to try to avoid seek (performance
        # issue). It has the real start position of the entry.
        dynamic_start = file_object.tell()
        dynamic_part = file_object.read(offset - file_object.tell())

        try:
            record_header = self.ASL_RECORD_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            logging.warning(
                u'Unable to parse ASL event with error: {0:s}'.format(
                    exception))
            return None, None

        # Variable tam_fields = is the real length of the dynamic fields.
        # We have this:
        # [Record_Struct] + [Dynamic_Fields] + [Pointer_Entry_Before]
        # In Record_Struct we have a field called tam_entry, where it has the
        # number of bytes until the end of the entry from the position that
        # the field is. The tam_entry is between the 2th and the 6th byte in
        # the [Record_Struct].
        # tam_entry = ([Record_Struct]-6)+[Dynamic_Fields]+[Pointer_Entry_Before]
        # Also, we do not need [Point_Entry_Before] and then we delete the
        # size of [Point_Entry_Before] that it is 8 bytes (8):
        # tam_entry = ([Record_Struct]-6)+[Dynamic_Fields]+[Pointer_Entry_Before]
        # [Dynamic_Fields] = tam_entry - [Record_Struct] + 6 - 8
        # [Dynamic_Fields] = tam_entry - [Record_Struct] - 2
        tam_fields = record_header.tam_entry - self.ASL_RECORD_STRUCT_SIZE - 2

        # Dynamic part of the entry that contains minimal four fields of 8
        # bytes plus 2x[8bytes] fields for each extra ASL_Field.
        # The four first fields are always the Host, Sender, Facility and
        # Message. After the four first fields, the entry might have extra
        # ASL_Fields. For each extra ASL_field, it has a pair of 8-byte fields
        # where the first 8 bytes contains the name of the extra ASL_field and
        # the second 8 bytes contains the text of the exta field.
        # All of this 8-byte field can be saved using one of these three
        # differents types:
        # - Null value ('0000000000000000'): nothing to do.
        # - String: It is string if first bit = 1 or first nibble = 8 (1000).
        #           Second nibble has the length of string.
        #           The next 7 bytes have the text characters of the string
        #           padding the end with null characters: '0x00'.
        #           Example: [8468 6964 6400 0000]
        #                    [8] String, [4] length, value: [68 69 64 64] =
        #                    hidd.
        # - Pointer: static position in the file to a special struct
        #            implemented as an ASL_RECORD_DYN_VALUE.
        #            Example: [0000 0000 0000 0077]
        #            It points to the file position 0x077 that has a
        #            ASL_RECORD_DYN_VALUE structure.
        values = []
        while tam_fields > 0:
            try:
                raw_field = file_object.read(8)
            except (IOError, construct.FieldError) as exception:
                # Bug fix: was '{0:d}' which raises ValueError when applied
                # to an exception object.
                logging.warning(
                    u'Unable to parse ASL event with error: {0:s}'.format(
                        exception))
                return None, None
            try:
                # Try to read as a String.
                field = self.ASL_STRING.parse(raw_field)
                values.append(''.join(field.string[0:field.string_length]))
                # Go to parse the next extra field.
                tam_fields -= 8
                continue
            except ValueError:
                pass
            # If it is not a string, it must be a pointer.
            try:
                field = self.ASL_POINTER.parse(raw_field)
            except ValueError as exception:
                logging.warning(
                    u'Unable to parse ASL event with error: {0:s}'.format(
                        exception))
                return None, None
            if field != 0:
                # The next IF ELSE is only for performance issues, avoiding
                # seek. If the pointer points a lower position than where the
                # actual entry starts, it means that it points to a previuos
                # entry.
                pos = field - dynamic_start
                # Bigger or equal 0 means that the data is in the actual
                # entry.
                if pos >= 0:
                    try:
                        values.append((self.ASL_RECORD_DYN_VALUE.parse(
                            dynamic_part[pos:])).value.partition('\x00')[0])
                    except (IOError, construct.FieldError) as exception:
                        logging.warning(
                            u'Unable to parse ASL event with error: {0:s}'.
                            format(exception))
                        return None, None
                else:
                    # Only if it is a pointer that points to the
                    # heap from another entry we use the seek method.
                    main_position = file_object.tell()
                    # If the pointer is in a previous entry.
                    if main_position > field:
                        file_object.seek(field - main_position, os.SEEK_CUR)
                        try:
                            values.append(
                                (self.ASL_RECORD_DYN_VALUE.parse_stream(
                                    file_object)).value.partition('\x00')[0])
                        except (IOError, construct.FieldError):
                            logging.warning((
                                u'The pointer at {0:d} (0x{0:x}) points to '
                                u'invalid information.').format(
                                    main_position -
                                    self.ASL_POINTER.sizeof()))
                        # Come back to the position in the entry.
                        _ = file_object.read(
                            main_position - file_object.tell())
                    else:
                        _ = file_object.read(field - main_position)
                        values.append((self.ASL_RECORD_DYN_VALUE.parse_stream(
                            file_object)).value.partition('\x00')[0])
                        # Come back to the position in the entry.
                        file_object.seek(
                            main_position - file_object.tell(), os.SEEK_CUR)
            # Next extra field: 8 bytes more.
            tam_fields -= 8

        # Read the last 8 bytes of the record that points to the previous
        # entry.
        _ = file_object.read(8)

        # Parsed section, we translate the read data to an appropriate format.
        microsecond = record_header.nanosec // 1000
        timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
            record_header.timestamp, microsecond)
        record_position = offset
        message_id = record_header.asl_message_id
        level = u'{0} ({1})'.format(
            self.ASL_MESSAGE_PRIORITY[record_header.level],
            record_header.level)
        # If the value is -1 (FFFFFFFF), it can be read by everyone.
        if record_header.read_uid != int(self.ASL_NO_RIGHTS, 16):
            read_uid = record_header.read_uid
        else:
            read_uid = 'ALL'
        if record_header.read_gid != int(self.ASL_NO_RIGHTS, 16):
            read_gid = record_header.read_gid
        else:
            read_gid = 'ALL'

        # Parsing the dynamic values (text or pointers to position with
        # text). The first four are always the host, sender, facility, and
        # message.
        computer_name = values[0]
        sender = values[1]
        facility = values[2]
        message = values[3]

        # If the entry has an extra fields, they works as a pairs:
        # The first is the name of the field and the second the value.
        extra_information = ''
        if len(values) > 4:
            values = values[4:]
            for index in xrange(0, len(values) // 2):
                extra_information += (u'[{0}: {1}]'.format(
                    values[index * 2], values[(index * 2) + 1]))

        # Return the event and the offset for the next entry.
        return AslEvent(
            timestamp, record_position, message_id, level, record_header,
            read_uid, read_gid, computer_name, sender, facility, message,
            extra_information), record_header.next_offset
class ASLParser(interface.FileObjectParser):
    """Parser for ASL log files.

    Fixes applied:
    - ReadASLEvent checked ``offset`` only after reading the dynamic data;
      a terminal offset of 0 made it call ``file_object.read()`` with a
      negative length, consuming the rest of the file for nothing. The
      check is now done first.
    - The extra-values ``map()`` result is wrapped in ``list()`` so the
      subsequent slicing also works where ``map`` returns an iterator
      (Python 3); on Python 2 this is a no-op.
    """

    _INITIAL_FILE_OFFSET = None

    NAME = u'asl_log'
    DESCRIPTION = u'Parser for ASL log files.'

    _ASL_MAGIC = b'ASL DB\x00\x00\x00\x00\x00\x00'

    # ASL File header.
    # magic: magic number that identify ASL files.
    # version: version of the file.
    # offset: first record in the file.
    # timestamp: time when the first entry was written.
    #            Contains the number of seconds since January 1, 1970
    #            00:00:00 UTC.
    # last_offset: last record in the file.
    _ASL_HEADER_STRUCT = construct.Struct(
        u'asl_header_struct',
        construct.String(u'magic', 12),
        construct.UBInt32(u'version'),
        construct.UBInt64(u'offset'),
        construct.UBInt64(u'timestamp'),
        construct.UBInt32(u'cache_size'),
        construct.UBInt64(u'last_offset'),
        construct.Padding(36))

    # The record structure is:
    # [HEAP][STRUCTURE][4xExtraField][2xExtraField]*[PreviousEntry]
    # Record static structure.
    # tam_entry: it contains the number of bytes from this file position
    #            until the end of the record, without counts itself.
    # next_offset: next record. If is equal to 0x00, it is the last record.
    # asl_message_id: integer that has the numeric identification of the
    #                 event.
    # timestamp: the entry creation date and time.
    #            Contains the number of seconds since January 1, 1970
    #            00:00:00 UTC.
    # nanosecond: nanosecond to add to the timestamp.
    # level: level of priority.
    # pid: process identification that ask to save the record.
    # uid: user identification that has lunched the process.
    # gid: group identification that has lunched the process.
    # read_uid: identification id of a user. Only applied if is not -1
    #           (all FF). Only root and this user can read the entry.
    # read_gid: the same than read_uid, but for the group.
    _ASL_RECORD_STRUCT = construct.Struct(
        u'asl_record_struct',
        construct.Padding(2),
        construct.UBInt32(u'tam_entry'),
        construct.UBInt64(u'next_offset'),
        construct.UBInt64(u'asl_message_id'),
        construct.UBInt64(u'timestamp'),
        construct.UBInt32(u'nanosec'),
        construct.UBInt16(u'level'),
        construct.UBInt16(u'flags'),
        construct.UBInt32(u'pid'),
        construct.UBInt32(u'uid'),
        construct.UBInt32(u'gid'),
        construct.UBInt32(u'read_uid'),
        construct.UBInt32(u'read_gid'),
        construct.UBInt64(u'ref_pid'))

    _ASL_RECORD_STRUCT_SIZE = _ASL_RECORD_STRUCT.sizeof()

    # 8-byte fields, they can be:
    # - String: [Nibble = 1000 (8)][Nibble = Length][7 Bytes = String].
    # - Integer: integer that has the byte position in the file that points
    #            to an ASL_RECORD_DYN_VALUE struct. If the value of the
    #            integer is equal to 0, it means that it has not data (skip).

    # If the field is a String, we use this structure to decode each
    # integer byte in the corresponding character (ASCII Char).
    _ASL_OCTET_STRING = construct.ExprAdapter(
        construct.Octet(u'string'),
        encoder=lambda obj, ctx: ord(obj),
        decoder=lambda obj, ctx: chr(obj))

    # Field string structure. If the first bit is 1, it means that it
    # is a String (1000) = 8, then the next nibble has the number of
    # characters. The last 7 bytes are the number of bytes.
    _ASL_STRING = construct.BitStruct(
        u'string',
        construct.Flag(u'type'),
        construct.Bits(u'filler', 3),
        construct.If(lambda ctx: ctx.type, construct.Nibble(u'string_length')),
        construct.If(lambda ctx: ctx.type,
                     construct.Array(7, _ASL_OCTET_STRING)))

    # 8-byte pointer to a byte position in the file.
    _ASL_POINTER = construct.UBInt64(u'pointer')

    # Dynamic data structure pointed by a pointer that contains a String:
    # [2 bytes padding][4 bytes size of String][String].
    _ASL_RECORD_DYN_VALUE = construct.Struct(
        u'asl_record_dyn_value',
        construct.Padding(2),
        construct.UBInt32(u'size'),
        construct.Bytes(u'value', lambda ctx: ctx.size))

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses an ALS file-like object.

        Args:
            parser_mediator: a parser mediator object (instance of
                             ParserMediator).
            file_object: a file-like object.

        Raises:
            UnableToParseFile: when the file cannot be parsed.
        """
        file_object.seek(0, os.SEEK_SET)

        try:
            header = self._ASL_HEADER_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                u'Unable to parse ASL Header with error: {0:s}.'.format(
                    exception))

        if header.magic != self._ASL_MAGIC:
            raise errors.UnableToParseFile(
                u'Not an ASL Header, unable to parse.')

        offset = header.offset
        if not offset:
            return

        header_last_offset = header.last_offset

        previous_offset = offset
        event_object, offset = self.ReadASLEvent(
            parser_mediator, file_object, offset)
        while event_object:
            # Sanity check, the last read element must be the same as
            # indicated by the header.
            if offset == 0 and previous_offset != header_last_offset:
                parser_mediator.ProduceParseError(
                    u'Unable to parse header. Last element header does not '
                    u'match header offset.')
            previous_offset = offset
            event_object, offset = self.ReadASLEvent(
                parser_mediator, file_object, offset)

    def ReadASLEvent(self, parser_mediator, file_object, offset):
        """Reads an ASL record at a specific offset.

        Args:
            parser_mediator: a parser mediator object (instance of
                             ParserMediator).
            file_object: a file-like object that points to an ASL file.
            offset: an integer containing the offset of the ASL record.

        Returns:
            A tuple of an event object extracted from the ASL record, and the
            offset to the next ASL record in the file ((None, None) on
            failure or when there is no record at the offset).
        """
        # Bug fix: a zero offset means there is no record to read; the
        # original code read (offset - tell()) bytes first, which for
        # offset == 0 is a negative length and makes read() consume the rest
        # of the file.
        if not offset:
            return None, None

        # The heap of the entry is saved to try to avoid seek (performance
        # issue). It has the real start position of the entry.
        dynamic_data_offset = file_object.tell()
        try:
            dynamic_data = file_object.read(offset - dynamic_data_offset)
        except IOError as exception:
            parser_mediator.ProduceParseError(
                u'unable to read ASL record dynamic data with error: {0:s}'.
                format(exception))
            return None, None

        try:
            record_struct = self._ASL_RECORD_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            parser_mediator.ProduceParseError(
                u'unable to parse ASL record with error: {0:s}'.format(
                    exception))
            return None, None

        # Variable tam_fields = is the real length of the dynamic fields.
        # We have this:
        # [Record_Struct] + [Dynamic_Fields] + [Pointer_Entry_Before]
        # In Record_Struct we have a field called tam_entry, where it has the
        # number of bytes until the end of the entry from the position that
        # the field is. The tam_entry is between the 2th and the 6th byte in
        # the [Record_Struct].
        # tam_entry = ([Record_Struct]-6)+[Dynamic_Fields]+[Pointer_Entry_Before]
        # Also, we do not need [Point_Entry_Before] and then we delete the
        # size of [Point_Entry_Before] that it is 8 bytes (8):
        # tam_entry = ([Record_Struct]-6)+[Dynamic_Fields]+[Pointer_Entry_Before]
        # [Dynamic_Fields] = tam_entry - [Record_Struct] + 6 - 8
        # [Dynamic_Fields] = tam_entry - [Record_Struct] - 2
        tam_fields = record_struct.tam_entry - self._ASL_RECORD_STRUCT_SIZE - 2

        # Dynamic part of the entry that contains minimal four fields of 8
        # bytes plus 2 x [8 bytes] fields for each extra ASL_Field.
        # The four first fields are always the Host, Sender, Facility and
        # Message. After the four first fields, the entry might have extra
        # ASL_Fields. For each extra ASL_field, it has a pair of 8-byte fields
        # where the first 8 bytes contains the name of the extra ASL_field and
        # the second 8 bytes contains the text of the extra field.
        # All of this 8-byte field can be saved using one of these three
        # different types:
        # - Null value ('0000000000000000'): nothing to do.
        # - String: It is string if first bit = 1 or first nibble = 8 (1000).
        #           Second nibble has the length of string.
        #           The next 7 bytes have the text characters of the string
        #           padding the end with null characters: '0x00'.
        #           Example: [8468 6964 6400 0000]
        #                    [8] String, [4] length, value: [68 69 64 64] =
        #                    hidd.
        # - Pointer: static position in the file to a special struct
        #            implemented as an ASL_RECORD_DYN_VALUE.
        #            Example: [0000 0000 0000 0077]
        #            It points to the file position 0x077 that has a
        #            ASL_RECORD_DYN_VALUE structure.
        values = []
        while tam_fields > 0:
            try:
                field_data = file_object.read(8)
            except IOError as exception:
                parser_mediator.ProduceParseError(
                    u'unable to read ASL field with error: {0:s}'.format(
                        exception))
                return None, None

            # Try to read the field data as a string.
            try:
                asl_string_struct = self._ASL_STRING.parse(field_data)
                string_data = b''.join(
                    asl_string_struct.string[
                        0:asl_string_struct.string_length])
                values.append(string_data)

                # Go to parse the next extra field.
                tam_fields -= 8
                continue

            except ValueError:
                pass

            # If the field is not a string it must be a pointer.
            try:
                pointer_value = self._ASL_POINTER.parse(field_data)
            except ValueError as exception:
                parser_mediator.ProduceParseError(
                    u'unable to parse ASL field with error: {0:s}'.format(
                        exception))
                return None, None

            if not pointer_value:
                # Next extra field: 8 bytes more.
                tam_fields -= 8
                continue

            # The next IF ELSE is only for performance issues, avoiding seek.
            # If the pointer points a lower position than where the actual
            # entry starts, it means that it points to a previous entry.
            pos = pointer_value - dynamic_data_offset

            # Greater or equal 0 means that the data is in the actual entry.
            if pos >= 0:
                try:
                    dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse(
                        dynamic_data[pos:])
                    dyn_value = dyn_value_struct.value.partition(b'\x00')[0]
                    values.append(dyn_value)

                except (IOError, construct.FieldError) as exception:
                    parser_mediator.ProduceParseError((
                        u'unable to parse ASL record dynamic value with '
                        u'error: {0:s}').format(exception))
                    return None, None

            else:
                # Only if it is a pointer that points to the
                # heap from another entry we use the seek method.
                main_position = file_object.tell()

                # If the pointer is in a previous entry.
                if main_position > pointer_value:
                    file_object.seek(
                        pointer_value - main_position, os.SEEK_CUR)
                    try:
                        dyn_value_struct = (
                            self._ASL_RECORD_DYN_VALUE.parse_stream(
                                file_object))
                        dyn_value = dyn_value_struct.value.partition(
                            b'\x00')[0]
                        values.append(dyn_value)

                    except (IOError, construct.FieldError):
                        parser_mediator.ProduceParseError((
                            u'the pointer at {0:d} (0x{0:08x}) points to '
                            u'invalid information.').format(
                                main_position - self._ASL_POINTER.sizeof()))

                    # Come back to the position in the entry.
                    _ = file_object.read(main_position - file_object.tell())

                else:
                    _ = file_object.read(pointer_value - main_position)

                    dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse_stream(
                        file_object)
                    dyn_value = dyn_value_struct.value.partition(b'\x00')[0]
                    values.append(dyn_value)

                    # Come back to the position in the entry.
                    file_object.seek(
                        main_position - file_object.tell(), os.SEEK_CUR)

            # Next extra field: 8 bytes more.
            tam_fields -= 8

        # Read the last 8 bytes of the record that points to the previous
        # entry.
        _ = file_object.read(8)

        # Parsed section, we translate the read data to an appropriate format.
        micro_seconds, _ = divmod(record_struct.nanosec, 1000)

        # Parsing the dynamic values (text or pointers to position with
        # text). The first four are always the host, sender, facility, and
        # message.
        number_of_values = len(values)
        if number_of_values < 4:
            parser_mediator.ProduceParseError(
                u'less than four values read from an ASL event.')

        computer_name = u'N/A'
        sender = u'N/A'
        facility = u'N/A'
        message = u'N/A'

        if number_of_values >= 1:
            computer_name = values[0].decode(u'utf-8')

        if number_of_values >= 2:
            sender = values[1].decode(u'utf-8')

        if number_of_values >= 3:
            facility = values[2].decode(u'utf-8')

        if number_of_values >= 4:
            message = values[3].decode(u'utf-8')

        # If the entry has an extra fields, they works as a pairs:
        # The first is the name of the field and the second the value.
        extra_information = u''
        if number_of_values > 4 and number_of_values % 2 == 0:
            # Taking all the extra attributes and merging them together,
            # eg: a = [1, 2, 3, 4] will look like "1: 2, 3: 4".
            try:
                # Bug fix: wrap map() in list() so the [0::2] / [1::2]
                # slicing below also works where map returns an iterator
                # (Python 3); a no-op on Python 2.
                extra_values = list(map(py2to3.UNICODE_TYPE, values[4:]))
                extra_information = u', '.join(
                    map(u': '.join,
                        zip(extra_values[0::2], extra_values[1::2])))
            except UnicodeDecodeError as exception:
                parser_mediator.ProduceParseError(
                    u'Unable to decode all ASL values in the extra '
                    u'information fields.')

        event_object = ASLEvent(
            record_struct.timestamp, offset, record_struct.asl_message_id,
            record_struct.level, record_struct.pid, record_struct.uid,
            record_struct.gid, record_struct.read_uid,
            record_struct.read_gid, computer_name, sender, facility, message,
            extra_information, micro_seconds=micro_seconds)
        parser_mediator.ProduceEvent(event_object)

        return (event_object, record_struct.next_offset)