class TimeMachinePlugin(interface.PlistPlugin):
  """Basic plugin to extract Time Machine hard disks and backups.

  Further details about the extracted fields:
    DestinationID: remote UUID of the hard disk where the backup is done.
    BackupAlias: structure that contains extra information about the
        DestinationID.
    SnapshotDates: list of the backup dates.
  """

  NAME = 'time_machine'
  DESCRIPTION = 'Parser for TimeMachine plist files.'

  PLIST_PATH = 'com.apple.TimeMachine.plist'
  PLIST_KEYS = frozenset(['Destinations', 'RootVolumeUUID'])

  TM_BACKUP_ALIAS = construct.Struct(
      'tm_backup_alias',
      construct.Padding(10),
      construct.PascalString(
          'value', length_field=construct.UBInt8('length')))

  # pylint: disable=arguments-differ
  def GetEntries(self, parser_mediator, match=None, **unused_kwargs):
    """Extracts relevant TimeMachine entries.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      match (Optional[dict[str: object]]): keys extracted from PLIST_KEYS.
    """
    destinations = match.get('Destinations', [])
    for destination in destinations:
      destination_identifier = (
          destination.get('DestinationID', None) or 'Unknown device')

      alias = destination.get('BackupAlias', '<ALIAS>')
      try:
        alias = self.TM_BACKUP_ALIAS.parse(alias).value
      except construct.FieldError:
        alias = 'Unknown alias'

      event_data = plist_event.PlistTimeEventData()
      event_data.desc = 'TimeMachine Backup in {0:s} ({1:s})'.format(
          alias, destination_identifier)
      event_data.key = 'item/SnapshotDates'
      event_data.root = '/Destinations'

      snapshot_dates = destination.get('SnapshotDates', [])
      for datetime_value in snapshot_dates:
        timestamp = timelib.Timestamp.FromPythonDatetime(datetime_value)
        date_time = dfdatetime_posix_time.PosixTimeInMicroseconds(
            timestamp=timestamp)
        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_WRITTEN)
        parser_mediator.ProduceEventWithEventData(event, event_data)
class TimeMachinePlugin(interface.PlistPlugin):
  """Basic plugin to extract Time Machine hard disks and backups.

  Further details about the extracted fields:
    DestinationID: remote UUID of the hard disk where the backup is done.
    BackupAlias: structure that contains extra information about the
        DestinationID.
    SnapshotDates: list of the backup dates.
  """

  NAME = u'time_machine'
  DESCRIPTION = u'Parser for TimeMachine plist files.'

  PLIST_PATH = u'com.apple.TimeMachine.plist'
  PLIST_KEYS = frozenset([u'Destinations', u'RootVolumeUUID'])

  TM_BACKUP_ALIAS = construct.Struct(
      u'tm_backup_alias',
      construct.Padding(10),
      construct.PascalString(
          u'value', length_field=construct.UBInt8(u'length')))

  def GetEntries(self, parser_mediator, match=None, **unused_kwargs):
    """Extracts relevant TimeMachine entries.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      match: Optional dictionary containing keys extracted from PLIST_KEYS.
             The default is None.
    """
    if u'Destinations' not in match:
      return

    root = u'/Destinations'
    key = u'item/SnapshotDates'

    # For each TimeMachine device.
    for destination in match[u'Destinations']:
      hd_uuid = destination.get(u'DestinationID', None)
      if not hd_uuid:
        hd_uuid = u'Unknown device'

      alias = destination.get(u'BackupAlias', u'<ALIAS>')
      try:
        alias = self.TM_BACKUP_ALIAS.parse(alias).value
      except construct.FieldError:
        alias = u'Unknown alias'

      # For each backup.
      for timestamp in destination.get(u'SnapshotDates', []):
        description = u'TimeMachine Backup in {0:s} ({1:s})'.format(
            alias, hd_uuid)
        event_object = plist_event.PlistEvent(
            root, key, timestamp, description)
        parser_mediator.ProduceEvent(event_object)
class TimeMachinePlugin(interface.PlistPlugin):
  """Basic plugin to extract Time Machine hard disks and backups."""

  NAME = 'plist_timemachine'
  DESCRIPTION = u'Parser for TimeMachine plist files.'

  PLIST_PATH = 'com.apple.TimeMachine.plist'
  PLIST_KEYS = frozenset(['Destinations', 'RootVolumeUUID'])

  # Generated events:
  # DestinationID: remote UUID of the hard disk where the backup is done.
  # BackupAlias: structure that contains extra information about the
  #     DestinationID.
  # SnapshotDates: list of the backup dates.

  TM_BACKUP_ALIAS = construct.Struct(
      'tm_backup_alias',
      construct.Padding(10),
      construct.PascalString('value', length_field=construct.UBInt8('length')))

  def GetEntries(self, parser_context, match=None, **unused_kwargs):
    """Extracts relevant TimeMachine entries.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      match: Optional dictionary containing keys extracted from PLIST_KEYS.
             The default is None.
    """
    root = '/Destinations'
    key = 'item/SnapshotDates'

    # For each TimeMachine device.
    for destination in match['Destinations']:
      hd_uuid = destination['DestinationID']
      if not hd_uuid:
        hd_uuid = u'Unknown device'

      alias = destination['BackupAlias']
      try:
        alias = self.TM_BACKUP_ALIAS.parse(alias).value
      except construct.FieldError:
        alias = u'Unknown alias'

      # For each backup.
      for timestamp in destination['SnapshotDates']:
        description = u'TimeMachine Backup in {0:s} ({1:s})'.format(
            alias, hd_uuid)
        event_object = plist_event.PlistEvent(
            root, key, timestamp, description)
        parser_context.ProduceEvent(event_object, plugin_name=self.NAME)
class TimeMachinePlugin(interface.PlistPlugin):
  """Basic plugin to extract Time Machine hard disks and backups."""

  NAME = 'plist_timemachine'

  PLIST_PATH = 'com.apple.TimeMachine.plist'
  PLIST_KEYS = frozenset(['Destinations', 'RootVolumeUUID'])

  # Yielded events:
  # DestinationID: remote UUID of the hard disk where the backup is done.
  # BackupAlias: structure that contains extra information about the
  #     DestinationID.
  # SnapshotDates: list of the backup dates.

  TM_BACKUP_ALIAS = construct.Struct(
      'tm_backup_alias',
      construct.Padding(10),
      construct.PascalString('value', length_field=construct.UBInt8('length')))

  def GetEntries(self, match, **unused_kwargs):
    """Extracts relevant TimeMachine entries.

    Args:
      match: A dictionary containing keys extracted from PLIST_KEYS.

    Yields:
      EventObject objects extracted from the plist.
    """
    root = '/Destinations'
    key = 'item/SnapshotDates'

    # For each TimeMachine device.
    for destination in match['Destinations']:
      hd_uuid = destination['DestinationID']
      if not hd_uuid:
        hd_uuid = u'Unknown device'

      alias = destination['BackupAlias']
      try:
        alias = self.TM_BACKUP_ALIAS.parse(alias).value
      except construct.FieldError:
        alias = u'Unknown alias'

      # For each backup.
      for timestamp in destination['SnapshotDates']:
        description = u'TimeMachine Backup in {} ({})'.format(alias, hd_uuid)
        yield plist_event.PlistEvent(root, key, timestamp, description)
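# Hedged usage sketch (not from the original sources): how the TM_BACKUP_ALIAS
# structure shared by the TimeMachinePlugin variants above consumes a
# BackupAlias value. Assumes the legacy construct 2.5 API (UBInt8/PascalString
# with a length_field); the byte string is synthetic test data, not a real
# BackupAlias blob.
import construct

TM_BACKUP_ALIAS = construct.Struct(
    'tm_backup_alias',
    construct.Padding(10),
    construct.PascalString('value', length_field=construct.UBInt8('length')))

# 10 padding bytes, then a Pascal string: 1-byte length (0x08) + 'BackupHD'.
synthetic_alias = b'\x00' * 10 + b'\x08BackupHD'
assert TM_BACKUP_ALIAS.parse(synthetic_alias).value == b'BackupHD'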
def __init__(self):
    self.header_cmd0 = construct.Struct(
        'CMD0Header',
        construct.UBInt8('magic'),
        construct.UBInt8('unk_0'),
        construct.UBInt8('unk_1'),
        construct.UBInt8('unk_2'),
        construct.UBInt8('unk_3'),
        construct.UBInt8('flags'),
        construct.UBInt8('id_primary'),
        construct.UBInt8('id_secondary'),
        construct.UBInt16('error_code'),
        construct.UBInt16('payload_size_cmd0'))
    self.header_cmd1 = construct.Struct(
        'CMD1Header',
        construct.Padding(48))
    self.header_cmd2 = construct.Struct(
        'CMD2Header',
        construct.ULInt16('JDN_base'),
        construct.Padding(2),
        construct.ULInt32('seconds'))
    self.header = construct.Struct(
        'CMDHeader',
        construct.ULInt16('packet_type'),
        construct.ULInt16('cmd_id'),
        construct.ULInt16('payload_size'),
        construct.ULInt16('seq_id'),
        construct.Switch('cmd_hdr', lambda ctx: ctx.cmd_id, {
            0: construct.If(
                lambda ctx: ctx.payload_size >= self.header_cmd0.sizeof(),
                construct.Embed(self.header_cmd0)),
            1: construct.If(
                lambda ctx: ctx.payload_size == self.header_cmd1.sizeof(),
                construct.Embed(self.header_cmd1)),
            2: construct.If(
                lambda ctx: ctx.payload_size == self.header_cmd2.sizeof(),
                construct.Embed(self.header_cmd2)),
        }, default=construct.Pass))
    self.cmd_handlers = {
        0: self.cmd0,
        1: self.cmd1,
        2: self.cmd2,
    }
    self.cmd0_handlers = {
        5: {6: self.cmd0_5_6},
    }
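# Hedged sketch (synthetic, not from the original source): shows how the
# CMDHeader Switch above dispatches on cmd_id and embeds the matching
# sub-header, using the legacy construct API. Only the CMD2 branch is
# reproduced here.
import construct

header_cmd2 = construct.Struct(
    'CMD2Header',
    construct.ULInt16('JDN_base'),
    construct.Padding(2),
    construct.ULInt32('seconds'))

header = construct.Struct(
    'CMDHeader',
    construct.ULInt16('packet_type'),
    construct.ULInt16('cmd_id'),
    construct.ULInt16('payload_size'),
    construct.ULInt16('seq_id'),
    construct.Switch('cmd_hdr', lambda ctx: ctx.cmd_id, {
        2: construct.If(
            lambda ctx: ctx.payload_size == header_cmd2.sizeof(),
            construct.Embed(header_cmd2)),
    }, default=construct.Pass))

# Little-endian fields: packet_type=1, cmd_id=2, payload_size=8, seq_id=0,
# then the embedded CMD2 payload (JDN_base=0x4545, 2 pad bytes, seconds=100).
packet = (b'\x01\x00\x02\x00\x08\x00\x00\x00'
          b'\x45\x45\x00\x00\x64\x00\x00\x00')
parsed = header.parse(packet)
assert parsed.JDN_base == 0x4545 and parsed.seconds == 100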
def PrefixedBytes(name, length_field=construct.UBInt8("length")):  # noqa
    """
    Length-prefixed binary data.

    This is like a :py:func:`construct.macros.PascalString` that raises a
    :py:class:`construct.AdaptationError` when encoding something other
    than :py:class:`bytes`.

    :param name: The attribute name under which this value will be
        accessible.
    :type name: :py:class:`str`

    :param length_field: (optional) The prefixed length field. Defaults to
        :py:func:`construct.macros.UBInt8`.
    :type length_field: a :py:class:`construct.core.FormatField`
    """
    return construct.LengthValueAdapter(
        construct.Sequence(
            name,
            length_field,
            BytesAdapter(
                construct.Field(
                    "data", operator.attrgetter(length_field.name)))))
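# Hedged usage sketch: assuming the module's BytesAdapter passes bytes through
# unchanged when parsing, PrefixedBytes round-trips length-prefixed binary
# data much like PascalString. 'blob' is an arbitrary illustrative name.
blob = PrefixedBytes("blob")
assert blob.parse(b"\x05hello") == b"hello"
assert blob.build(b"hi") == b"\x02hi"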
import construct as cs
import os
import repoze.lru
import signal
import struct
import sys
import traceback

import traffic_control

# Control message for our protocol; the first few bits are special as we have
# to maintain compatibility with L2TPv3 in the kernel (the first bit must be
# 1). The packet must also be at least 12 bytes in length, otherwise some
# firewalls may filter it when used over port 53.
ControlMessage = cs.Struct(
    "control",
    # Ensure that the first bit is 1 (L2TP control packet).
    cs.Const(cs.UBInt8("magic1"), 0x80),
    # Reduce conflicting matches with other protocols as we run on port 53.
    cs.Const(cs.UBInt16("magic2"), 0x73A7),
    # Protocol version to allow future upgrades.
    cs.UBInt8("version"),
    # Message type.
    cs.UBInt8("type"),
    # Message data (with length prefix).
    cs.PascalString("data"),
    # Pad the message so it is at least 12 bytes long.
    cs.Padding(lambda ctx: max(0, 6 - len(ctx["data"]))),
)

# Unreliable messages (0x00 - 0x7F).
CONTROL_TYPE_COOKIE = 0x01
CONTROL_TYPE_PREPARE = 0x02
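# Hedged sketch (synthetic values, not from the original source): building and
# re-parsing a ControlMessage. The Const fields must be supplied explicitly
# when building with the legacy construct API; version 1 is an assumed value.
message = ControlMessage.build(cs.Container(
    magic1=0x80, magic2=0x73A7, version=1,
    type=CONTROL_TYPE_COOKIE, data=b'abc'))
assert len(message) >= 12  # padded so port-53 middleboxes do not drop it

parsed = ControlMessage.parse(message)
assert parsed.type == CONTROL_TYPE_COOKIE
assert parsed.data == b'abc'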
class CupsIppParser(interface.FileObjectParser):
  """Parser for CUPS IPP files."""

  NAME = 'cups_ipp'
  DESCRIPTION = 'Parser for CUPS IPP files.'

  # INFO:
  # For each file, we have only one document with three different timestamps:
  # created, processed and finished.
  # Format:
  # [HEADER: MAGIC + KNOWN_TYPE][GROUP A]...[GROUP Z][GROUP_END: 0x03]
  # GROUP: [GROUP ID][PAIR A]...[PAIR Z] where [PAIR: NAME + VALUE]
  #   GROUP ID: [1byte ID]
  #   PAIR: [TagID][\x00][Name][Value]
  #     TagID: 1 byte integer with the type of "Value".
  #     Name: [Length][Text][\x00]
  #       Name can be empty when the name has more than one value.
  #       Example: family name "lopez mata" with more than one surname.
  #       Type_Text + [0x06, family, 0x00] + [0x05, lopez, 0x00] +
  #       Type_Text + [0x00, 0x00] + [0x04, mata, 0x00]
  #     Value: can be integer, boolean, or text provided by TagID.
  #       If boolean, Value: [\x01][0x00(False)] or [\x01(True)]
  #       If integer, Value: [\x04][Integer]
  #       If text, Value: [Length text][Text][\x00]

  # Magic number that identifies the supported CUPS IPP version.
  IPP_MAJOR_VERSION = 2
  IPP_MINOR_VERSION = 0

  # Supported operation identifier.
  IPP_OP_ID = 5

  # CUPS IPP file header.
  CUPS_IPP_HEADER = construct.Struct(
      'cups_ipp_header_struct',
      construct.UBInt8('major_version'),
      construct.UBInt8('minor_version'),
      construct.UBInt16('operation_id'),
      construct.UBInt32('request_id'))

  # Group ID that indicates the end of the IPP Control file.
  GROUP_END = 3

  # Identification groups.
  GROUP_LIST = [1, 2, 4, 5, 6, 7]

  # Type ID, per CUPS source file ipp-support.c.
  TYPE_GENERAL_INTEGER = 0x20
  TYPE_INTEGER = 0x21
  TYPE_BOOL = 0x22
  TYPE_ENUMERATION = 0x23
  TYPE_DATETIME = 0x31

  # Type of values that can be extracted.
  INTEGER_8 = construct.UBInt8('integer')
  INTEGER_32 = construct.UBInt32('integer')
  TEXT = construct.PascalString(
      'text', length_field=construct.UBInt8('length'))
  BOOLEAN = construct.Struct(
      'boolean_value', construct.Padding(1), INTEGER_8)
  INTEGER = construct.Struct(
      'integer_value', construct.Padding(1), INTEGER_32)

  # This is an RFC 2579 datetime.
  DATETIME = construct.Struct(
      'datetime',
      construct.Padding(1),
      construct.UBInt16('year'),
      construct.UBInt8('month'),
      construct.UBInt8('day'),
      construct.UBInt8('hour'),
      construct.UBInt8('minutes'),
      construct.UBInt8('seconds'),
      construct.UBInt8('deciseconds'),
      construct.String('direction_from_utc', length=1, encoding='ascii'),
      construct.UBInt8('hours_from_utc'),
      construct.UBInt8('minutes_from_utc'),
  )

  # Name of the pair.
  PAIR_NAME = construct.Struct(
      'pair_name', TEXT, construct.Padding(1))

  # Specific CUPS IPP to generic name translation.
  _NAME_PAIR_TRANSLATION = {
      'com.apple.print.JobInfo.PMApplicationName': 'application',
      'com.apple.print.JobInfo.PMJobOwner': 'owner',
      'DestinationPrinterID': 'printer_id',
      'document-format': 'doc_type',
      'job-name': 'job_name',
      'job-originating-host-name': 'computer_name',
      'job-originating-user-name': 'user',
      'job-uuid': 'job_id',
      'printer-uri': 'uri'}

  _DATE_TIME_VALUES = {
      'date-time-at-creation': definitions.TIME_DESCRIPTION_CREATION,
      'date-time-at-processing': definitions.TIME_DESCRIPTION_START,
      'date-time-at-completed': definitions.TIME_DESCRIPTION_END}

  _POSIX_TIME_VALUES = {
      'time-at-creation': definitions.TIME_DESCRIPTION_CREATION,
      'time-at-processing': definitions.TIME_DESCRIPTION_START,
      'time-at-completed': definitions.TIME_DESCRIPTION_END}

  _DATE_TIME_VALUE_NAMES = list(_DATE_TIME_VALUES.keys())
  _DATE_TIME_VALUE_NAMES.extend(list(_POSIX_TIME_VALUES.keys()))

  def _GetStringValue(self, data_dict, name, default_value=None):
    """Retrieves a specific string value from the data dict.

    Args:
      data_dict (dict[str, list[str]]): values per name.
      name (str): name of the value to retrieve.
      default_value (Optional[object]): value to return when the name has no
          value set in data_dict.

    Returns:
      str: value represented as a string.
    """
    values = data_dict.get(name, None)
    if not values:
      return default_value

    for index, value in enumerate(values):
      if ',' in value:
        values[index] = '"{0:s}"'.format(value)

    return ', '.join(values)

  def _ReadPair(self, parser_mediator, file_object):
    """Reads an attribute name and value pair from a CUPS IPP event.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): file-like object.

    Returns:
      tuple: contains:

        str: name or None.
        str: value or None.
    """
    # Pair = Type ID + Name + Value.
    try:
      # Can be:
      #   Group ID + IDtag = Group ID (1byte) + Tag ID (1byte) + '0x00'.
      #   IDtag = Tag ID (1byte) + '0x00'.
      type_id = self.INTEGER_8.parse_stream(file_object)
      if type_id == self.GROUP_END:
        return None, None

      elif type_id in self.GROUP_LIST:
        # If it is a group ID we must read the next byte that contains
        # the first TagID.
        type_id = self.INTEGER_8.parse_stream(file_object)

      # 0x00 separator character.
      self.INTEGER_8.parse_stream(file_object)

    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceExtractionError(
          'unable to parse pair identifier with error: {0!s}'.format(
              exception))
      return None, None

    # Name = Length name + name + 0x00
    try:
      pair_name = self.PAIR_NAME.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceExtractionError(
          'unable to parse pair name with error: {0!s}'.format(exception))
      return None, None

    try:
      name = pair_name.text.decode('utf-8')
    except UnicodeDecodeError as exception:
      parser_mediator.ProduceExtractionError(
          'unable to decode pair name with error: {0!s}'.format(exception))
      return None, None

    # Value: can be integer, boolean or text, selected by Type ID.
    if type_id in (
        self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER, self.TYPE_ENUMERATION):
      value_structure = self.INTEGER
    elif type_id == self.TYPE_BOOL:
      value_structure = self.BOOLEAN
    elif type_id == self.TYPE_DATETIME:
      value_structure = self.DATETIME
    else:
      value_structure = self.TEXT

    try:
      value = value_structure.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceExtractionError(
          'unable to parse value with error: {0!s}'.format(exception))
      return None, None

    if type_id in (
        self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER, self.TYPE_ENUMERATION):
      value = value.integer

    elif type_id == self.TYPE_BOOL:
      value = bool(value.integer)

    elif type_id == self.TYPE_DATETIME:
      rfc2579_date_time_tuple = (
          value.year, value.month, value.day,
          value.hour, value.minutes, value.seconds, value.deciseconds,
          value.direction_from_utc, value.hours_from_utc,
          value.minutes_from_utc)
      value = dfdatetime_rfc2579_date_time.RFC2579DateTime(
          rfc2579_date_time_tuple=rfc2579_date_time_tuple)

    else:
      try:
        value = value.decode('utf-8')
      except UnicodeDecodeError as exception:
        parser_mediator.ProduceExtractionError(
            'unable to decode value with error: {0!s}'.format(exception))
        return None, None

    return name, value

  def _ReadPairs(self, parser_mediator, file_object):
    """Reads the attribute name and value pairs from a CUPS IPP event.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): file-like object.

    Returns:
      dict[str, list[str]]: values per name.
    """
    data_dict = {}

    name, value = self._ReadPair(parser_mediator, file_object)
    while name or value:
      # Translate the known "name" CUPS IPP to a generic name value.
      pretty_name = self._NAME_PAIR_TRANSLATION.get(name, name)
      data_dict.setdefault(pretty_name, []).append(value)
      name, value = self._ReadPair(parser_mediator, file_object)

    return data_dict

  def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses a CUPS IPP file-like object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    try:
      header = self.CUPS_IPP_HEADER.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      raise errors.UnableToParseFile(
          'Unable to parse CUPS IPP Header with error: {0!s}'.format(
              exception))

    if (header.major_version != self.IPP_MAJOR_VERSION or
        header.minor_version != self.IPP_MINOR_VERSION):
      raise errors.UnableToParseFile(
          '[{0:s}] Unsupported version number.'.format(self.NAME))

    if header.operation_id != self.IPP_OP_ID:
      # Warn if the operation ID differs from the standard one. We should be
      # able to parse the file nonetheless.
      logger.debug(
          '[{0:s}] Unsupported operation identifier in file: {1:s}.'.format(
              self.NAME, parser_mediator.GetDisplayName()))

    data_dict = self._ReadPairs(parser_mediator, file_object)

    time_dict = {}
    for name in self._DATE_TIME_VALUE_NAMES:
      value = data_dict.get(name, None)
      if value is not None:
        time_dict[name] = value
        del data_dict[name]

    event_data = CupsIppEventData()
    event_data.application = self._GetStringValue(data_dict, 'application')
    event_data.computer_name = self._GetStringValue(data_dict, 'computer_name')
    event_data.copies = data_dict.get('copies', [0])[0]
    event_data.data_dict = data_dict
    event_data.doc_type = self._GetStringValue(data_dict, 'doc_type')
    event_data.job_id = self._GetStringValue(data_dict, 'job_id')
    event_data.job_name = self._GetStringValue(data_dict, 'job_name')
    event_data.user = self._GetStringValue(data_dict, 'user')
    event_data.owner = self._GetStringValue(data_dict, 'owner')
    event_data.printer_id = self._GetStringValue(data_dict, 'printer_id')
    event_data.uri = self._GetStringValue(data_dict, 'uri')

    for name, usage in iter(self._DATE_TIME_VALUES.items()):
      time_values = time_dict.get(name, [])
      for date_time in time_values:
        event = time_events.DateTimeValuesEvent(date_time, usage)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    for name, usage in iter(self._POSIX_TIME_VALUES.items()):
      time_values = time_dict.get(name, [])
      for time_value in time_values:
        date_time = dfdatetime_posix_time.PosixTime(timestamp=time_value)
        event = time_events.DateTimeValuesEvent(date_time, usage)
        parser_mediator.ProduceEventWithEventData(event, event_data)
class DataBlockFile(object):
  """Class that contains a data block file."""

  SIGNATURE = 0xc104cac3

  _FILE_HEADER = construct.Struct(
      u'chrome_cache_data_file_header',
      construct.ULInt32(u'signature'),
      construct.ULInt16(u'minor_version'),
      construct.ULInt16(u'major_version'),
      construct.ULInt16(u'file_number'),
      construct.ULInt16(u'next_file_number'),
      construct.ULInt32(u'block_size'),
      construct.ULInt32(u'number_of_entries'),
      construct.ULInt32(u'maximum_number_of_entries'),
      construct.Array(4, construct.ULInt32(u'empty')),
      construct.Array(4, construct.ULInt32(u'hints')),
      construct.ULInt32(u'updating'),
      construct.Array(5, construct.ULInt32(u'user')))

  _CACHE_ENTRY = construct.Struct(
      u'chrome_cache_entry',
      construct.ULInt32(u'hash'),
      construct.ULInt32(u'next_address'),
      construct.ULInt32(u'rankings_node_address'),
      construct.ULInt32(u'reuse_count'),
      construct.ULInt32(u'refetch_count'),
      construct.ULInt32(u'state'),
      construct.ULInt64(u'creation_time'),
      construct.ULInt32(u'key_size'),
      construct.ULInt32(u'long_key_address'),
      construct.Array(4, construct.ULInt32(u'data_stream_sizes')),
      construct.Array(4, construct.ULInt32(u'data_stream_addresses')),
      construct.ULInt32(u'flags'),
      construct.Padding(16),
      construct.ULInt32(u'self_hash'),
      construct.Array(160, construct.UBInt8(u'key')))

  def __init__(self):
    """Initializes the data block file object."""
    super(DataBlockFile, self).__init__()
    self._file_object = None
    self.creation_time = None
    self.block_size = None
    self.number_of_entries = None
    self.version = None

  def _ReadFileHeader(self):
    """Reads the file header.

    Raises:
      IOError: if the file header cannot be read.
    """
    self._file_object.seek(0, os.SEEK_SET)

    try:
      file_header = self._FILE_HEADER.parse_stream(self._file_object)
    except construct.FieldError as exception:
      raise IOError(
          u'Unable to parse file header with error: {0:s}'.format(exception))

    signature = file_header.get(u'signature')
    if signature != self.SIGNATURE:
      raise IOError(u'Unsupported data block file signature')

    self.version = u'{0:d}.{1:d}'.format(
        file_header.get(u'major_version'),
        file_header.get(u'minor_version'))
    if self.version not in [u'2.0', u'2.1']:
      raise IOError(u'Unsupported data block file version: {0:s}'.format(
          self.version))

    self.block_size = file_header.get(u'block_size')
    self.number_of_entries = file_header.get(u'number_of_entries')

  def ReadCacheEntry(self, block_offset):
    """Reads a cache entry.

    Args:
      block_offset: The block offset of the cache entry.

    Returns:
      A cache entry (instance of CacheEntry).

    Raises:
      IOError: if the cache entry cannot be read.
    """
    self._file_object.seek(block_offset, os.SEEK_SET)

    try:
      cache_entry_struct = self._CACHE_ENTRY.parse_stream(self._file_object)
    except construct.FieldError as exception:
      raise IOError(
          u'Unable to parse cache entry with error: {0:s}'.format(exception))

    cache_entry = CacheEntry()
    cache_entry.hash = cache_entry_struct.get(u'hash')
    cache_entry.next = CacheAddress(cache_entry_struct.get(u'next_address'))
    cache_entry.rankings_node = CacheAddress(
        cache_entry_struct.get(u'rankings_node_address'))
    cache_entry.creation_time = cache_entry_struct.get(u'creation_time')

    byte_array = cache_entry_struct.get(u'key')
    byte_string = b''.join(map(chr, byte_array))
    cache_entry.key, _, _ = byte_string.partition(b'\x00')

    return cache_entry

  def Close(self):
    """Closes the data block file."""
    if self._file_object:
      self._file_object.close()
      self._file_object = None

  def Open(self, file_object):
    """Opens the data block file.

    Args:
      file_object: the file object.
    """
    self._file_object = file_object
    self._ReadFileHeader()
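# Hedged sketch (synthetic data, not a real cache file): round-trips the data
# block file header through the _FILE_HEADER struct, then lets
# DataBlockFile.Open() validate it.
import io

header_bytes = DataBlockFile._FILE_HEADER.build(construct.Container(
    signature=DataBlockFile.SIGNATURE, minor_version=0, major_version=2,
    file_number=0, next_file_number=0, block_size=256, number_of_entries=1,
    maximum_number_of_entries=1024, empty=[0] * 4, hints=[0] * 4,
    updating=0, user=[0] * 5))

data_block_file = DataBlockFile()
data_block_file.Open(io.BytesIO(header_bytes))
assert data_block_file.version == u'2.0'
assert data_block_file.block_size == 256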
class BsmParser(interface.FileObjectParser):
  """Parser for BSM files."""

  _INITIAL_FILE_OFFSET = None

  NAME = u'bsm_log'
  DESCRIPTION = u'Parser for BSM log files.'

  # BSM supported version (0x0b = 11).
  AUDIT_HEADER_VERSION = 11

  # Magic trail header.
  BSM_TOKEN_TRAILER_MAGIC = b'b105'

  # IP version constants.
  AU_IPv4 = 4
  AU_IPv6 = 16

  IPV4_STRUCT = construct.UBInt32(u'ipv4')

  IPV6_STRUCT = construct.Struct(
      u'ipv6',
      construct.UBInt64(u'high'),
      construct.UBInt64(u'low'))

  # Tested structures.
  # INFO: the token ID has been omitted in the structure declarations.
  # BSM_TYPE is used first to read the ID, and then the structure.
  # Tokens always start with an ID value that identifies their token
  # type and subsequent structure.
  BSM_TYPE = construct.UBInt8(u'token_id')

  # Data type structures.
  BSM_TOKEN_DATA_CHAR = construct.String(u'value', 1)
  BSM_TOKEN_DATA_SHORT = construct.UBInt16(u'value')
  BSM_TOKEN_DATA_INTEGER = construct.UBInt32(u'value')

  # Common structure used by other structures.
  # audit_uid: integer, uid that generates the entry.
  # effective_uid: integer, the permission user used.
  # effective_gid: integer, the permission group used.
  # real_uid: integer, user id of the user that executes the process.
  # real_gid: integer, group id of the group that executes the process.
  # pid: integer, identification number of the process.
  # session_id: unknown, needs research.
  BSM_TOKEN_SUBJECT_SHORT = construct.Struct(
      u'subject_data',
      construct.UBInt32(u'audit_uid'),
      construct.UBInt32(u'effective_uid'),
      construct.UBInt32(u'effective_gid'),
      construct.UBInt32(u'real_uid'),
      construct.UBInt32(u'real_gid'),
      construct.UBInt32(u'pid'),
      construct.UBInt32(u'session_id'))

  # Common structure used by other structures.
  # Identifies the kind of inet (IPv4 or IPv6).
  # TODO: instead of 16, AU_IPv6 must be used.
  BSM_IP_TYPE_SHORT = construct.Struct(
      u'bsm_ip_type_short',
      construct.UBInt32(u'net_type'),
      construct.Switch(
          u'ip_addr',
          _BsmTokenGetNetType,
          {16: IPV6_STRUCT},
          default=IPV4_STRUCT))

  # Initial fields structure used by header structures.
  # length: integer, the length of the entry, equal to trailer (doc: length).
  # version: integer, version of BSM (AUDIT_HEADER_VERSION).
  # event_type: integer, the type of event (/etc/security/audit_event).
  # modifier: integer, unknown, needs research (it is always 0).
  BSM_HEADER = construct.Struct(
      u'bsm_header',
      construct.UBInt32(u'length'),
      construct.UBInt8(u'version'),
      construct.UBInt16(u'event_type'),
      construct.UBInt16(u'modifier'))

  # First token of one entry.
  # timestamp: unsigned integer, number of seconds since
  #            January 1, 1970 00:00:00 UTC.
  # microsecond: unsigned integer, number of microseconds.
  BSM_HEADER32 = construct.Struct(
      u'bsm_header32',
      BSM_HEADER,
      construct.UBInt32(u'timestamp'),
      construct.UBInt32(u'microsecond'))

  BSM_HEADER64 = construct.Struct(
      u'bsm_header64',
      BSM_HEADER,
      construct.UBInt64(u'timestamp'),
      construct.UBInt64(u'microsecond'))

  BSM_HEADER32_EX = construct.Struct(
      u'bsm_header32_ex',
      BSM_HEADER,
      BSM_IP_TYPE_SHORT,
      construct.UBInt32(u'timestamp'),
      construct.UBInt32(u'microsecond'))

  # Token TEXT, provides extra information.
  BSM_TOKEN_TEXT = construct.Struct(
      u'bsm_token_text',
      construct.UBInt16(u'length'),
      construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

  # Path of the executable.
  BSM_TOKEN_PATH = BSM_TOKEN_TEXT

  # Identifies the end of the record (followed by TRAILER).
  # status: integer that identifies the status of the exit (BSM_ERRORS).
  # return: returned value from the operation.
  BSM_TOKEN_RETURN32 = construct.Struct(
      u'bsm_token_return32',
      construct.UBInt8(u'status'),
      construct.UBInt32(u'return_value'))

  BSM_TOKEN_RETURN64 = construct.Struct(
      u'bsm_token_return64',
      construct.UBInt8(u'status'),
      construct.UBInt64(u'return_value'))

  # Identifies the number of bytes that were written.
  # magic: 2 bytes that identify the TRAILER (BSM_TOKEN_TRAILER_MAGIC).
  # length: integer that has the number of bytes from the entry size.
  BSM_TOKEN_TRAILER = construct.Struct(
      u'bsm_token_trailer',
      construct.UBInt16(u'magic'),
      construct.UBInt32(u'record_length'))

  # A 32-bit argument.
  # num_arg: the number of the argument.
  # name_arg: the argument's name.
  # text: the string value of the argument.
  BSM_TOKEN_ARGUMENT32 = construct.Struct(
      u'bsm_token_argument32',
      construct.UBInt8(u'num_arg'),
      construct.UBInt32(u'name_arg'),
      construct.UBInt16(u'length'),
      construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

  # A 64-bit argument.
  # num_arg: integer, the number of the argument.
  # name_arg: text, the argument's name.
  # text: the string value of the argument.
  BSM_TOKEN_ARGUMENT64 = construct.Struct(
      u'bsm_token_argument64',
      construct.UBInt8(u'num_arg'),
      construct.UBInt64(u'name_arg'),
      construct.UBInt16(u'length'),
      construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

  # Identifies a user.
  # terminal_id: unknown, research needed.
  # terminal_addr: unknown, research needed.
  BSM_TOKEN_SUBJECT32 = construct.Struct(
      u'bsm_token_subject32',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32(u'terminal_port'),
      IPV4_STRUCT)

  # Identifies a user using an extended token.
  # terminal_port: unknown, needs research.
  # net_type: unknown, needs research.
  BSM_TOKEN_SUBJECT32_EX = construct.Struct(
      u'bsm_token_subject32_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32(u'terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_opaque // AUT_OPAQUE
  BSM_TOKEN_OPAQUE = BSM_TOKEN_TEXT

  # au_to_seq // AUT_SEQ
  BSM_TOKEN_SEQUENCE = BSM_TOKEN_DATA_INTEGER

  # Program execution with options.
  # For each argument we are going to have a string + "\x00".
  # Example: [00 00 00 02][41 42 43 00 42 42 00]
  # 2 arguments, Arg1: [414243] Arg2: [4242].
  BSM_TOKEN_EXEC_ARGUMENTS = construct.UBInt32(u'number_arguments')

  BSM_TOKEN_EXEC_ARGUMENT = construct.Struct(
      u'bsm_token_exec_argument',
      construct.RepeatUntil(
          _BsmTokenIsEndOfString,
          construct.StaticField("text", 1)))

  # au_to_in_addr // AUT_IN_ADDR:
  BSM_TOKEN_ADDR = IPV4_STRUCT

  # au_to_in_addr_ext // AUT_IN_ADDR_EX:
  BSM_TOKEN_ADDR_EXT = construct.Struct(
      u'bsm_token_addr_ext',
      construct.UBInt32(u'net_type'),
      IPV6_STRUCT)

  # au_to_ip // AUT_IP:
  # TODO: parse this header in the correct way.
  BSM_TOKEN_IP = construct.String(u'binary_ipv4_add', 20)

  # au_to_ipc // AUT_IPC:
  BSM_TOKEN_IPC = construct.Struct(
      u'bsm_token_ipc',
      construct.UBInt8(u'object_type'),
      construct.UBInt32(u'object_id'))

  # au_to_ipc_perm // AUT_IPC_PERM
  BSM_TOKEN_IPC_PERM = construct.Struct(
      u'bsm_token_ipc_perm',
      construct.UBInt32(u'user_id'),
      construct.UBInt32(u'group_id'),
      construct.UBInt32(u'creator_user_id'),
      construct.UBInt32(u'creator_group_id'),
      construct.UBInt32(u'access_mode'),
      construct.UBInt32(u'slot_seq'),
      construct.UBInt32(u'key'))

  # au_to_iport // AUT_IPORT:
  BSM_TOKEN_PORT = construct.UBInt16(u'port_number')

  # au_to_file // AUT_OTHER_FILE32:
  BSM_TOKEN_FILE = construct.Struct(
      u'bsm_token_file',
      construct.UBInt32(u'timestamp'),
      construct.UBInt32(u'microsecond'),
      construct.UBInt16(u'length'),
      construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

  # au_to_subject64 // AUT_SUBJECT64:
  BSM_TOKEN_SUBJECT64 = construct.Struct(
      u'bsm_token_subject64',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64(u'terminal_port'),
      IPV4_STRUCT)

  # au_to_subject64_ex // AUT_SUBJECT64_EX:
  BSM_TOKEN_SUBJECT64_EX = construct.Struct(
      u'bsm_token_subject64_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32(u'terminal_port'),
      construct.UBInt32(u'terminal_type'),
      BSM_IP_TYPE_SHORT)

  # au_to_process32 // AUT_PROCESS32:
  BSM_TOKEN_PROCESS32 = construct.Struct(
      u'bsm_token_process32',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32(u'terminal_port'),
      IPV4_STRUCT)

  # au_to_process64 // AUT_PROCESS64:
  BSM_TOKEN_PROCESS64 = construct.Struct(
      u'bsm_token_process64',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64(u'terminal_port'),
      IPV4_STRUCT)

  # au_to_process32_ex // AUT_PROCESS32_EX:
  BSM_TOKEN_PROCESS32_EX = construct.Struct(
      u'bsm_token_process32_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32(u'terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_process64_ex // AUT_PROCESS64_EX:
  BSM_TOKEN_PROCESS64_EX = construct.Struct(
      u'bsm_token_process64_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64(u'terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_sock_inet32 // AUT_SOCKINET32:
  BSM_TOKEN_AUT_SOCKINET32 = construct.Struct(
      u'bsm_token_aut_sockinet32',
      construct.UBInt16(u'net_type'),
      construct.UBInt16(u'port_number'),
      IPV4_STRUCT)

  # INFO: checked against the source code of XNU, but not against
  # a real BSM file.
  BSM_TOKEN_AUT_SOCKINET128 = construct.Struct(
      u'bsm_token_aut_sockinet128',
      construct.UBInt16(u'net_type'),
      construct.UBInt16(u'port_number'),
      IPV6_STRUCT)

  INET6_ADDR_TYPE = construct.Struct(
      u'addr_type',
      construct.UBInt16(u'ip_type'),
      construct.UBInt16(u'source_port'),
      construct.UBInt64(u'saddr_high'),
      construct.UBInt64(u'saddr_low'),
      construct.UBInt16(u'destination_port'),
      construct.UBInt64(u'daddr_high'),
      construct.UBInt64(u'daddr_low'))

  INET4_ADDR_TYPE = construct.Struct(
      u'addr_type',
      construct.UBInt16(u'ip_type'),
      construct.UBInt16(u'source_port'),
      construct.UBInt32(u'source_address'),
      construct.UBInt16(u'destination_port'),
      construct.UBInt32(u'destination_address'))

  # au_to_socket_ex // AUT_SOCKET_EX
  # TODO: change the 26 for unixbsm.BSM_PROTOCOLS.INET6.
  BSM_TOKEN_AUT_SOCKINET32_EX = construct.Struct(
      u'bsm_token_aut_sockinet32_ex',
      construct.UBInt16(u'socket_domain'),
      construct.UBInt16(u'socket_type'),
      construct.Switch(
          u'structure_addr_port',
          _BsmTokenGetSocketDomain,
          {26: INET6_ADDR_TYPE},
          default=INET4_ADDR_TYPE))

  # au_to_sock_unix // AUT_SOCKUNIX
  BSM_TOKEN_SOCKET_UNIX = construct.Struct(
      u'bsm_token_au_to_sock_unix',
      construct.UBInt16(u'family'),
      construct.RepeatUntil(
          _BsmTokenIsEndOfString,
          construct.StaticField("path", 1)))

  # au_to_data // au_to_data
  # how_to_print: bsmtoken.BSM_TOKEN_DATA_PRINT.
  # data_type: bsmtoken.BSM_TOKEN_DATA_TYPE.
  # unit_count: number of type values.
  # BSM_TOKEN_DATA has an end field = data_type * unit_count.
  BSM_TOKEN_DATA = construct.Struct(
      u'bsm_token_data',
      construct.UBInt8(u'how_to_print'),
      construct.UBInt8(u'data_type'),
      construct.UBInt8(u'unit_count'))

  # au_to_attr32 // AUT_ATTR32
  BSM_TOKEN_ATTR32 = construct.Struct(
      u'bsm_token_attr32',
      construct.UBInt32(u'file_mode'),
      construct.UBInt32(u'uid'),
      construct.UBInt32(u'gid'),
      construct.UBInt32(u'file_system_id'),
      construct.UBInt64(u'file_system_node_id'),
      construct.UBInt32(u'device'))

  # au_to_attr64 // AUT_ATTR64
  BSM_TOKEN_ATTR64 = construct.Struct(
      u'bsm_token_attr64',
      construct.UBInt32(u'file_mode'),
      construct.UBInt32(u'uid'),
      construct.UBInt32(u'gid'),
      construct.UBInt32(u'file_system_id'),
      construct.UBInt64(u'file_system_node_id'),
      construct.UBInt64(u'device'))

  # au_to_exit // AUT_EXIT
  BSM_TOKEN_EXIT = construct.Struct(
      u'bsm_token_exit',
      construct.UBInt32(u'status'),
      construct.UBInt32(u'return_value'))

  # au_to_newgroups // AUT_NEWGROUPS
  # INFO: we must read BSM_TOKEN_DATA_INTEGER for each group.
  BSM_TOKEN_GROUPS = construct.UBInt16(u'group_number')

  # au_to_exec_env == au_to_exec_args
  BSM_TOKEN_EXEC_ENV = BSM_TOKEN_EXEC_ARGUMENTS

  # au_to_zonename // AUT_ZONENAME
  BSM_TOKEN_ZONENAME = BSM_TOKEN_TEXT

  # Token ID.
  # List of valid token IDs.
  # Token_ID -> [NAME_STRUCTURE, STRUCTURE]
  # Only checked structures have been added to the list of valid structures.
  BSM_TYPE_LIST = {
      17: [u'BSM_TOKEN_FILE', BSM_TOKEN_FILE],
      19: [u'BSM_TOKEN_TRAILER', BSM_TOKEN_TRAILER],
      20: [u'BSM_HEADER32', BSM_HEADER32],
      21: [u'BSM_HEADER64', BSM_HEADER64],
      33: [u'BSM_TOKEN_DATA', BSM_TOKEN_DATA],
      34: [u'BSM_TOKEN_IPC', BSM_TOKEN_IPC],
      35: [u'BSM_TOKEN_PATH', BSM_TOKEN_PATH],
      36: [u'BSM_TOKEN_SUBJECT32', BSM_TOKEN_SUBJECT32],
      38: [u'BSM_TOKEN_PROCESS32', BSM_TOKEN_PROCESS32],
      39: [u'BSM_TOKEN_RETURN32', BSM_TOKEN_RETURN32],
      40: [u'BSM_TOKEN_TEXT', BSM_TOKEN_TEXT],
      41: [u'BSM_TOKEN_OPAQUE', BSM_TOKEN_OPAQUE],
      42: [u'BSM_TOKEN_ADDR', BSM_TOKEN_ADDR],
      43: [u'BSM_TOKEN_IP', BSM_TOKEN_IP],
      44: [u'BSM_TOKEN_PORT', BSM_TOKEN_PORT],
      45: [u'BSM_TOKEN_ARGUMENT32', BSM_TOKEN_ARGUMENT32],
      47: [u'BSM_TOKEN_SEQUENCE', BSM_TOKEN_SEQUENCE],
      96: [u'BSM_TOKEN_ZONENAME', BSM_TOKEN_ZONENAME],
      113: [u'BSM_TOKEN_ARGUMENT64', BSM_TOKEN_ARGUMENT64],
      114: [u'BSM_TOKEN_RETURN64', BSM_TOKEN_RETURN64],
      116: [u'BSM_HEADER32_EX', BSM_HEADER32_EX],
      119: [u'BSM_TOKEN_PROCESS64', BSM_TOKEN_PROCESS64],
      122: [u'BSM_TOKEN_SUBJECT32_EX', BSM_TOKEN_SUBJECT32_EX],
      127: [u'BSM_TOKEN_AUT_SOCKINET32_EX', BSM_TOKEN_AUT_SOCKINET32_EX],
      128: [u'BSM_TOKEN_AUT_SOCKINET32', BSM_TOKEN_AUT_SOCKINET32]}

  # Untested structures.
  # When an untested structure is found, we also try to parse it using
  # these structures.
  BSM_TYPE_LIST_NOT_TESTED = {
      49: [u'BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32],
      50: [u'BSM_TOKEN_IPC_PERM', BSM_TOKEN_IPC_PERM],
      52: [u'BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS],
      59: [u'BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS],
      60: [u'BSM_TOKEN_EXEC_ARGUMENTS', BSM_TOKEN_EXEC_ARGUMENTS],
      61: [u'BSM_TOKEN_EXEC_ENV', BSM_TOKEN_EXEC_ENV],
      62: [u'BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32],
      82: [u'BSM_TOKEN_EXIT', BSM_TOKEN_EXIT],
      115: [u'BSM_TOKEN_ATTR64', BSM_TOKEN_ATTR64],
      117: [u'BSM_TOKEN_SUBJECT64', BSM_TOKEN_SUBJECT64],
      123: [u'BSM_TOKEN_PROCESS32_EX', BSM_TOKEN_PROCESS32_EX],
      124: [u'BSM_TOKEN_PROCESS64_EX', BSM_TOKEN_PROCESS64_EX],
      125: [u'BSM_TOKEN_SUBJECT64_EX', BSM_TOKEN_SUBJECT64_EX],
      126: [u'BSM_TOKEN_ADDR_EXT', BSM_TOKEN_ADDR_EXT],
      129: [u'BSM_TOKEN_AUT_SOCKINET128', BSM_TOKEN_AUT_SOCKINET128],
      130: [u'BSM_TOKEN_SOCKET_UNIX', BSM_TOKEN_SOCKET_UNIX]}

  def __init__(self):
    """Initializes a parser object."""
    super(BsmParser, self).__init__()
    # Create the dictionary with all token IDs: tested and untested.
    self.bsm_type_list_all = self.BSM_TYPE_LIST.copy()
    self.bsm_type_list_all.update(self.BSM_TYPE_LIST_NOT_TESTED)

  def _CopyByteArrayToBase16String(self, byte_array):
    """Copies a byte array into a base-16 encoded Unicode string.

    Args:
      byte_array: A byte array.

    Returns:
      A base-16 encoded Unicode string.
    """
    return u''.join([u'{0:02x}'.format(byte) for byte in byte_array])

  def _CopyUtf8ByteArrayToString(self, byte_array):
    """Copies a UTF-8 encoded byte array into a Unicode string.

    Args:
      byte_array: A byte array containing an UTF-8 encoded string.

    Returns:
      A Unicode string.
    """
    byte_stream = b''.join(map(chr, byte_array))

    try:
      string = byte_stream.decode(u'utf-8')
    except UnicodeDecodeError:
      logging.warning(u'Unable to decode UTF-8 formatted byte array.')
      string = byte_stream.decode(u'utf-8', errors=u'ignore')

    string, _, _ = string.partition(u'\x00')
    return string

  def _IPv4Format(self, address):
    """Converts an integer IPv4 address value into its 4-octet representation.

    Args:
      address: integer with the IPv4 address.

    Returns:
      IPv4 address in 4-octet representation (class A, B, C, D).
    """
    ipv4_string = self.IPV4_STRUCT.build(address)
    return socket.inet_ntoa(ipv4_string)

  def _IPv6Format(self, high, low):
    """Provides a readable IPv6 address from its high and low 64-bit parts.

    Args:
      high: 64-bit integer with the high part of the IPv6 address.
      low: 64-bit integer with the low part of the IPv6 address.

    Returns:
      String with a well represented IPv6 address.
    """
    ipv6_string = self.IPV6_STRUCT.build(
        construct.Container(high=high, low=low))
    # socket.inet_ntop is not supported on Windows.
    if hasattr(socket, u'inet_ntop'):
      return socket.inet_ntop(socket.AF_INET6, ipv6_string)

    # TODO: this approach can return a double "::", which is an illegal
    # IPv6 address.
    str_address = binascii.hexlify(ipv6_string)
    address = []
    blank = False
    for pos in range(0, len(str_address), 4):
      if str_address[pos:pos + 4] == u'0000':
        if not blank:
          address.append(u'')
          blank = True
      else:
        blank = False
        address.append(str_address[pos:pos + 4].lstrip(u'0'))
    return u':'.join(address)

  def _RawToUTF8(self, byte_stream):
    """Copies a UTF-8 byte stream into a Unicode string.

    Args:
      byte_stream: A byte stream containing an UTF-8 encoded string.

    Returns:
      A Unicode string.
""" try: string = byte_stream.decode(u'utf-8') except UnicodeDecodeError: logging.warning( u'Decode UTF8 failed, the message string may be cut short.') string = byte_stream.decode(u'utf-8', errors=u'ignore') return string.partition(b'\x00')[0] def ParseFileObject(self, parser_mediator, file_object, **kwargs): """Parses a BSM file-like object. Args: parser_mediator: A parser mediator object (instance of ParserMediator). file_object: A file-like object. Raises: UnableToParseFile: when the file cannot be parsed. """ file_object.seek(0, os.SEEK_SET) try: is_bsm = self.VerifyFile(parser_mediator, file_object) except (IOError, construct.FieldError) as exception: raise errors.UnableToParseFile( u'Unable to parse BSM file with error: {0:s}'.format( exception)) if not is_bsm: raise errors.UnableToParseFile(u'Not a BSM File, unable to parse.') event_object = self.ReadBSMEvent(parser_mediator, file_object) while event_object: parser_mediator.ProduceEvent(event_object) event_object = self.ReadBSMEvent(parser_mediator, file_object) def ReadBSMEvent(self, parser_mediator, file_object): """Returns a BsmEvent from a single BSM entry. Args: parser_mediator: A parser mediator object (instance of ParserMediator). file_object: A file-like object. Returns: An event object. """ # A list of tokens that has the entry. extra_tokens = [] offset = file_object.tell() # Token header, first token for each entry. try: token_id = self.BSM_TYPE.parse_stream(file_object) except (IOError, construct.FieldError): return bsm_type, structure = self.BSM_TYPE_LIST.get(token_id, [u'', u'']) if bsm_type == u'BSM_HEADER32': token = structure.parse_stream(file_object) elif bsm_type == u'BSM_HEADER64': token = structure.parse_stream(file_object) elif bsm_type == u'BSM_HEADER32_EX': token = structure.parse_stream(file_object) else: logging.warning( u'Token ID Header {0} not expected at position 0x{1:X}.' u'The parsing of the file cannot be continued'.format( token_id, file_object.tell())) # TODO: if it is a Mac OS X, search for the trailer magic value # as a end of the entry can be a possibility to continue. return length = token.bsm_header.length event_type = u'{0} ({1})'.format( bsmtoken.BSM_AUDIT_EVENT.get(token.bsm_header.event_type, u'UNKNOWN'), token.bsm_header.event_type) timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond( token.timestamp, token.microsecond) # Read until we reach the end of the record. while file_object.tell() < (offset + length): # Check if it is a known token. try: token_id = self.BSM_TYPE.parse_stream(file_object) except (IOError, construct.FieldError): logging.warning( u'Unable to parse the Token ID at position: {0:d}'.format( file_object.tell())) return if not token_id in self.BSM_TYPE_LIST: pending = (offset + length) - file_object.tell() extra_tokens.extend( self.TryWithUntestedStructures(file_object, token_id, pending)) else: token = self.BSM_TYPE_LIST[token_id][1].parse_stream( file_object) extra_tokens.append( self.FormatToken(token_id, token, file_object)) if file_object.tell() > (offset + length): logging.warning(u'Token ID {0} not expected at position 0x{1:X}.' u'Jumping for the next entry.'.format( token_id, file_object.tell())) try: file_object.seek((offset + length) - file_object.tell(), os.SEEK_CUR) except (IOError, construct.FieldError) as exception: logging.warning( u'Unable to jump to next entry with error: {0:s}'.format( exception)) return # BSM can be in more than one OS: BSD, Solaris and Mac OS X. 
    if parser_mediator.platform == u'MacOSX':
      # In Mac OS X the last two tokens are the return status and the trailer.
      if len(extra_tokens) >= 2:
        return_value = extra_tokens[-2:-1][0]
        if (return_value.startswith(u'[BSM_TOKEN_RETURN32') or
            return_value.startswith(u'[BSM_TOKEN_RETURN64')):
          _ = extra_tokens.pop(len(extra_tokens) - 2)
        else:
          return_value = u'Return unknown'
      else:
        return_value = u'Return unknown'

      if extra_tokens:
        trailer = extra_tokens[-1]
        if trailer.startswith(u'[BSM_TOKEN_TRAILER'):
          _ = extra_tokens.pop(len(extra_tokens) - 1)
        else:
          trailer = u'Trailer unknown'
      else:
        trailer = u'Trailer unknown'

      return MacBsmEvent(
          event_type, timestamp, u'. '.join(extra_tokens),
          return_value, trailer, offset)

    else:
      # Generic BSM format.
      if extra_tokens:
        trailer = extra_tokens[-1]
        if trailer.startswith(u'[BSM_TOKEN_TRAILER'):
          _ = extra_tokens.pop(len(extra_tokens) - 1)
        else:
          trailer = u'Trailer unknown'
      else:
        trailer = u'Trailer unknown'

      return BsmEvent(
          event_type, timestamp, u'. '.join(extra_tokens), trailer, offset)

  def VerifyFile(self, parser_mediator, file_object):
    """Checks if the file is a BSM file.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: file that we want to check.

    Returns:
      True if this is a valid BSM file, otherwise False.
    """
    if file_object.tell() != 0:
      file_object.seek(0)

    # First part of the entry is always a header.
    try:
      token_id = self.BSM_TYPE.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False

    if token_id not in self.BSM_TYPE_LIST:
      return False

    bsm_type, structure = self.BSM_TYPE_LIST.get(token_id, [u'', u''])
    try:
      if bsm_type in (u'BSM_HEADER32', u'BSM_HEADER64', u'BSM_HEADER32_EX'):
        header = structure.parse_stream(file_object)
      else:
        return False
    except (IOError, construct.FieldError):
      return False

    if header.bsm_header.version != self.AUDIT_HEADER_VERSION:
      return False

    try:
      token_id = self.BSM_TYPE.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False

    # If it is a Mac OS X BSM file, the next entry is a text token
    # indicating whether it is a normal start or a recovery track.
    if parser_mediator.platform == u'MacOSX':
      bsm_type_list = self.BSM_TYPE_LIST.get(token_id)
      if not bsm_type_list:
        return False

      if bsm_type_list[0] != u'BSM_TOKEN_TEXT':
        logging.warning(u'It is not a valid first entry for Mac OS X BSM.')
        return False

      try:
        token = self.BSM_TOKEN_TEXT.parse_stream(file_object)
      except (IOError, construct.FieldError):
        return False

      text = self._CopyUtf8ByteArrayToString(token.text)
      if (text != u'launchctl::Audit startup' and
          text != u'launchctl::Audit recovery'):
        logging.warning(u'It is not a valid first entry for Mac OS X BSM.')
        return False

    file_object.seek(0)
    return True

  def TryWithUntestedStructures(self, file_object, token_id, pending):
    """Tries to parse the pending part of the entry using untested structures.

    Args:
      file_object: BSM file.
      token_id: integer with the id that comes from the unknown token.
      pending: pending length of the entry.

    Returns:
      A list of extra token data that can be parsed using the non-tested
      structures. A message indicating that a structure cannot be parsed
      is added for unparsed structures.
    """
    # Data from the unknown structure.
    start_position = file_object.tell()
    start_token_id = token_id
    extra_tokens = []

    # Read all the "pending" bytes.
    try:
      if token_id in self.bsm_type_list_all:
        token = self.bsm_type_list_all[token_id][1].parse_stream(file_object)
        extra_tokens.append(self.FormatToken(token_id, token, file_object))

      while file_object.tell() < (start_position + pending):
        # Check if it is a known token.
        try:
          token_id = self.BSM_TYPE.parse_stream(file_object)
        except (IOError, construct.FieldError):
          logging.warning(
              u'Unable to parse the Token ID at position: {0:d}'.format(
                  file_object.tell()))
          return
        if token_id not in self.bsm_type_list_all:
          break
        token = self.bsm_type_list_all[token_id][1].parse_stream(file_object)
        extra_tokens.append(self.FormatToken(token_id, token, file_object))
    except (IOError, construct.FieldError):
      token_id = 255

    next_entry = start_position + pending
    if file_object.tell() != next_entry:
      # Unknown structure.
      logging.warning(
          u'Unknown Token at "0x{0:X}", ID: {1} (0x{2:X})'.format(
              start_position - 1, token_id, token_id))
      # TODO: another way to save this information must be found.
      extra_tokens.append(
          u'Plaso: some tokens from this entry can not be saved. '
          u'Entry at 0x{0:X} with unknown token id "0x{1:X}".'.format(
              start_position - 1, start_token_id))
      # Move to the next entry.
      file_object.seek(next_entry - file_object.tell(), os.SEEK_CUR)
      # An empty list is returned because we do not know which incorrect
      # structure prevented us from arriving at the expected end of the
      # entry.
      return []

    return extra_tokens

  # TODO: instead of comparing the text to know which structure was parsed,
  # it would be better to compare the numeric token_id directly; this is
  # less readable, but performs better.
  def FormatToken(self, token_id, token, file_object):
    """Parses the token depending on the type of the structure.

    Args:
      token_id: Identification integer of the token type.
      token: Token struct to parse.
      file_object: BSM file.

    Returns:
      String with the parsed token values.
""" if token_id not in self.bsm_type_list_all: return u'Type Unknown: {0:d} (0x{0:X})'.format(token_id) bsm_type, _ = self.bsm_type_list_all.get(token_id, [u'', u'']) if bsm_type in [ u'BSM_TOKEN_TEXT', u'BSM_TOKEN_PATH', u'BSM_TOKEN_ZONENAME' ]: try: string = self._CopyUtf8ByteArrayToString(token.text) except TypeError: string = u'Unknown' return u'[{0}: {1:s}]'.format(bsm_type, string) elif bsm_type in [ u'BSM_TOKEN_RETURN32', u'BSM_TOKEN_RETURN64', u'BSM_TOKEN_EXIT' ]: return u'[{0}: {1} ({2}), System call status: {3}]'.format( bsm_type, bsmtoken.BSM_ERRORS.get(token.status, u'Unknown'), token.status, token.return_value) elif bsm_type in [u'BSM_TOKEN_SUBJECT32', u'BSM_TOKEN_SUBJECT64']: return ( u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), ' u'pid({6}), session_id({7}), terminal_port({8}), ' u'terminal_ip({9})]').format( bsm_type, token.subject_data.audit_uid, token.subject_data.effective_uid, token.subject_data.effective_gid, token.subject_data.real_uid, token.subject_data.real_gid, token.subject_data.pid, token.subject_data.session_id, token.terminal_port, self._IPv4Format(token.ipv4)) elif bsm_type in [ u'BSM_TOKEN_SUBJECT32_EX', u'BSM_TOKEN_SUBJECT64_EX' ]: if token.bsm_ip_type_short.net_type == self.AU_IPv6: ip = self._IPv6Format(token.bsm_ip_type_short.ip_addr.high, token.bsm_ip_type_short.ip_addr.low) elif token.bsm_ip_type_short.net_type == self.AU_IPv4: ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr) else: ip = u'unknown' return ( u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), ' u'pid({6}), session_id({7}), terminal_port({8}), ' u'terminal_ip({9})]').format( bsm_type, token.subject_data.audit_uid, token.subject_data.effective_uid, token.subject_data.effective_gid, token.subject_data.real_uid, token.subject_data.real_gid, token.subject_data.pid, token.subject_data.session_id, token.terminal_port, ip) elif bsm_type in [u'BSM_TOKEN_ARGUMENT32', u'BSM_TOKEN_ARGUMENT64']: string = self._CopyUtf8ByteArrayToString(token.text) return u'[{0}: {1:s}({2}) is 0x{3:X}]'.format( bsm_type, string, token.num_arg, token.name_arg) elif bsm_type in [u'BSM_TOKEN_EXEC_ARGUMENTS', u'BSM_TOKEN_EXEC_ENV']: arguments = [] for _ in range(0, token): sub_token = self.BSM_TOKEN_EXEC_ARGUMENT.parse_stream( file_object) string = self._CopyUtf8ByteArrayToString(sub_token.text) arguments.append(string) return u'[{0}: {1:s}]'.format(bsm_type, u' '.join(arguments)) elif bsm_type == u'BSM_TOKEN_AUT_SOCKINET32': return (u'[{0}: {1} ({2}) open in port {3}. Address {4}]'.format( bsm_type, bsmtoken.BSM_PROTOCOLS.get(token.net_type, u'UNKNOWN'), token.net_type, token.port_number, self._IPv4Format(token.ipv4))) elif bsm_type == u'BSM_TOKEN_AUT_SOCKINET128': return u'[{0}: {1} ({2}) open in port {3}. Address {4}]'.format( bsm_type, bsmtoken.BSM_PROTOCOLS.get(token.net_type, u'UNKNOWN'), token.net_type, token.port_number, self._IPv6Format(token.ipv6.high, token.ipv6.low)) elif bsm_type == u'BSM_TOKEN_ADDR': return u'[{0}: {1}]'.format(bsm_type, self._IPv4Format(token)) elif bsm_type == u'BSM_TOKEN_IP': return u'[IPv4_Header: 0x{0:s}]'.format(token.encode(u'hex')) elif bsm_type == u'BSM_TOKEN_ADDR_EXT': return u'[{0}: {1} ({2}). 
Address {3}]'.format( bsm_type, bsmtoken.BSM_PROTOCOLS.get(token.net_type, u'UNKNOWN'), token.net_type, self._IPv6Format(token.ipv6.high, token.ipv6.low)) elif bsm_type == u'BSM_TOKEN_PORT': return u'[{0}: {1}]'.format(bsm_type, token) elif bsm_type == u'BSM_TOKEN_TRAILER': return u'[{0}: {1}]'.format(bsm_type, token.record_length) elif bsm_type == u'BSM_TOKEN_FILE': # TODO: if this timestamp is usefull, it must be extracted as a separate # event object. timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond( token.timestamp, token.microsecond) date_time = timelib.Timestamp.CopyToDatetime(timestamp, pytz.UTC) date_time_string = date_time.strftime(u'%Y-%m-%d %H:%M:%S') string = self._CopyUtf8ByteArrayToString(token.text) return u'[{0}: {1:s}, timestamp: {2:s}]'.format( bsm_type, string, date_time_string) elif bsm_type == u'BSM_TOKEN_IPC': return u'[{0}: object type {1}, object id {2}]'.format( bsm_type, token.object_type, token.object_id) elif bsm_type in [u'BSM_TOKEN_PROCESS32', u'BSM_TOKEN_PROCESS64']: return ( u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), ' u'pid({6}), session_id({7}), terminal_port({8}), ' u'terminal_ip({9})]').format( bsm_type, token.subject_data.audit_uid, token.subject_data.effective_uid, token.subject_data.effective_gid, token.subject_data.real_uid, token.subject_data.real_gid, token.subject_data.pid, token.subject_data.session_id, token.terminal_port, self._IPv4Format(token.ipv4)) elif bsm_type in [ u'BSM_TOKEN_PROCESS32_EX', u'BSM_TOKEN_PROCESS64_EX' ]: if token.bsm_ip_type_short.net_type == self.AU_IPv6: ip = self._IPv6Format(token.bsm_ip_type_short.ip_addr.high, token.bsm_ip_type_short.ip_addr.low) elif token.bsm_ip_type_short.net_type == self.AU_IPv4: ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr) else: ip = u'unknown' return ( u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), ' u'pid({6}), session_id({7}), terminal_port({8}), ' u'terminal_ip({9})]').format( bsm_type, token.subject_data.audit_uid, token.subject_data.effective_uid, token.subject_data.effective_gid, token.subject_data.real_uid, token.subject_data.real_gid, token.subject_data.pid, token.subject_data.session_id, token.terminal_port, ip) elif bsm_type == u'BSM_TOKEN_DATA': data = [] data_type = bsmtoken.BSM_TOKEN_DATA_TYPE.get(token.data_type, u'') if data_type == u'AUR_CHAR': for _ in range(token.unit_count): data.append( self.BSM_TOKEN_DATA_CHAR.parse_stream(file_object)) elif data_type == u'AUR_SHORT': for _ in range(token.unit_count): data.append( self.BSM_TOKEN_DAT_SHORT.parse_stream(file_object)) elif data_type == u'AUR_INT32': for _ in range(token.unit_count): data.append( self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object)) else: data.append(u'Unknown type data') # TODO: the data when it is string ends with ".", HW a space is return # after uses the UTF-8 conversion. 
      return u'[{0}: Format data: {1}, Data: {2}]'.format(
          bsm_type, bsmtoken.BSM_TOKEN_DATA_PRINT[token.how_to_print],
          self._RawToUTF8(u''.join(data)))

    elif bsm_type in [u'BSM_TOKEN_ATTR32', u'BSM_TOKEN_ATTR64']:
      return (
          u'[{0}: Mode: {1}, UID: {2}, GID: {3}, '
          u'File system ID: {4}, Node ID: {5}, Device: {6}]').format(
              bsm_type, token.file_mode, token.uid, token.gid,
              token.file_system_id, token.file_system_node_id,
              token.device)

    elif bsm_type == u'BSM_TOKEN_GROUPS':
      arguments = []
      for _ in range(token):
        arguments.append(
            u'{0:d}'.format(
                self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object)))
      return u'[{0}: {1:s}]'.format(bsm_type, u','.join(arguments))

    elif bsm_type == u'BSM_TOKEN_AUT_SOCKINET32_EX':
      if bsmtoken.BSM_PROTOCOLS.get(token.socket_domain, u'') == u'INET6':
        saddr = self._IPv6Format(
            token.structure_addr_port.saddr_high,
            token.structure_addr_port.saddr_low)
        daddr = self._IPv6Format(
            token.structure_addr_port.daddr_high,
            token.structure_addr_port.daddr_low)
      else:
        saddr = self._IPv4Format(token.structure_addr_port.source_address)
        daddr = self._IPv4Format(
            token.structure_addr_port.destination_address)

      return u'[{0}: from {1} port {2} to {3} port {4}]'.format(
          bsm_type, saddr, token.structure_addr_port.source_port,
          daddr, token.structure_addr_port.destination_port)

    elif bsm_type == u'BSM_TOKEN_IPC_PERM':
      return (
          u'[{0}: user id {1}, group id {2}, create user id {3}, '
          u'create group id {4}, access {5}]').format(
              bsm_type, token.user_id, token.group_id,
              token.creator_user_id, token.creator_group_id,
              token.access_mode)

    elif bsm_type == u'BSM_TOKEN_SOCKET_UNIX':
      string = self._CopyUtf8ByteArrayToString(token.path)
      return u'[{0}: Family {1}, Path {2:s}]'.format(
          bsm_type, token.family, string)

    elif bsm_type == u'BSM_TOKEN_OPAQUE':
      string = self._CopyByteArrayToBase16String(token.text)
      return u'[{0}: {1:s}]'.format(bsm_type, string)

    elif bsm_type == u'BSM_TOKEN_SEQUENCE':
      return u'[{0}: {1}]'.format(bsm_type, token)
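# Hedged sketch: _IPv4Format() above packs a 32-bit integer with IPV4_STRUCT
# and renders it in dotted-quad notation via socket.inet_ntoa. Instantiating
# the parser assumes the module-level helpers it references are importable.
parser = BsmParser()
assert parser._IPv4Format(0x7f000001) == '127.0.0.1'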
class JavaIDXParser(interface.FileObjectParser): """Parse Java WebStart Cache IDX files for download events. There are five structures defined. 6.02 files had one generic section that retained all data. From 6.03, the file went to a multi-section format where later sections were optional and had variable-lengths. 6.03, 6.04, and 6.05 files all have their main data section (#2) begin at offset 128. The short structure is because 6.05 files deviate after the 8th byte. So, grab the first 8 bytes to ensure it's valid, get the file version, then continue on with the correct structures. """ _INITIAL_FILE_OFFSET = None NAME = u'java_idx' DESCRIPTION = u'Parser for Java WebStart Cache IDX files.' IDX_SHORT_STRUCT = construct.Struct(u'magic', construct.UBInt8(u'busy'), construct.UBInt8(u'incomplete'), construct.UBInt32(u'idx_version')) IDX_602_STRUCT = construct.Struct( u'IDX_602_Full', construct.UBInt16(u'null_space'), construct.UBInt8(u'shortcut'), construct.UBInt32(u'content_length'), construct.UBInt64(u'last_modified_date'), construct.UBInt64(u'expiration_date'), construct.PascalString(u'version_string', length_field=construct.UBInt16(u'length')), construct.PascalString(u'url', length_field=construct.UBInt16(u'length')), construct.PascalString(u'namespace', length_field=construct.UBInt16(u'length')), construct.UBInt32(u'FieldCount')) IDX_605_SECTION_ONE_STRUCT = construct.Struct( u'IDX_605_Section1', construct.UBInt8(u'shortcut'), construct.UBInt32(u'content_length'), construct.UBInt64(u'last_modified_date'), construct.UBInt64(u'expiration_date'), construct.UBInt64(u'validation_date'), construct.UBInt8(u'signed'), construct.UBInt32(u'sec2len'), construct.UBInt32(u'sec3len'), construct.UBInt32(u'sec4len')) IDX_605_SECTION_TWO_STRUCT = construct.Struct( u'IDX_605_Section2', construct.PascalString(u'version', length_field=construct.UBInt16(u'length')), construct.PascalString(u'url', length_field=construct.UBInt16(u'length')), construct.PascalString(u'namespec', length_field=construct.UBInt16(u'length')), construct.PascalString(u'ip_address', length_field=construct.UBInt16(u'length')), construct.UBInt32(u'FieldCount')) # Java uses Pascal-style strings, but with a 2-byte length field. JAVA_READUTF_STRING = construct.Struct( u'Java.ReadUTF', construct.PascalString(u'string', length_field=construct.UBInt16(u'length'))) def ParseFileObject(self, parser_mediator, file_object, **kwargs): """Parses a Java WebStart Cache IDX file-like object. Args: parser_mediator: A parser mediator object (instance of ParserMediator). file_object: A file-like object. Raises: UnableToParseFile: when the file cannot be parsed. """ file_object.seek(0, os.SEEK_SET) try: magic = self.IDX_SHORT_STRUCT.parse_stream(file_object) except (IOError, construct.FieldError) as exception: raise errors.UnableToParseFile( u'Unable to parse Java IDX file with error: {0:s}.'.format( exception)) # Fields magic.busy and magic.incomplete are normally 0x00. They # are set to 0x01 if the file is currently being downloaded. Logic # checks for > 1 to avoid a race condition and still reject any # file with other data. # Field magic.idx_version is the file version, of which only # certain versions are supported. if magic.busy > 1 or magic.incomplete > 1: raise errors.UnableToParseFile(u'Not a valid Java IDX file') if not magic.idx_version in [602, 603, 604, 605]: raise errors.UnableToParseFile(u'Not a valid Java IDX file') # Obtain the relevant values from the file. The last modified date # denotes when the file was last modified on the HOST. 
For example, # when the file was uploaded to a web server. if magic.idx_version == 602: section_one = self.IDX_602_STRUCT.parse_stream(file_object) last_modified_date = section_one.last_modified_date url = section_one.url ip_address = u'Unknown' http_header_count = section_one.FieldCount elif magic.idx_version in [603, 604, 605]: # IDX 6.03 and 6.04 have two unused bytes before the structure. if magic.idx_version in [603, 604]: file_object.read(2) # IDX 6.03, 6.04, and 6.05 files use the same structures for the # remaining data. section_one = self.IDX_605_SECTION_ONE_STRUCT.parse_stream( file_object) last_modified_date = section_one.last_modified_date if file_object.get_size() > 128: file_object.seek(128, os.SEEK_SET) # Static offset for section 2. section_two = self.IDX_605_SECTION_TWO_STRUCT.parse_stream( file_object) url = section_two.url ip_address = section_two.ip_address http_header_count = section_two.FieldCount else: url = u'Unknown' ip_address = u'Unknown' http_header_count = 0 # File offset is now just prior to HTTP headers. Make sure there # are headers, and then parse them to retrieve the download date. download_date = None for field in range(0, http_header_count): field = self.JAVA_READUTF_STRING.parse_stream(file_object) value = self.JAVA_READUTF_STRING.parse_stream(file_object) if field.string == u'date': # Time string "should" be in UTC or have an associated time zone # information in the string itself. If that is not the case then # there is no reliable method for plaso to determine the proper # timezone, so the assumption is that it is UTC. try: download_date = timelib.Timestamp.FromTimeString( value.string, gmt_as_timezone=False) except errors.TimestampError: download_date = None parser_mediator.ProduceExtractionError( u'Unable to parse time value: {0:s}'.format( value.string)) if not url or not ip_address: raise errors.UnableToParseFile( u'Unexpected Error: URL or IP address not found in file.') event_data = JavaIDXEventData() event_data.idx_version = magic.idx_version event_data.ip_address = ip_address event_data.url = url date_time = dfdatetime_java_time.JavaTime(timestamp=last_modified_date) # TODO: Move the timestamp description into eventdata. event = time_events.DateTimeValuesEvent(date_time, u'File Hosted Date') parser_mediator.ProduceEventWithEventData(event, event_data) if section_one: expiration_date = section_one.get(u'expiration_date', None) if expiration_date: date_time = dfdatetime_java_time.JavaTime( timestamp=expiration_date) event = time_events.DateTimeValuesEvent( date_time, definitions.TIME_DESCRIPTION_EXPIRATION) parser_mediator.ProduceEventWithEventData(event, event_data) if download_date: event = time_events.TimestampEvent( download_date, definitions.TIME_DESCRIPTION_FILE_DOWNLOADED) parser_mediator.ProduceEventWithEventData(event, event_data)
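# A quick sanity check of the IDX_SHORT_STRUCT layout above: a quiescent
# (not busy, complete) version 6.05 header is six bytes, with the version
# stored as a big-endian 32-bit integer (605 == 0x25d). A minimal sketch:

import construct

IDX_SHORT_STRUCT = construct.Struct(
    'magic',
    construct.UBInt8('busy'),
    construct.UBInt8('incomplete'),
    construct.UBInt32('idx_version'))

magic = IDX_SHORT_STRUCT.parse(b'\x00\x00\x00\x00\x02\x5d')
assert magic.busy == 0 and magic.incomplete == 0
assert magic.idx_version == 605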
def _getInvariantCode(self, vbrType, vbrStruct):
    """
    Helper method that finds all the sections of the boot code that can be hashed and compared to a whitelist.
    This means that localized strings and other variable parameters (BPB, etc...) are excluded.
    Currently, this method only supports NTFS and Bitlocker VBR.

    Args:
        vbrType: unicode string corresponding to the VBR type ('NTFS' or 'bitlocker')
        vbrStruct: construct.container of the VBR

    Returns: 2-tuple (unicode string of expected loader, concatenated strings of invariant sections of code)
    """
    codeStart = 0
    codeEnd = None
    invariantCode = str()
    expectedLoader = None

    if vbrType == 'NTFS':
        # The first three bytes are a jump over the NTFS BPB to where the code really starts (0x54) and a NOP
        invariantCode += vbrStruct.JumpOverBPB
        codeStart = 0x54
        # NTFS VBR contains localized strings which must be excluded from the hash computation.
        # Before Windows 8, these strings are located at 4 different offsets which can be calculated by adding
        # 0x100 to the values respectively stored in bytes 0x1f8, 0x1f9, 0x1fa and 0x1fb.
        # Starting from Windows 8, these strings are located at 3 different offsets which are directly stored in
        # little-endian words respectively at 0x1f6, 0x1f8 and 0x1fa.
        # Since there is no easy way to tell beforehand which version of Windows we are dealing with, we first
        # assume it is Windows < 8 by testing 0x1f8 against all the known first offsets. If all tests fail,
        # assume it is Windows >= 8 and check 0x1f6 against the only known first offset (to date).
        firstStrOffset = construct.UBInt8('FirstStringOffset').parse(
            self._raw[0x1f8])
        # Windows NT5
        if firstStrOffset == 0x83:
            expectedLoader = 'NT5.1/NT5.2 VBR'
            codeEnd = 0x100 + firstStrOffset
        # Windows NT6.0
        elif firstStrOffset == 0x80:
            expectedLoader = 'NT6.0 VBR'
            codeEnd = 0x100 + firstStrOffset
        # Windows NT6.1
        elif firstStrOffset == 0x8c:
            expectedLoader = 'NT6.1 VBR'
            codeEnd = 0x100 + firstStrOffset
        # Windows NT6.2+
        else:
            firstStrOffset = construct.ULInt16('FirstStringOffset').parse(
                self._raw[0x1f6:0x1f8])
            if firstStrOffset == 0x18a:
                expectedLoader = 'NT6.2+ VBR'
                codeEnd = firstStrOffset
        if codeEnd is None:
            self._suspiciousBehaviour.append(
                'Invalid string offset: {0:#x}'.format(firstStrOffset))
            self._logger.debug(
                'First localized string offset is wrong for a NTFS VBR: {0:#x}. '
                'It should be 0x83, 0x80, 0x8c or 0x18a.'.format(
                    firstStrOffset))
            codeEnd = 0
    elif vbrType == 'bitlocker':
        expectedLoader = 'NT6.1+ Bitlocker VBR'
        # The first three bytes are a jump over the NTFS BPB to where the code really starts (0x5A) and a NOP
        invariantCode += vbrStruct.JumpOverBPB
        # First section of code (_BITLOCKER_VBR_STRUCT.Code1)
        invariantCode += vbrStruct.Code1
        # In the second section of code, there are localized strings which must be excluded from hash computation.
        # Their offsets are stored in the last 3 bytes before the VBR signature (0x55aa).
        # For Windows 8, 8.1 and 10, the first string offset seems to always be 0x100 (ie. FirstStrOffset = 0x00)
        if vbrStruct.FirstStrOffset != 0:
            self._suspiciousBehaviour.append(
                'Invalid string offset: {0:#x}'.format(
                    vbrStruct.FirstStrOffset))
            self._logger.debug(
                'First localized string offset is wrong for a Bitlocker VBR. '
                'It should be 0x00: {0:#x}'.format(vbrStruct.FirstStrOffset))
        codeStart = 0xc8  # Offset of Code2
        codeEnd = 0x100 + vbrStruct.FirstStrOffset
    else:
        raise NotImplementedError(
            'VBR type "{0}" is not implemented yet'.format(vbrType))

    self._logger.debug(
        'Expecting {0}. Code starts at {1:#x} and ends at {2:#x}'.format(
            expectedLoader, codeStart, codeEnd))

    invariantCode += self._raw[codeStart:codeEnd]
    return expectedLoader, invariantCode
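# Both _getInvariantCode variants feed their result into a hash-and-compare
# step (_matchHash). A minimal sketch of that step, assuming a hypothetical
# whitelist mapping SHA-256 hex digests to loader names; the real whitelist
# format is not shown in this excerpt:

import hashlib

KNOWN_LOADER_HASHES = {
    # Placeholder digest, for illustration only.
    '0' * 64: 'NT6.1 VBR',
}

def match_invariant_code(invariant_code):
    """Returns the whitelisted loader name for the code section, if any."""
    digest = hashlib.sha256(invariant_code).hexdigest()
    return KNOWN_LOADER_HASHES.get(digest)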
class _GzipMember(object):
  """Gzip member.

  Gzip files have no index of members, so each member must be read
  sequentially before metadata and random seeks are possible. This class
  provides caching of gzip member data during the initial read of each member.

  Attributes:
    comment (str): comment stored in the member.
    member_end_offset (int): offset to the end of the member in the parent
        file object.
    member_start_offset (int): offset to the start of the member in the
        parent file object.
    operating_system (int): type of file system on which the compression
        took place.
    original_filename (str): original filename of the uncompressed file.
    uncompressed_data_offset (int): offset of the start of the uncompressed
        data in this member relative to the whole gzip file's uncompressed
        data.
    uncompressed_data_size (int): total size of the data in this gzip member
        after decompression.
  """

  _MEMBER_HEADER_STRUCT = construct.Struct(
      'file_header',
      construct.ULInt16('signature'),
      construct.UBInt8('compression_method'),
      construct.UBInt8('flags'),
      construct.SLInt32('modification_time'),
      construct.UBInt8('extra_flags'),
      construct.UBInt8('operating_system'))

  _MEMBER_FOOTER_STRUCT = construct.Struct(
      'file_footer',
      construct.ULInt32('checksum'),
      construct.ULInt32('uncompressed_data_size'))

  _GZIP_SIGNATURE = 0x8b1f

  _COMPRESSION_METHOD_DEFLATE = 8

  _FLAG_FTEXT = 0x01
  _FLAG_FHCRC = 0x02
  _FLAG_FEXTRA = 0x04
  _FLAG_FNAME = 0x08
  _FLAG_FCOMMENT = 0x10

  # The maximum size of the uncompressed data cache.
  _UNCOMPRESSED_DATA_CACHE_SIZE = 2 * 1024 * 1024

  def __init__(
      self, file_object, member_start_offset, uncompressed_data_offset):
    """Initializes a gzip member.

    Args:
      file_object (FileIO): file-like object, containing the gzip member.
      member_start_offset (int): offset to the beginning of the gzip member
          in the containing file.
      uncompressed_data_offset (int): current offset into the uncompressed
          data in the containing file.
    """
    self.comment = None
    self.modification_time = None
    self.operating_system = None
    self.original_filename = None

    # Offset into this member's uncompressed data of the first item in
    # the cache.
    self._cache_start_offset = None
    # Offset into this member's uncompressed data of the last item in
    # the cache.
    self._cache_end_offset = None
    self._cache = b''

    # Total size of the data in this gzip member after decompression.
    self.uncompressed_data_size = None
    # Offset of the start of the uncompressed data in this member relative to
    # the whole gzip file's uncompressed data.
    self.uncompressed_data_offset = uncompressed_data_offset

    # Offset to the start of the member in the parent file object.
    self.member_start_offset = member_start_offset

    # Initialize the member with data.
    self._file_object = file_object
    self._file_object.seek(self.member_start_offset, os.SEEK_SET)

    self._ReadAndParseHeader(file_object)
    # Offset to the beginning of the compressed data in the file object.
    self._compressed_data_start = file_object.get_offset()

    self._decompressor_state = _GzipDecompressorState(
        self._compressed_data_start)

    self._LoadDataIntoCache(file_object, 0, read_all_data=True)

    self._ReadAndParseFooter(file_object)

    # Offset to the end of the member in the parent file object.
    self.member_end_offset = file_object.get_offset()

  def GetCacheSize(self):
    """Determines the size of the uncompressed cached data.

    Returns:
      int: number of cached bytes.
    """
    # Note that an offset of 0 is valid, so the offsets are compared against
    # None rather than relying on truthiness.
    if self._cache_start_offset is None or self._cache_end_offset is None:
      return 0
    return self._cache_end_offset - self._cache_start_offset

  def IsCacheFull(self):
    """Checks whether the uncompressed data cache is full.

    Returns:
      bool: True if the cache is full.
    """
    return self.GetCacheSize() >= self._UNCOMPRESSED_DATA_CACHE_SIZE

  def FlushCache(self):
    """Empties the cache that holds cached decompressed data."""
    self._cache = b''
    self._cache_start_offset = None
    self._cache_end_offset = None
    self._ResetDecompressorState()

  def _ResetDecompressorState(self):
    """Resets the state of the internal decompression object."""
    self._decompressor_state = _GzipDecompressorState(
        self._compressed_data_start)

  def ReadAtOffset(self, offset, size=None):
    """Reads a byte string from the gzip member at the specified offset.

    The function will read a byte string of the specified size or
    all of the remaining data if no size was specified.

    Args:
      offset (int): offset within the uncompressed data in this member to
          read from.
      size (Optional[int]): maximum number of bytes to read, where None
          represents all remaining data, to a maximum of the uncompressed
          cache size.

    Returns:
      bytes: data read.

    Raises:
      IOError: if the read failed.
      ValueError: if a negative read size or offset is specified.
    """
    if size is not None and size < 0:
      raise ValueError('Invalid size value {0:d}'.format(size))

    if offset < 0:
      raise ValueError('Invalid offset value {0:d}'.format(offset))

    if size == 0 or offset >= self.uncompressed_data_size:
      return b''

    if self._cache_start_offset is None:
      self._LoadDataIntoCache(self._file_object, offset)

    if offset > self._cache_end_offset or offset < self._cache_start_offset:
      self.FlushCache()
      self._LoadDataIntoCache(self._file_object, offset)

    cache_offset = offset - self._cache_start_offset
    if not size:
      return self._cache[cache_offset:]

    data_end_offset = cache_offset + size

    if data_end_offset > self._cache_end_offset:
      return self._cache[cache_offset:]

    return self._cache[cache_offset:data_end_offset]

  def _LoadDataIntoCache(
      self, file_object, minimum_offset, read_all_data=False):
    """Reads and decompresses the data in the member.

    This function loads as much data as possible into the cache, up to
    UNCOMPRESSED_DATA_CACHE_SIZE bytes.

    Args:
      file_object (FileIO): file-like object.
      minimum_offset (int): offset into this member's uncompressed data at
          which the cache should start.
      read_all_data (bool): True if all the compressed data should be read
          from the member.
    """
    # Decompression can only be performed from beginning to end of the stream.
    # So, if data before the current position of the decompressor in the
    # stream is required, it's necessary to throw away the current
    # decompression state and start again.
    if minimum_offset < self._decompressor_state.uncompressed_offset:
      self._ResetDecompressorState()

    while not self.IsCacheFull() or read_all_data:
      decompressed_data = self._decompressor_state.Read(file_object)
      decompressed_data_length = len(decompressed_data)
      decompressed_end_offset = self._decompressor_state.uncompressed_offset
      decompressed_start_offset = (
          decompressed_end_offset - decompressed_data_length)

      data_to_add = decompressed_data
      added_data_start_offset = decompressed_start_offset

      if decompressed_start_offset < minimum_offset:
        data_to_add = None

      if decompressed_start_offset < minimum_offset < decompressed_end_offset:
        data_add_offset = decompressed_end_offset - minimum_offset
        # Only the trailing data_add_offset bytes fall past minimum_offset.
        data_to_add = decompressed_data[-data_add_offset:]
        added_data_start_offset = decompressed_end_offset - data_add_offset

      if not self.IsCacheFull() and data_to_add:
        self._cache = b''.join([self._cache, data_to_add])
        if self._cache_start_offset is None:
          self._cache_start_offset = added_data_start_offset
        if self._cache_end_offset is None:
          self._cache_end_offset = self._cache_start_offset + len(data_to_add)
        else:
          self._cache_end_offset += len(data_to_add)

      # If there's no more data in the member, the unused_data value is
      # populated in the decompressor. When this situation arises, we rewind
      # to the end of the compressed_data section.
      unused_data = self._decompressor_state.GetUnusedData()
      if unused_data:
        seek_offset = -len(unused_data)
        file_object.seek(seek_offset, os.SEEK_CUR)
        self._ResetDecompressorState()
        break

  def _ReadAndParseHeader(self, file_object):
    """Reads the member header and sets relevant member values.

    Args:
      file_object (FileIO): file-like object to read from.

    Raises:
      FileFormatError: if file format related errors are detected.
    """
    member_header = self._MEMBER_HEADER_STRUCT.parse_stream(file_object)

    if member_header.signature != self._GZIP_SIGNATURE:
      raise errors.FileFormatError(
          'Unsupported file signature: 0x{0:04x}.'.format(
              member_header.signature))

    if member_header.compression_method != self._COMPRESSION_METHOD_DEFLATE:
      raise errors.FileFormatError(
          'Unsupported compression method: {0:d}.'.format(
              member_header.compression_method))

    self.modification_time = member_header.modification_time
    self.operating_system = member_header.operating_system

    if member_header.flags & self._FLAG_FEXTRA:
      extra_field_data_size = construct.ULInt16(
          'extra_field_data_size').parse_stream(file_object)
      file_object.seek(extra_field_data_size, os.SEEK_CUR)

    if member_header.flags & self._FLAG_FNAME:
      # Since encoding is set construct will convert the C string to Unicode.
      # Note that construct 2 does not support the encoding to be a Unicode
      # string.
      self.original_filename = construct.CString(
          'original_filename', encoding=b'iso-8859-1').parse_stream(
              file_object)

    if member_header.flags & self._FLAG_FCOMMENT:
      # Since encoding is set construct will convert the C string to Unicode.
      # Note that construct 2 does not support the encoding to be a Unicode
      # string.
      self.comment = construct.CString(
          'comment', encoding=b'iso-8859-1').parse_stream(file_object)

    if member_header.flags & self._FLAG_FHCRC:
      file_object.read(2)

  def _ReadAndParseFooter(self, file_object):
    """Reads the member footer and sets relevant member values.

    Args:
      file_object (FileIO): file-like object to read from.

    Raises:
      FileFormatError: if file format related errors are detected.
    """
    file_footer = self._MEMBER_FOOTER_STRUCT.parse_stream(file_object)
    self.uncompressed_data_size = file_footer.uncompressed_data_size
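# A standalone illustration of the member header layout parsed by
# _ReadAndParseHeader above: ten bytes, starting with the 0x8b1f signature
# (stored little-endian, so the stream begins \x1f\x8b), deflate (8), no
# flags, a zero modification time, and operating system 3 (Unix).

import construct

MEMBER_HEADER = construct.Struct(
    'file_header',
    construct.ULInt16('signature'),
    construct.UBInt8('compression_method'),
    construct.UBInt8('flags'),
    construct.SLInt32('modification_time'),
    construct.UBInt8('extra_flags'),
    construct.UBInt8('operating_system'))

header = MEMBER_HEADER.parse(b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x03')
assert header.signature == 0x8b1f
assert header.compression_method == 8
assert header.operating_system == 3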
# Greedy raw-byte field used by the messages below (assumed definition: the
# opening of this adapter is missing from the source, so it is reconstructed
# here from how Bytes('bits') and Bytes('data') are used).
Bytes = lambda name: c.ExprAdapter(
    c.OptionalGreedyRange(c.Field(name, 1)),
    encoder=lambda obj, ctx: list(obj),
    decoder=lambda obj, ctx: ''.join(obj))

_commands = {
    'choke':        [c.Magic('\x00')],
    'unchoke':      [c.Magic('\x01')],
    'interested':   [c.Magic('\x02')],
    'uninterested': [c.Magic('\x03')],
    'have':         [c.Magic('\x04'), c.UBInt32('index')],
    'bitfield':     [c.Magic('\x05'), Bytes('bits')],
    'request':      [c.Magic('\x06'), c.UBInt32('index'),
                     c.UBInt32('begin'), c.UBInt32('length')],
    'piece':        [c.Magic('\x07'), c.UBInt32('index'),
                     c.UBInt32('begin'), Bytes('data')],
    'cancel':       [c.Magic('\x08'), c.UBInt32('index'),
                     c.UBInt32('begin'), c.UBInt32('length')],
    'port':         [c.Magic('\x09'), c.UBInt16('port')],
    'extended':     [c.Magic('\x14'), c.UBInt8('cmd'), Bytes('msg')],
}
for k, v in _commands.items():
    _commands[k] = c.Struct(k, *v)

def build_handshake(info_hash, host_id, extensions):
    bits = bitarray.bitarray([0] * 64, endian='little')
    for i in extensions:
        bits[i] = True
    obj = c.Container(info_hash=info_hash, peer_id=host_id,
                      reserved=bits.tobytes())
    return _handshake.build(obj)
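# A quick usage sketch for the message table above, assuming construct 2.x
# semantics where Magic/Const subcons supply their own value on build: a
# 'have' message is the 0x04 message ID followed by a big-endian piece index.

have_raw = _commands['have'].build(c.Container(index=7))
assert have_raw == '\x04\x00\x00\x00\x07'
assert _commands['have'].parse(have_raw).index == 7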
class CupsIppParser(interface.BaseParser):
  """Parser for CUPS IPP files."""

  NAME = 'cups_ipp'
  DESCRIPTION = u'Parser for CUPS IPP files.'

  # INFO:
  # For each file, we have only one document with three different timestamps:
  # creation, processing and completion.
  # Format:
  # [HEADER: MAGIC + KNOWN_TYPE][GROUP A]...[GROUP Z][GROUP_END: 0x03]
  # GROUP: [GROUP ID][PAIR A]...[PAIR Z] where [PAIR: NAME + VALUE]
  # GROUP ID: [1byte ID]
  # PAIR: [TagID][\x00][Name][Value]
  # TagID: 1 byte integer with the type of "Value".
  # Name: [Length][Text][\00]
  # Name can be empty when the name has more than one value.
  # Example: family name "lopez mata" with more than one surname.
  # Type_Text + [0x06, family, 0x00] + [0x05, lopez, 0x00] +
  # Type_Text + [0x00, 0x00] + [0x04, mata, 0x00]
  # Value: can be integer, boolean, or text, as indicated by TagID.
  # If boolean, Value: [\x01][0x00(False)] or [\x01(True)]
  # If integer, Value: [\x04][Integer]
  # If text, Value: [Length text][Text][\00]

  # Magic number that identifies the supported CUPS IPP version.
  IPP_MAJOR_VERSION = 2
  IPP_MINOR_VERSION = 0
  # Supported Operation ID.
  IPP_OP_ID = 5

  # CUPS IPP File header.
  CUPS_IPP_HEADER = construct.Struct(
      'cups_ipp_header_struct',
      construct.UBInt8('major_version'),
      construct.UBInt8('minor_version'),
      construct.UBInt16('operation_id'),
      construct.UBInt32('request_id'))

  # Group ID that indicates the end of the IPP Control file.
  GROUP_END = 3
  # Identification Groups.
  GROUP_LIST = [1, 2, 4, 5, 6, 7]

  # Type ID.
  TYPE_GENERAL_INTEGER = 32
  TYPE_INTEGER = 33
  TYPE_ENUMERATION = 35
  TYPE_BOOL = 34

  # Type of values that can be extracted.
  INTEGER_8 = construct.UBInt8('integer')
  INTEGER_32 = construct.UBInt32('integer')
  TEXT = construct.PascalString(
      'text', length_field=construct.UBInt8('length'))
  BOOLEAN = construct.Struct(
      'boolean_value', construct.Padding(1), INTEGER_8)
  INTEGER = construct.Struct(
      'integer_value', construct.Padding(1), INTEGER_32)

  # Name of the pair.
  PAIR_NAME = construct.Struct('pair_name', TEXT, construct.Padding(1))

  # Specific CUPS IPP to generic name.
  NAME_PAIR_TRANSLATION = {
      'printer-uri': u'uri',
      'job-uuid': u'job_id',
      'DestinationPrinterID': u'printer_id',
      'job-originating-user-name': u'user',
      'job-name': u'job_name',
      'document-format': u'doc_type',
      'job-originating-host-name': u'computer_name',
      'com.apple.print.JobInfo.PMApplicationName': u'application',
      'com.apple.print.JobInfo.PMJobOwner': u'owner'}

  def Parse(self, parser_context, file_entry):
    """Extracts events from a CUPS IPP file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
    """
    file_object = file_entry.GetFileObject()
    file_object.seek(0, os.SEEK_SET)

    try:
      header = self.CUPS_IPP_HEADER.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      file_object.close()
      raise errors.UnableToParseFile(
          u'Unable to parse CUPS IPP Header with error: {0:s}'.format(
              exception))

    if (header.major_version != self.IPP_MAJOR_VERSION or
        header.minor_version != self.IPP_MINOR_VERSION):
      file_object.close()
      raise errors.UnableToParseFile(
          u'[{0:s}] Unsupported version number.'.format(self.NAME))

    if header.operation_id != self.IPP_OP_ID:
      # Warn if the operation ID differs from the standard one. We should be
      # able to parse the file nonetheless.
      logging.debug(
          u'[{0:s}] Unsupported operation identifier in file: {1:s}.'.format(
              self.NAME, parser_context.GetDisplayName(file_entry)))

    # Read the pairs extracting the name and the value.
    data_dict = {}
    name, value = self.ReadPair(parser_context, file_entry, file_object)
    while name or value:
      # Translate the known "name" CUPS IPP to a generic name value.
      pretty_name = self.NAME_PAIR_TRANSLATION.get(name, name)
      data_dict.setdefault(pretty_name, []).append(value)
      name, value = self.ReadPair(parser_context, file_entry, file_object)

    # Yield the events.
    if u'time-at-creation' in data_dict:
      event_object = CupsIppEvent(
          data_dict['time-at-creation'][0],
          eventdata.EventTimestamp.CREATION_TIME, data_dict)
      parser_context.ProduceEvent(
          event_object, parser_name=self.NAME, file_entry=file_entry)

    if u'time-at-processing' in data_dict:
      event_object = CupsIppEvent(
          data_dict['time-at-processing'][0],
          eventdata.EventTimestamp.START_TIME, data_dict)
      parser_context.ProduceEvent(
          event_object, parser_name=self.NAME, file_entry=file_entry)

    if u'time-at-completed' in data_dict:
      event_object = CupsIppEvent(
          data_dict['time-at-completed'][0],
          eventdata.EventTimestamp.END_TIME, data_dict)
      parser_context.ProduceEvent(
          event_object, parser_name=self.NAME, file_entry=file_entry)

    file_object.close()

  def ReadPair(self, parser_context, file_entry, file_object):
    """Reads an attribute name and value pair from a CUPS IPP event.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      file_object: a file-like object that points to a file.

    Returns:
      A tuple of name and value. If the name and value cannot be read, both
      are set to None.
    """
    # Pair = Type ID + Name + Value.
    try:
      # Can be:
      #   Group ID + IDtag = Group ID (1byte) + Tag ID (1byte) + '0x00'.
      #   IDtag = Tag ID (1byte) + '0x00'.
      type_id = self.INTEGER_8.parse_stream(file_object)
      if type_id == self.GROUP_END:
        return None, None

      elif type_id in self.GROUP_LIST:
        # If it is a group ID we must read the next byte that contains
        # the first TagID.
        type_id = self.INTEGER_8.parse_stream(file_object)

      # 0x00 separator character.
      _ = self.INTEGER_8.parse_stream(file_object)
    except (IOError, construct.FieldError):
      logging.warning(
          u'[{0:s}] Unsupported identifier in file: {1:s}.'.format(
              self.NAME, parser_context.GetDisplayName(file_entry)))
      return None, None

    # Name = Length name + name + 0x00
    try:
      name = self.PAIR_NAME.parse_stream(file_object).text
    except (IOError, construct.FieldError):
      logging.warning(
          u'[{0:s}] Unsupported name in file: {1:s}.'.format(
              self.NAME, parser_context.GetDisplayName(file_entry)))
      return None, None

    # Value: can be integer, boolean or text, selected by Type ID.
    try:
      if type_id in [
          self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER,
          self.TYPE_ENUMERATION]:
        value = self.INTEGER.parse_stream(file_object).integer
      elif type_id == self.TYPE_BOOL:
        value = bool(self.BOOLEAN.parse_stream(file_object).integer)
      else:
        value = self.TEXT.parse_stream(file_object)
    except (IOError, construct.FieldError):
      logging.warning(
          u'[{0:s}] Unsupported value in file: {1:s}.'.format(
              self.NAME, parser_context.GetDisplayName(file_entry)))
      return None, None

    return name, value
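# A small sketch of the [Length][Text] value encoding described in the INFO
# block above, exercising the same PascalString construct the parser uses:

import construct

TEXT = construct.PascalString(
    'text', length_field=construct.UBInt8('length'))

assert TEXT.parse(b'\x05lopez') == 'lopez'
assert TEXT.build('mata') == b'\x04mata'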
class MasterBootRecord(BootRecord):
    _MBR_STRUCT = construct.Struct(
        "mbr",
        construct.HexDumpAdapter(construct.Bytes("bootloader_code", 440)),
        construct.Field('disk_signature', 4),
        construct.Padding(2),
        construct.Array(
            4,
            construct.Struct(
                "partitions",
                construct.SLInt8("state"),
                construct.BitStruct(
                    "beginning",
                    construct.Octet("head"),
                    construct.Bits("sect", 6),
                    construct.Bits("cyl", 10),
                ),
                construct.Enum(
                    construct.UBInt8("type"),
                    Nothing=0x00,
                    FAT12=0x01,
                    XENIX_ROOT=0x02,
                    XENIX_USR=0x03,
                    FAT16_old=0x04,
                    Extended_DOS=0x05,
                    FAT16=0x06,
                    FAT32=0x0b,
                    FAT32_LBA=0x0c,
                    NTFS=0x07,
                    LINUX_SWAP=0x82,
                    LINUX_NATIVE=0x83,
                    PROTECTIVE_MBR=0xee,
                    _default_=construct.Pass,
                ),
                construct.BitStruct(
                    "ending",
                    construct.Octet("head"),
                    construct.Bits("sect", 6),
                    construct.Bits("cyl", 10),
                ),
                construct.ULInt32("sector_offset"),  # offset from MBR in sectors
                construct.ULInt32("size"),  # in sectors
            )),
        construct.Const(construct.Bytes("signature", 2), '55aa'.decode('hex')),
    )

    def __init__(self, filePath, size, offset=None, whitelist=()):
        self._type = 'MBR'
        super(MasterBootRecord, self).__init__(filePath, size, offset,
                                               whitelist)

    def _parse(self):
        """
        Main method in charge of parsing the MBR. It will try to parse the boot record according to the documented
        known structure and extract the partition table, disk signature and code section. It will then try to narrow
        down invariant code, hash it and match the hash against a whitelist. If no match was found, it will try some
        simple heuristics to detect malicious behaviours.

        Returns: nothing
        """
        try:
            mbr = self._MBR_STRUCT.parse(self._raw)
        except construct.core.ConstructError as e:
            raise InvalidMBRError('Invalid MBR structure: {0}\n{1}'.format(
                e, hexdump(self._raw)))

        self._parsePartTable(mbr.partitions)

        # Windows stores the disk signature at 0x1B8, other MBRs seem to leave this area alone
        self._diskSignature = mbr.disk_signature

        # If the code section is null, check for a protective MBR signature (detected in partition table parsing).
        # If found, then the machine is likely using UEFI instead of BIOS to boot. If not, it could mean that the
        # sample being analyzed has been tampered with by a bootkit
        if mbr.bootloader_code.encode('hex') == 440 * '00':
            if 'Protective MBR' in self._signature:
                self._signature.append('UEFI (no legacy boot code)')
            else:
                self._suspiciousBehaviour.append('Code section is null')
        else:
            expectedLoader, invariantCode = self._getInvariantCode(
                mbr.bootloader_code)
            codeHash = hashlib.sha256(invariantCode)
            self._matchHash(codeHash, expectedLoader)
            if len(self._signature) == 0:
                # No whitelisted signature matched, try some simple heuristics to flag this MBR as malicious
                # Note that the self._checkCode method is only given the "invariant" code section to help with the
                # disassembling. This will obviously lead to broken offsets, but it doesn't matter since the
                # heuristics don't use them.
                self._checkCode(invariantCode)

    def _parsePartTable(self, partitions):
        """
        Private method that parses the partition table of the MBR. Updates the self._partTable list.

        Args:
            partitions: Construct.Container object of the partition table

        Returns: nothing
        """
        partNum = 0
        for part in partitions:
            partNum += 1
            # Assume a partition entry without size (in LBA) or type is invalid, and do not include it in the listing.
            if part.size != 0 and part.type != 'Nothing':
                self._partTable.append((partNum, part.state < 0, part.type,
                                        part.sector_offset, part.size))
            else:
                self._logger.debug('Ignoring invalid partition: %s', part)
            # Early detection of a protective MBR so that we don't try to make sense of the MBR partition table
            if part.type == 'PROTECTIVE_MBR' and partNum == 1:
                self._logger.debug(
                    'Protective MBR detected, MBR partition table should not be taken into account. '
                    'GPT partition table parser not implemented yet')
                self._signature.append('Protective MBR')

    def _getInvariantCode(self, rawCode):
        """
        Helper method that tries to narrow down "invariant code" which can be hashed and compared to well known
        signatures. Most MBRs have localized error strings which must be excluded from the hash computation because
        they may vary from a country to another.
        First, this method tries to detect what kind of MBR it is dealing with. Most of the time, it is enough to
        look for some known hardcoded strings that identify a "well known" MBR (such as Truecrypt, GRUB2, etc...).
        Then, this method finds where the strings are and "removes" them (as in "does not include them").
        Finding these strings can be achieved by quickly studying the assembly code and looking for how these
        strings are echoed on screen at boot time (using interrupt 0x10). This research only needs to be done once
        for each type of MBR but requires an analyst to do it by static analysis. This script cannot take care of
        this. This method merely implements the results of such work.
        Currently supported MBRs are:
         - Truecrypt
         - McAfee Endpoint Encryption (Safeboot)
         - GRUB2
         - Windows (XP to 10)

        Args:
            rawCode: str of the code section

        Returns: 2-tuple (unicode string of expected loader, concatenated strings of invariant sections of code)
        """
        # By default, assume all the MBR code section will be hashed. It is obviously wrong in most cases, but it
        # allows for a "default case" which will automatically match no known hash in case something goes wrong
        # with the detection.
        codeStart = 0
        codeEnd = len(rawCode)
        expectedLoader = None
        invariantCode = str()

        # TrueCrypt (detected with the hardcoded string following the first jump: " TrueCrypt Boot Loader")
        if rawCode[0x5:0x1b].encode('hex').upper(
        ) == '2054727565437279707420426F6F74204C6F61646572':
            # TrueCrypt uses hardcoded and not-localized error strings. Therefore every TrueCrypt MBR should have
            # the same code from start to end
            expectedLoader = 'TrueCrypt MBR'
        # McAfee SafeBoot (detected with the hardcoded string following the first jump: "Safeboot ")
        elif rawCode[0x3:0xc].encode('hex').upper() == '53616665426F6F7420':
            # Two versions have been seen but both start with a jump to the same offset (0x26).
            # There are some strings at the end of the code section but localization is unlikely so they will be
            # assumed to be hardcoded (until a localized version is found...).
            # Therefore, Safeboot code can be hashed from 0x26 to the end of the code section
            invariantCode += rawCode[:0x3]  # Hash the first JMP
            codeStart = 0x26
            expectedLoader = 'Safeboot MBR'
        # GRUB (detected with the hardcoded string "GRUB " located at 0x188)
        elif rawCode[0x188:0x18d].encode('hex').upper() == '4752554220':
            # GRUB has some error strings but they are hardcoded and not localized so they can be included in the
            # hash computation. However GRUB can be installed on a disk (MBR) as well as on a partition (in a kind
            # of VBR). But in both cases the code used is the same. Since a BPB is needed for the latter case it is
            # also present in the MBR (but not needed). It therefore has to be excluded from the hash computation.
            # GRUB is jumping over the BIOS Parameter Block located between 0x3 and 0x5a.
            # It should be followed by the kernel address (word), kernel sector (dword), kernel sector high (dword)
            # and boot drive (byte). Therefore the code really starts at 0x65.
            # These values are hardcoded in boot.img and have little chance to change anytime soon.
            codeStart = 0x65
            invariantCode += rawCode[:0x3]  # Hash the first JMP
            expectedLoader = 'GRUB2 MBR'
        # Windows MBRs cannot be detected with hardcoded strings, so they fall in the default case and further
        # checks are then made based on the hypothesis that this is indeed a Windows MBR.
        else:
            # Starting with NT5.0, the MBR contains localized strings which must be excluded from the hash
            # computation. These strings are located after the code, at 3 different offsets which can be calculated
            # by adding 0x100 to the values respectively stored in bytes 0x1b5, 0x1b6 and 0x1b7 (last bytes of the
            # code section).
            # Eg: the first localized string is at 0x100 + the value saved at offset 0x1B5
            # Even though localized strings can be of different lengths, the offset of the first one does not vary
            # given one Windows version. This can therefore be used to tell Windows versions apart.
            firstStrOffset = construct.UBInt8('FirstStringOffset').parse(
                rawCode[0x1b5])
            # Windows NT5
            if firstStrOffset == 0x2c:
                expectedLoader = 'NT5.1/NT5.2 MBR'
                codeEnd = 0x100 + firstStrOffset
            # Windows NT6.0
            elif firstStrOffset == 0x62:
                expectedLoader = 'NT6.0 MBR'
                codeEnd = 0x100 + firstStrOffset
            # Windows NT6.1+
            elif firstStrOffset == 0x63:
                expectedLoader = 'NT6.1+ MBR'
                codeEnd = 0x100 + firstStrOffset
            else:
                self._suspiciousBehaviour.append(
                    'Invalid string offset: {0:#x}'.format(firstStrOffset))
                self._logger.debug(
                    'First localized string offset is wrong for a Windows MBR. '
                    'It should be 0x2c, 0x62 or 0x63: {0:#x}'.format(
                        firstStrOffset))

        self._logger.debug(
            'Expecting {0}. Code starts at {1:#x} and ends at {2:#x}'.format(
                expectedLoader, codeStart, codeEnd))

        invariantCode += rawCode[codeStart:codeEnd]
        return expectedLoader, invariantCode

    def _checkCode(self, rawCode):
        md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_16)
        md.detail = True

        checkJmp = True
        for i in md.disasm(rawCode, 0):
            # Check for JUMPs and CALLs before the first PUSH/RET.
            if checkJmp and len(i.groups) > 0:
                # Group check if available
                if hasattr(capstone.x86, 'X86_GRP_CALL') and hasattr(
                        capstone.x86, 'X86_GRP_RET'):
                    if (capstone.x86.X86_GRP_CALL in i.groups or
                            capstone.x86.X86_GRP_JUMP in i.groups):
                        self._suspiciousBehaviour.append(
                            'JMP or CALL before relocation')
                        checkJmp = False
                    elif capstone.x86.X86_GRP_RET in i.groups:
                        # Stop search after the first PUSH/RET
                        checkJmp = False
                # Manual check in case the capstone version doesn't support CALL and RET groups
                else:
                    if i.mnemonic[0] == 'j' or i.mnemonic == 'call':
                        self._suspiciousBehaviour.append(
                            'JMP or CALL before relocation')
                        checkJmp = False
                    elif i.mnemonic[:3] == 'ret':
                        # Stop search after the first PUSH/RET
                        checkJmp = False

            # Check for unknown interrupt
            if i.mnemonic == 'int' and i.bytes[1] not in (0x10, 0x13, 0x18,
                                                          0x1a):
                self._suspiciousBehaviour.append(
                    'Unknown Interrupt : {0:#x}'.format(i.bytes[1]))
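# A minimal sketch of the capstone loop used by _checkCode above, run over a
# tiny hand-written 16-bit stub (jmp short +0x3c; nop) instead of a real MBR
# code section:

import capstone

md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_16)
for insn in md.disasm(b'\xeb\x3c\x90', 0):
    # Prints "0x0: jmp 0x3e" followed by "0x2: nop".
    print('{0:#x}: {1} {2}'.format(insn.address, insn.mnemonic, insn.op_str))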
class DataBlockFile(object):
  """Class that contains a data block file."""

  SIGNATURE = 0xc104cac3

  # TODO: update empty, hints, updating and user.
  _FILE_HEADER = construct.Struct(
      u'chrome_cache_data_file_header',
      construct.ULInt32(u'signature'),
      construct.ULInt16(u'minor_version'),
      construct.ULInt16(u'major_version'),
      construct.ULInt16(u'file_number'),
      construct.ULInt16(u'next_file_number'),
      construct.ULInt32(u'block_size'),
      construct.ULInt32(u'number_of_entries'),
      construct.ULInt32(u'maximum_number_of_entries'),
      construct.Array(4, construct.ULInt32(u'empty')),
      construct.Array(4, construct.ULInt32(u'hints')),
      construct.ULInt32(u'updating'),
      construct.Array(5, construct.ULInt32(u'user')),
      construct.Array(2028, construct.ULInt32(u'allocation_bitmap')))

  _CACHE_ENTRY = construct.Struct(
      u'chrome_cache_entry',
      construct.ULInt32(u'hash'),
      construct.ULInt32(u'next_address'),
      construct.ULInt32(u'rankings_node_address'),
      construct.ULInt32(u'reuse_count'),
      construct.ULInt32(u'refetch_count'),
      construct.ULInt32(u'state'),
      construct.ULInt64(u'creation_time'),
      construct.ULInt32(u'key_size'),
      construct.ULInt32(u'long_key_address'),
      construct.Array(4, construct.ULInt32(u'data_stream_sizes')),
      construct.Array(4, construct.ULInt32(u'data_stream_addresses')),
      construct.ULInt32(u'flags'),
      construct.Padding(16),
      construct.ULInt32(u'self_hash'),
      construct.Array(160, construct.UBInt8(u'key')))

  def __init__(self, debug=False):
    """Initializes the data block file object.

    Args:
      debug (Optional[bool]): True if debug information should be printed.
    """
    super(DataBlockFile, self).__init__()
    self._debug = debug
    self._file_object = None
    self._file_object_opened_in_object = False
    self.creation_time = None
    self.block_size = None
    self.number_of_entries = None
    self.version = None

  def _ReadFileHeader(self):
    """Reads the file header.

    Raises:
      IOError: if the file header cannot be read.
""" if self._debug: print(u'Seeking file header offset: 0x{0:08x}'.format(0)) self._file_object.seek(0, os.SEEK_SET) file_header_data = self._file_object.read(self._FILE_HEADER.sizeof()) if self._debug: print(u'Data block file header data:') print(hexdump.Hexdump(file_header_data)) try: file_header = self._FILE_HEADER.parse(file_header_data) except construct.FieldError as exception: raise IOError(u'Unable to parse file header with error: {0:s}'.format( exception)) signature = file_header.get(u'signature') if signature != self.SIGNATURE: raise IOError(u'Unsupported data block file signature') self.version = u'{0:d}.{1:d}'.format( file_header.get(u'major_version'), file_header.get(u'minor_version')) if self.version not in [u'2.0', u'2.1']: raise IOError(u'Unsupported data block file version: {0:s}'.format( self.version)) self.version = u'{0:d}.{1:d}'.format( file_header.get(u'major_version'), file_header.get(u'minor_version')) self.block_size = file_header.get(u'block_size') self.number_of_entries = file_header.get(u'number_of_entries') if self._debug: print(u'Signature\t\t\t\t\t\t\t\t: 0x{0:08x}'.format(signature)) print(u'Version\t\t\t\t\t\t\t\t\t: {0:s}'.format(self.version)) print(u'File number\t\t\t\t\t\t\t\t: {0:d}'.format( file_header.get(u'file_number'))) print(u'Next file number\t\t\t\t\t\t\t: {0:d}'.format( file_header.get(u'next_file_number'))) print(u'Block size\t\t\t\t\t\t\t\t: {0:d}'.format(self.block_size)) print(u'Number of entries\t\t\t\t\t\t\t: {0:d}'.format( self.number_of_entries)) print(u'Maximum number of entries\t\t\t\t\t\t: {0:d}'.format( file_header.get(u'maximum_number_of_entries'))) # TODO: print emtpy, hints, updating and user. block_number = 0 block_range_start = 0 block_range_end = 0 in_block_range = False for value_32bit in file_header.get(u'allocation_bitmap'): for unused_bit in range(0, 32): if value_32bit & 0x00000001: if not in_block_range: block_range_start = block_number block_range_end = block_number in_block_range = True block_range_end += 1 elif in_block_range: in_block_range = False if self._debug: print(u'Block range\t: {0:d} - {1:d} ({2:d})'.format( block_range_start, block_range_end, block_range_end - block_range_start)) value_32bit >>= 1 block_number += 1 print(u'') def ReadCacheEntry(self, block_offset): """Reads a cache entry. Args: block_offset (int): offset of the block that contains the cache entry. 
"" if self._debug: print(u'Seeking cache entry offset: 0x{0:08x}'.format(block_offset)) self._file_object.seek(block_offset, os.SEEK_SET) cache_entry_data = self._file_object.read(self._CACHE_ENTRY.sizeof()) if self._debug: print(u'Data block file cache entry data:') print(hexdump.Hexdump(cache_entry_data)) try: cache_entry_struct = self._CACHE_ENTRY.parse(cache_entry_data) except construct.FieldError as exception: raise IOError(u'Unable to parse cache entry with error: {0:s}'.format( exception)) cache_entry = CacheEntry() cache_entry.hash = cache_entry_struct.get(u'hash') cache_entry.next = CacheAddress(cache_entry_struct.get(u'next_address')) cache_entry.rankings_node = CacheAddress(cache_entry_struct.get( u'rankings_node_address')) cache_entry.creation_time = cache_entry_struct.get(u'creation_time') byte_array = cache_entry_struct.get(u'key') byte_string = b''.join(map(chr, byte_array)) cache_entry.key, _, _ = byte_string.partition(b'\x00') if self._debug: print(u'Hash\t\t\t\t\t\t\t\t\t: 0x{0:08x}'.format(cache_entry.hash)) print(u'Next address\t\t\t\t\t\t\t\t: {0:s}'.format( cache_entry.next.GetDebugString())) print(u'Rankings node address\t\t\t\t\t\t\t: {0:s}'.format( cache_entry.rankings_node.GetDebugString())) print(u'Reuse count\t\t\t\t\t\t\t\t: {0:d}'.format( cache_entry_struct.get(u'reuse_count'))) print(u'Refetch count\t\t\t\t\t\t\t\t: {0:d}'.format( cache_entry_struct.get(u'refetch_count'))) print(u'State\t\t\t\t\t\t\t\t\t: 0x{0:08x}'.format( cache_entry_struct.get(u'state'))) date_string = (datetime.datetime(1601, 1, 1) + datetime.timedelta(microseconds=cache_entry.creation_time)) print(u'Creation time\t\t\t\t\t\t\t\t: {0!s} (0x{1:08x})'.format( date_string, cache_entry.creation_time)) for value in cache_entry_struct.get(u'data_stream_sizes'): print(u'Data stream size\t\t\t\t\t\t\t: {0:d}'.format(value)) cache_address_index = 0 for value in cache_entry_struct.get(u'data_stream_addresses'): cache_address = CacheAddress(value) print(u'Data stream address: {0:d}\t\t\t\t\t\t\t: {1:s}'.format( cache_address_index, cache_address.GetDebugString())) cache_address_index += 1 print(u'Flags\t\t\t\t\t\t\t\t\t: 0x{0:08x}'.format( cache_entry_struct.get(u'flags'))) print(u'Self hash\t\t\t\t\t\t\t\t: 0x{0:08x}'.format( cache_entry_struct.get(u'self_hash'))) try: cache_entry_key = cache_entry.key.decode(u'ascii') except UnicodeDecodeError: logging.warning(( u'Unable to decode cache entry key at cache address: ' u'0x{0:08x}. Characters that cannot be decoded will be ' u'replaced with "?" or "\\ufffd".').format(cache_address.value)) cache_entry_key = cache_entry.key.decode(u'ascii', errors=u'replace') print(u'Key\t\t\t\t\t\t\t\t\t: {0:s}'.format(cache_entry_key)) # TODO: calculate and verify hash. print(u'') return cache_entry def Close(self): """Closes the data block file.""" if self._file_object_opened_in_object: self._file_object.close() self._file_object = None def Open(self, filename): """Opens the data block file. Args: filename (str): path of the file. """ self._file_object = open(filename, 'rb') self._file_object_opened_in_object = True self._ReadFileHeader() def OpenFileObject(self, file_object): """Opens the data block file. Args: file_object (file): file-like object.
class CupsIppParser(interface.FileObjectParser):
  """Parser for CUPS IPP files."""

  NAME = u'cups_ipp'
  DESCRIPTION = u'Parser for CUPS IPP files.'

  # INFO:
  # For each file, we have only one document with three different timestamps:
  # creation, processing and completion.
  # Format:
  # [HEADER: MAGIC + KNOWN_TYPE][GROUP A]...[GROUP Z][GROUP_END: 0x03]
  # GROUP: [GROUP ID][PAIR A]...[PAIR Z] where [PAIR: NAME + VALUE]
  # GROUP ID: [1byte ID]
  # PAIR: [TagID][\x00][Name][Value]
  # TagID: 1 byte integer with the type of "Value".
  # Name: [Length][Text][\00]
  # Name can be empty when the name has more than one value.
  # Example: family name "lopez mata" with more than one surname.
  # Type_Text + [0x06, family, 0x00] + [0x05, lopez, 0x00] +
  # Type_Text + [0x00, 0x00] + [0x04, mata, 0x00]
  # Value: can be integer, boolean, or text, as indicated by TagID.
  # If boolean, Value: [\x01][0x00(False)] or [\x01(True)]
  # If integer, Value: [\x04][Integer]
  # If text, Value: [Length text][Text][\00]

  # Magic number that identifies the supported CUPS IPP version.
  IPP_MAJOR_VERSION = 2
  IPP_MINOR_VERSION = 0
  # Supported Operation ID.
  IPP_OP_ID = 5

  # CUPS IPP File header.
  CUPS_IPP_HEADER = construct.Struct(
      u'cups_ipp_header_struct',
      construct.UBInt8(u'major_version'),
      construct.UBInt8(u'minor_version'),
      construct.UBInt16(u'operation_id'),
      construct.UBInt32(u'request_id'))

  # Group ID that indicates the end of the IPP Control file.
  GROUP_END = 3
  # Identification Groups.
  GROUP_LIST = [1, 2, 4, 5, 6, 7]

  # Type ID, per cups source file ipp-support.c.
  TYPE_GENERAL_INTEGER = 0x20
  TYPE_INTEGER = 0x21
  TYPE_BOOL = 0x22
  TYPE_ENUMERATION = 0x23
  TYPE_DATETIME = 0x31

  # Type of values that can be extracted.
  INTEGER_8 = construct.UBInt8(u'integer')
  INTEGER_32 = construct.UBInt32(u'integer')
  TEXT = construct.PascalString(
      u'text', encoding='utf-8',
      length_field=construct.UBInt8(u'length'))
  BOOLEAN = construct.Struct(
      u'boolean_value', construct.Padding(1), INTEGER_8)
  INTEGER = construct.Struct(
      u'integer_value', construct.Padding(1), INTEGER_32)

  # This is an RFC 2579 datetime.
  DATETIME = construct.Struct(
      u'datetime',
      construct.Padding(1),
      construct.UBInt16(u'year'),
      construct.UBInt8(u'month'),
      construct.UBInt8(u'day'),
      construct.UBInt8(u'hour'),
      construct.UBInt8(u'minutes'),
      construct.UBInt8(u'seconds'),
      construct.UBInt8(u'deciseconds'),
      construct.String(u'direction_from_utc', length=1, encoding='ascii'),
      construct.UBInt8(u'hours_from_utc'),
      construct.UBInt8(u'minutes_from_utc'),
  )

  # Name of the pair.
  PAIR_NAME = construct.Struct(u'pair_name', TEXT, construct.Padding(1))

  # Specific CUPS IPP to generic name.
  NAME_PAIR_TRANSLATION = {
      u'printer-uri': u'uri',
      u'job-uuid': u'job_id',
      u'DestinationPrinterID': u'printer_id',
      u'job-originating-user-name': u'user',
      u'job-name': u'job_name',
      u'document-format': u'doc_type',
      u'job-originating-host-name': u'computer_name',
      u'com.apple.print.JobInfo.PMApplicationName': u'application',
      u'com.apple.print.JobInfo.PMJobOwner': u'owner'}

  def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses a CUPS IPP file-like object.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
""" try: header = self.CUPS_IPP_HEADER.parse_stream(file_object) except (IOError, construct.FieldError) as exception: raise errors.UnableToParseFile( u'Unable to parse CUPS IPP Header with error: {0:s}'.format( exception)) if (header.major_version != self.IPP_MAJOR_VERSION or header.minor_version != self.IPP_MINOR_VERSION): raise errors.UnableToParseFile( u'[{0:s}] Unsupported version number.'.format(self.NAME)) if header.operation_id != self.IPP_OP_ID: # Warn if the operation ID differs from the standard one. We should be # able to parse the file nonetheless. logging.debug( u'[{0:s}] Unsupported operation identifier in file: {1:s}.'. format(self.NAME, parser_mediator.GetDisplayName())) # Read the pairs extracting the name and the value. data_dict = {} name, value = self.ReadPair(parser_mediator, file_object) while name or value: # Translate the known "name" CUPS IPP to a generic name value. pretty_name = self.NAME_PAIR_TRANSLATION.get(name, name) data_dict.setdefault(pretty_name, []).append(value) name, value = self.ReadPair(parser_mediator, file_object) # TODO: Refactor to use a lookup table to do event production. time_dict = {} for key, value in data_dict.items(): if key.startswith(u'date-time-') or key.startswith(u'time-'): time_dict[key] = value del data_dict[key] if u'date-time-at-creation' in time_dict: event_object = CupsIppEvent(time_dict[u'date-time-at-creation'][0], eventdata.EventTimestamp.CREATION_TIME, data_dict) parser_mediator.ProduceEvent(event_object) if u'date-time-at-processing' in time_dict: event_object = CupsIppEvent( time_dict[u'date-time-at-processing'][0], eventdata.EventTimestamp.START_TIME, data_dict) parser_mediator.ProduceEvent(event_object) if u'date-time-at-completed' in time_dict: event_object = CupsIppEvent( time_dict[u'date-time-at-completed'][0], eventdata.EventTimestamp.END_TIME, data_dict) parser_mediator.ProduceEvent(event_object) if u'time-at-creation' in time_dict: time_value = time_dict[u'time-at-creation'][0] timestamp = timelib.Timestamp.FromPosixTime(time_value) event_object = CupsIppEvent(timestamp, eventdata.EventTimestamp.CREATION_TIME, data_dict) parser_mediator.ProduceEvent(event_object) if u'time-at-processing' in time_dict: time_value = time_dict[u'time-at-processing'][0] timestamp = timelib.Timestamp.FromPosixTime(time_value) event_object = CupsIppEvent(timestamp, eventdata.EventTimestamp.START_TIME, data_dict) parser_mediator.ProduceEvent(event_object) if u'time-at-completed' in time_dict: time_value = time_dict[u'time-at-completed'][0] timestamp = timelib.Timestamp.FromPosixTime(time_value) event_object = CupsIppEvent(timestamp, eventdata.EventTimestamp.END_TIME, data_dict) parser_mediator.ProduceEvent(event_object) def ReadPair(self, parser_mediator, file_object): """Reads an attribute name and value pair from a CUPS IPP event. Args: parser_mediator: A parser mediator object (instance of ParserMediator). file_object: a file-like object that points to a file. Returns: A list of name and value. If name and value cannot be read both are set to None. """ # Pair = Type ID + Name + Value. try: # Can be: # Group ID + IDtag = Group ID (1byte) + Tag ID (1byte) + '0x00'. # IDtag = Tag ID (1byte) + '0x00'. type_id = self.INTEGER_8.parse_stream(file_object) if type_id == self.GROUP_END: return None, None elif type_id in self.GROUP_LIST: # If it is a group ID we must read the next byte that contains # the first TagID. type_id = self.INTEGER_8.parse_stream(file_object) # 0x00 separator character. 
      _ = self.INTEGER_8.parse_stream(file_object)
    except (IOError, construct.FieldError):
      logging.warning(
          u'[{0:s}] Unsupported identifier in file: {1:s}.'.format(
              self.NAME, parser_mediator.GetDisplayName()))
      return None, None

    # Name = Length name + name + 0x00
    try:
      name = self.PAIR_NAME.parse_stream(file_object).text
    except (IOError, construct.FieldError):
      logging.warning(
          u'[{0:s}] Unsupported name in file: {1:s}.'.format(
              self.NAME, parser_mediator.GetDisplayName()))
      return None, None

    # Value: can be integer, boolean, datetime or text, selected by Type ID.
    try:
      if type_id in [
          self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER,
          self.TYPE_ENUMERATION]:
        value = self.INTEGER.parse_stream(file_object).integer
      elif type_id == self.TYPE_BOOL:
        value = bool(self.BOOLEAN.parse_stream(file_object).integer)
      elif type_id == self.TYPE_DATETIME:
        datetime = self.DATETIME.parse_stream(file_object)
        value = timelib.Timestamp.FromRFC2579Datetime(
            datetime.year, datetime.month, datetime.day, datetime.hour,
            datetime.minutes, datetime.seconds, datetime.deciseconds,
            datetime.direction_from_utc, datetime.hours_from_utc,
            datetime.minutes_from_utc)
      else:
        value = self.TEXT.parse_stream(file_object)
    except (IOError, UnicodeDecodeError, construct.FieldError):
      logging.warning(
          u'[{0:s}] Unsupported value in file: {1:s}.'.format(
              self.NAME, parser_mediator.GetDisplayName()))
      return None, None

    return name, value
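# A parse example for the RFC 2579 DATETIME structure above. The leading
# byte is consumed by the Padding(1) field; the remaining eleven bytes
# encode 2016-05-04 13:21:07.0 at UTC+02:00:

import construct

DATETIME = construct.Struct(
    u'datetime',
    construct.Padding(1),
    construct.UBInt16(u'year'),
    construct.UBInt8(u'month'),
    construct.UBInt8(u'day'),
    construct.UBInt8(u'hour'),
    construct.UBInt8(u'minutes'),
    construct.UBInt8(u'seconds'),
    construct.UBInt8(u'deciseconds'),
    construct.String(u'direction_from_utc', length=1, encoding='ascii'),
    construct.UBInt8(u'hours_from_utc'),
    construct.UBInt8(u'minutes_from_utc'))

value = DATETIME.parse(b'\x00\x07\xe0\x05\x04\x0d\x15\x07\x00+\x02\x00')
assert (value.year, value.month, value.day) == (2016, 5, 4)
assert (value.hour, value.minutes, value.seconds) == (13, 21, 7)
assert value.direction_from_utc == u'+'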
import construct as cst import time import sys con = cst.Container # alias HEADER = cst.Struct('pc_header', cst.Magic('\x00'), cst.Const(cst.UBInt8('lines'), 1), cst.UBInt8('address'), cst.Magic('\x03')) SER_STATUS = cst.BitStruct('serst', cst.Magic('\x01\x01\x00'), cst.Flag('schedule_enabled'), cst.Flag('ack_enabled'), cst.Flag('further_pages'), cst.Flag('interrupt_mode'), cst.Magic('\x00')) PAGE_IDX = cst.Bytes('page_num', 3) TEMPO = cst.BitStruct( 'tempo', cst.Magic('\x01\x01'), cst.Enum(cst.BitField('display_ctrl', 2), TIMED=0, FIXED_ON=1, FIXED_OFF=2), cst.Enum(cst.BitField('persist_time', 4), S2=1, S5=2, S10=3, S20=4, S30=5, S45=6, S60=7, S90=8, S120=9))
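# A parse example for the HEADER structure above: the \x00 magic byte, the
# constant line count (1), a one-byte pager address, and the \x03 trailer.

hdr = HEADER.parse('\x00\x01\x05\x03')
assert hdr.lines == 1
assert hdr.address == 5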
class GzipFile(file_object_io.FileObjectIO):
  """Class that implements a file-like object of a gzip file.

  The gzip file is a zlib compressed data stream with additional metadata.
  """
  _FILE_HEADER_STRUCT = construct.Struct(
      u'file_header',
      construct.ULInt16(u'signature'),
      construct.UBInt8(u'compression_method'),
      construct.UBInt8(u'flags'),
      construct.SLInt32(u'modification_time'),
      construct.UBInt8(u'extra_flags'),
      construct.UBInt8(u'operating_system'))

  _FILE_FOOTER_STRUCT = construct.Struct(
      u'file_footer',
      construct.ULInt32(u'checksum'),
      construct.ULInt32(u'uncompressed_data_size'))

  _FILE_SIGNATURE = 0x8b1f

  _COMPRESSION_METHOD_DEFLATE = 8

  _FLAG_FTEXT = 0x01
  _FLAG_FHCRC = 0x02
  _FLAG_FEXTRA = 0x04
  _FLAG_FNAME = 0x08
  _FLAG_FCOMMENT = 0x10

  def __init__(self, resolver_context, file_object=None):
    """Initializes the file-like object.

    Args:
      resolver_context: the resolver context (instance of resolver.Context).
      file_object: optional file-like object. The default is None.

    Raises:
      ValueError: when file_object is set.
    """
    if file_object:
      raise ValueError(u'File object value set.')

    super(GzipFile, self).__init__(resolver_context)
    self._compressed_data_offset = -1
    self._compressed_data_size = -1
    self.comment = None
    self.modification_time = None
    self.operating_system = None
    self.original_filename = None
    self.uncompressed_data_size = 0

  def _ReadFileHeader(self, file_object):
    """Reads the file header.

    Args:
      file_object: the file-like object to read from.

    Raises:
      FileFormatError: if file format related errors are detected.
    """
    file_object.seek(0, os.SEEK_SET)
    file_header = self._FILE_HEADER_STRUCT.parse_stream(file_object)
    self._compressed_data_offset = file_object.get_offset()

    if file_header.signature != self._FILE_SIGNATURE:
      raise errors.FileFormatError(
          u'Unsupported file signature: 0x{0:04x}.'.format(
              file_header.signature))

    if file_header.compression_method != self._COMPRESSION_METHOD_DEFLATE:
      raise errors.FileFormatError(
          u'Unsupported compression method: {0:d}.'.format(
              file_header.compression_method))

    self.modification_time = file_header.modification_time
    self.operating_system = file_header.operating_system

    if file_header.flags & self._FLAG_FEXTRA:
      extra_field_data_size = construct.ULInt16(
          u'extra_field_data_size').parse_stream(file_object)
      file_object.seek(extra_field_data_size, os.SEEK_CUR)
      self._compressed_data_offset += 2 + extra_field_data_size

    if file_header.flags & self._FLAG_FNAME:
      # Since encoding is set construct will convert the C string to Unicode.
      # Note that construct 2 does not support the encoding to be a Unicode
      # string.
      self.original_filename = construct.CString(
          u'original_filename', encoding='iso-8859-1').parse_stream(
              file_object)
      self._compressed_data_offset = file_object.get_offset()

    if file_header.flags & self._FLAG_FCOMMENT:
      # Since encoding is set construct will convert the C string to Unicode.
      # Note that construct 2 does not support the encoding to be a Unicode
      # string.
      self.comment = construct.CString(
          u'comment', encoding='iso-8859-1').parse_stream(file_object)
      self._compressed_data_offset = file_object.get_offset()

    if file_header.flags & self._FLAG_FHCRC:
      self._compressed_data_offset += 2

    self._compressed_data_size = (
        file_object.get_size() - (self._compressed_data_offset + 8))

  def _ReadFileFooter(self, file_object):
    """Reads the file footer.

    Args:
      file_object: the file-like object to read from.

    Raises:
      FileFormatError: if file format related errors are detected.
""" file_object.seek(-8, os.SEEK_END) file_footer = self._FILE_FOOTER_STRUCT.parse_stream(file_object) self.uncompressed_data_size = file_footer.uncompressed_data_size def _OpenFileObject(self, path_spec): """Opens the file-like object defined by path specification. Args: path_spec: optional the path specification (instance of path.PathSpec). The default is None. Returns: A file-like object. """ gzip_file_object = resolver.Resolver.OpenFileObject( path_spec.parent, resolver_context=self._resolver_context) try: self._ReadFileHeader(gzip_file_object) self._ReadFileFooter(gzip_file_object) finally: gzip_file_object.close() path_spec_data_range = data_range_path_spec.DataRangePathSpec( range_offset=self._compressed_data_offset, range_size=self._compressed_data_size, parent=path_spec.parent) path_spec_compressed_stream = ( compressed_stream_path_spec.CompressedStreamPathSpec( compression_method=definitions.COMPRESSION_METHOD_DEFLATE, parent=path_spec_data_range)) return resolver.Resolver.OpenFileObject( path_spec_compressed_stream, resolver_context=self._resolver_context)
class BSMParser(interface.FileObjectParser):
  """Parser for BSM files."""

  NAME = 'bsm_log'
  DESCRIPTION = 'Parser for BSM log files.'

  # BSM supported version (0x0b = 11).
  AUDIT_HEADER_VERSION = 11

  # Magic Trail Header.
  BSM_TOKEN_TRAILER_MAGIC = b'b105'

  # IP Version constants.
  AU_IPv4 = 4
  AU_IPv6 = 16

  IPV4_STRUCT = construct.UBInt32('ipv4')

  IPV6_STRUCT = construct.Struct(
      'ipv6',
      construct.UBInt64('high'),
      construct.UBInt64('low'))

  # Tested structures.
  # INFO: the token ID is omitted from the structure declarations;
  # _BSM_TOKEN is read first to obtain the ID and then the matching
  # structure is parsed. Tokens always start with an ID value that
  # identifies their token type and subsequent structure.
  _BSM_TOKEN = construct.UBInt8('token_id')

  # Data type structures.
  BSM_TOKEN_DATA_CHAR = construct.String('value', 1)
  BSM_TOKEN_DATA_SHORT = construct.UBInt16('value')
  BSM_TOKEN_DATA_INTEGER = construct.UBInt32('value')

  # Common structure used by other structures.
  # audit_uid: integer, uid that generates the entry.
  # effective_uid: integer, the permission user used.
  # effective_gid: integer, the permission group used.
  # real_uid: integer, user id of the user that executed the process.
  # real_gid: integer, group id of the group that executed the process.
  # pid: integer, identification number of the process.
  # session_id: unknown, needs research.
  BSM_TOKEN_SUBJECT_SHORT = construct.Struct(
      'subject_data',
      construct.UBInt32('audit_uid'),
      construct.UBInt32('effective_uid'),
      construct.UBInt32('effective_gid'),
      construct.UBInt32('real_uid'),
      construct.UBInt32('real_gid'),
      construct.UBInt32('pid'),
      construct.UBInt32('session_id'))

  # Common structure used by other structures.
  # Identifies the kind of inet (IPv4 or IPv6).
  # TODO: instead of 16, AU_IPv6 must be used.
  BSM_IP_TYPE_SHORT = construct.Struct(
      'bsm_ip_type_short',
      construct.UBInt32('net_type'),
      construct.Switch(
          'ip_addr',
          _BSMTokenGetNetType,
          {16: IPV6_STRUCT},
          default=IPV4_STRUCT))

  # Initial fields structure used by header structures.
  # length: integer, the length of the entry, equal to trailer (doc: length).
  # version: integer, version of BSM (AUDIT_HEADER_VERSION).
  # event_type: integer, the type of event (/etc/security/audit_event).
  # modifier: integer, unknown, needs research (it is always 0).
  BSM_HEADER = construct.Struct(
      'bsm_header',
      construct.UBInt32('length'),
      construct.UBInt8('version'),
      construct.UBInt16('event_type'),
      construct.UBInt16('modifier'))

  # First token of one entry.
  # timestamp: unsigned integer, number of seconds since
  #            January 1, 1970 00:00:00 UTC.
  # microseconds: unsigned integer, number of micro seconds.
  BSM_HEADER32 = construct.Struct(
      'bsm_header32',
      BSM_HEADER,
      construct.UBInt32('timestamp'),
      construct.UBInt32('microseconds'))

  BSM_HEADER64 = construct.Struct(
      'bsm_header64',
      BSM_HEADER,
      construct.UBInt64('timestamp'),
      construct.UBInt64('microseconds'))

  BSM_HEADER32_EX = construct.Struct(
      'bsm_header32_ex',
      BSM_HEADER,
      BSM_IP_TYPE_SHORT,
      construct.UBInt32('timestamp'),
      construct.UBInt32('microseconds'))

  # Token TEXT, provides extra information.
  BSM_TOKEN_TEXT = construct.Struct(
      'bsm_token_text',
      construct.UBInt16('length'),
      construct.Array(_BSMTokenGetLength, construct.UBInt8('text')))

  # Path of the executable.
  BSM_TOKEN_PATH = BSM_TOKEN_TEXT
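
  # Illustrative only: BSM_TOKEN_TEXT is a 16-bit length followed by that
  # many bytes, so assuming _BSMTokenGetLength returns the parsed length
  # field, a four character token would parse as:
  #
  #   token = BSM_TOKEN_TEXT.parse(b'\x00\x04test')
  #   # token.length == 4, token.text == [0x74, 0x65, 0x73, 0x74]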
  # Identifies the end of the record (followed by TRAILER).
  # status: integer that identifies the status of the exit (BSM_ERRORS).
  # return: returned value from the operation.
  BSM_TOKEN_RETURN32 = construct.Struct(
      'bsm_token_return32',
      construct.UBInt8('status'),
      construct.UBInt32('return_value'))

  BSM_TOKEN_RETURN64 = construct.Struct(
      'bsm_token_return64',
      construct.UBInt8('status'),
      construct.UBInt64('return_value'))

  # Identifies the number of bytes that were written.
  # magic: 2 bytes that identify the TRAILER (BSM_TOKEN_TRAILER_MAGIC).
  # length: integer that has the number of bytes from the entry size.
  BSM_TOKEN_TRAILER = construct.Struct(
      'bsm_token_trailer',
      construct.UBInt16('magic'),
      construct.UBInt32('record_length'))

  # A 32-bit argument.
  # num_arg: the number of the argument.
  # name_arg: the argument's name.
  # text: the string value of the argument.
  BSM_TOKEN_ARGUMENT32 = construct.Struct(
      'bsm_token_argument32',
      construct.UBInt8('num_arg'),
      construct.UBInt32('name_arg'),
      construct.UBInt16('length'),
      construct.Array(_BSMTokenGetLength, construct.UBInt8('text')))

  # A 64-bit argument.
  # num_arg: integer, the number of the argument.
  # name_arg: text, the argument's name.
  # text: the string value of the argument.
  BSM_TOKEN_ARGUMENT64 = construct.Struct(
      'bsm_token_argument64',
      construct.UBInt8('num_arg'),
      construct.UBInt64('name_arg'),
      construct.UBInt16('length'),
      construct.Array(_BSMTokenGetLength, construct.UBInt8('text')))

  # Identifies a user.
  # terminal_id: unknown, research needed.
  # terminal_addr: unknown, research needed.
  BSM_TOKEN_SUBJECT32 = construct.Struct(
      'bsm_token_subject32',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      IPV4_STRUCT)

  # Identifies a user using an extended token.
  # terminal_port: unknown, needs research.
  # net_type: unknown, needs research.
  BSM_TOKEN_SUBJECT32_EX = construct.Struct(
      'bsm_token_subject32_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_opaque // AUT_OPAQUE
  BSM_TOKEN_OPAQUE = BSM_TOKEN_TEXT

  # au_to_seq // AUT_SEQ
  BSM_TOKEN_SEQUENCE = BSM_TOKEN_DATA_INTEGER

  # Program execution with options.
  # For each argument we are going to have a string + "\x00".
  # Example: [00 00 00 02][41 42 43 00 42 42 00]
  # 2 arguments, Arg1: [414243] Arg2: [4242].
  BSM_TOKEN_EXEC_ARGUMENTS = construct.UBInt32('number_arguments')

  BSM_TOKEN_EXEC_ARGUMENT = construct.Struct(
      'bsm_token_exec_argument',
      construct.RepeatUntil(
          _BSMTokenIsEndOfString, construct.StaticField("text", 1)))

  # au_to_in_addr // AUT_IN_ADDR:
  BSM_TOKEN_ADDR = IPV4_STRUCT

  # au_to_in_addr_ext // AUT_IN_ADDR_EX:
  BSM_TOKEN_ADDR_EXT = construct.Struct(
      'bsm_token_addr_ext',
      construct.UBInt32('net_type'),
      IPV6_STRUCT)
  # au_to_ip // AUT_IP:
  # TODO: parse this header in the correct way.
  BSM_TOKEN_IP = construct.String('binary_ipv4_add', 20)

  # au_to_ipc // AUT_IPC:
  BSM_TOKEN_IPC = construct.Struct(
      'bsm_token_ipc',
      construct.UBInt8('object_type'),
      construct.UBInt32('object_id'))

  # au_to_ipc_perm // AUT_IPC_PERM
  BSM_TOKEN_IPC_PERM = construct.Struct(
      'bsm_token_ipc_perm',
      construct.UBInt32('user_id'),
      construct.UBInt32('group_id'),
      construct.UBInt32('creator_user_id'),
      construct.UBInt32('creator_group_id'),
      construct.UBInt32('access_mode'),
      construct.UBInt32('slot_seq'),
      construct.UBInt32('key'))

  # au_to_iport // AUT_IPORT:
  BSM_TOKEN_PORT = construct.UBInt16('port_number')

  # au_to_file // AUT_OTHER_FILE32:
  BSM_TOKEN_FILE = construct.Struct(
      'bsm_token_file',
      construct.UBInt32('timestamp'),
      construct.UBInt32('microseconds'),
      construct.UBInt16('length'),
      construct.Array(_BSMTokenGetLength, construct.UBInt8('text')))

  # au_to_subject64 // AUT_SUBJECT64:
  BSM_TOKEN_SUBJECT64 = construct.Struct(
      'bsm_token_subject64',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64('terminal_port'),
      IPV4_STRUCT)

  # au_to_subject64_ex // AUT_SUBJECT64_EX:
  BSM_TOKEN_SUBJECT64_EX = construct.Struct(
      'bsm_token_subject64_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      construct.UBInt32('terminal_type'),
      BSM_IP_TYPE_SHORT)

  # au_to_process32 // AUT_PROCESS32:
  BSM_TOKEN_PROCESS32 = construct.Struct(
      'bsm_token_process32',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      IPV4_STRUCT)

  # au_to_process64 // AUT_PROCESS64:
  BSM_TOKEN_PROCESS64 = construct.Struct(
      'bsm_token_process64',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64('terminal_port'),
      IPV4_STRUCT)

  # au_to_process32_ex // AUT_PROCESS32_EX:
  BSM_TOKEN_PROCESS32_EX = construct.Struct(
      'bsm_token_process32_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_process64_ex // AUT_PROCESS64_EX:
  BSM_TOKEN_PROCESS64_EX = construct.Struct(
      'bsm_token_process64_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64('terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_sock_inet32 // AUT_SOCKINET32:
  BSM_TOKEN_AUT_SOCKINET32 = construct.Struct(
      'bsm_token_aut_sockinet32',
      construct.UBInt16('net_type'),
      construct.UBInt16('port_number'),
      IPV4_STRUCT)

  # INFO: checked against the source code of XNU, but not against
  # a real BSM file.
  BSM_TOKEN_AUT_SOCKINET128 = construct.Struct(
      'bsm_token_aut_sockinet128',
      construct.UBInt16('net_type'),
      construct.UBInt16('port_number'),
      IPV6_STRUCT)

  INET6_ADDR_TYPE = construct.Struct(
      'addr_type',
      construct.UBInt16('ip_type'),
      construct.UBInt16('source_port'),
      construct.UBInt64('saddr_high'),
      construct.UBInt64('saddr_low'),
      construct.UBInt16('destination_port'),
      construct.UBInt64('daddr_high'),
      construct.UBInt64('daddr_low'))

  INET4_ADDR_TYPE = construct.Struct(
      'addr_type',
      construct.UBInt16('ip_type'),
      construct.UBInt16('source_port'),
      construct.UBInt32('source_address'),
      construct.UBInt16('destination_port'),
      construct.UBInt32('destination_address'))

  # au_to_socket_ex // AUT_SOCKET_EX
  # TODO: change the 26 to unixbsm.BSM_PROTOCOLS.INET6.
  BSM_TOKEN_AUT_SOCKINET32_EX = construct.Struct(
      'bsm_token_aut_sockinet32_ex',
      construct.UBInt16('socket_domain'),
      construct.UBInt16('socket_type'),
      construct.Switch(
          'structure_addr_port',
          _BSMTokenGetSocketDomain,
          {26: INET6_ADDR_TYPE},
          default=INET4_ADDR_TYPE))

  # au_to_sock_unix // AUT_SOCKUNIX
  BSM_TOKEN_SOCKET_UNIX = construct.Struct(
      'bsm_token_au_to_sock_unix',
      construct.UBInt16('family'),
      construct.RepeatUntil(
          _BSMTokenIsEndOfString, construct.StaticField("path", 1)))
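
  # Illustrative only: with the layout above, a hypothetical sockinet32
  # token for 127.0.0.1 on port 22 would parse as:
  #
  #   token = BSM_TOKEN_AUT_SOCKINET32.parse(
  #       b'\x00\x02\x00\x16\x7f\x00\x00\x01')
  #   # token.net_type == 2, token.port_number == 22,
  #   # token.ipv4 == 0x7f000001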
  # au_to_data // AUT_DATA
  # how to print: bsmtoken.BSM_TOKEN_DATA_PRINT.
  # type: bsmtoken.BSM_TOKEN_DATA_TYPE.
  # unit_count: number of type values.
  # BSM_TOKEN_DATA has an end field = type * unit_count.
  BSM_TOKEN_DATA = construct.Struct(
      'bsm_token_data',
      construct.UBInt8('how_to_print'),
      construct.UBInt8('data_type'),
      construct.UBInt8('unit_count'))

  # au_to_attr32 // AUT_ATTR32
  BSM_TOKEN_ATTR32 = construct.Struct(
      'bsm_token_attr32',
      construct.UBInt32('file_mode'),
      construct.UBInt32('uid'),
      construct.UBInt32('gid'),
      construct.UBInt32('file_system_id'),
      construct.UBInt64('file_system_node_id'),
      construct.UBInt32('device'))

  # au_to_attr64 // AUT_ATTR64
  BSM_TOKEN_ATTR64 = construct.Struct(
      'bsm_token_attr64',
      construct.UBInt32('file_mode'),
      construct.UBInt32('uid'),
      construct.UBInt32('gid'),
      construct.UBInt32('file_system_id'),
      construct.UBInt64('file_system_node_id'),
      construct.UBInt64('device'))

  # au_to_exit // AUT_EXIT
  BSM_TOKEN_EXIT = construct.Struct(
      'bsm_token_exit',
      construct.UBInt32('status'),
      construct.UBInt32('return_value'))

  # au_to_newgroups // AUT_NEWGROUPS
  # INFO: we must read BSM_TOKEN_DATA_INTEGER for each group.
  BSM_TOKEN_GROUPS = construct.UBInt16('group_number')

  # au_to_exec_env == au_to_exec_args
  BSM_TOKEN_EXEC_ENV = BSM_TOKEN_EXEC_ARGUMENTS

  # au_to_zonename // AUT_ZONENAME
  BSM_TOKEN_ZONENAME = BSM_TOKEN_TEXT

  # Token ID.
  # List of valid token IDs.
  # Token_ID -> (NAME_STRUCTURE, STRUCTURE)
  # Only the checked structures have been added to the list of valid
  # structures.
  _BSM_TOKEN_TYPES = {
      17: ('BSM_TOKEN_FILE', BSM_TOKEN_FILE),
      19: ('BSM_TOKEN_TRAILER', BSM_TOKEN_TRAILER),
      20: ('BSM_HEADER32', BSM_HEADER32),
      21: ('BSM_HEADER64', BSM_HEADER64),
      33: ('BSM_TOKEN_DATA', BSM_TOKEN_DATA),
      34: ('BSM_TOKEN_IPC', BSM_TOKEN_IPC),
      35: ('BSM_TOKEN_PATH', BSM_TOKEN_PATH),
      36: ('BSM_TOKEN_SUBJECT32', BSM_TOKEN_SUBJECT32),
      38: ('BSM_TOKEN_PROCESS32', BSM_TOKEN_PROCESS32),
      39: ('BSM_TOKEN_RETURN32', BSM_TOKEN_RETURN32),
      40: ('BSM_TOKEN_TEXT', BSM_TOKEN_TEXT),
      41: ('BSM_TOKEN_OPAQUE', BSM_TOKEN_OPAQUE),
      42: ('BSM_TOKEN_ADDR', BSM_TOKEN_ADDR),
      43: ('BSM_TOKEN_IP', BSM_TOKEN_IP),
      44: ('BSM_TOKEN_PORT', BSM_TOKEN_PORT),
      45: ('BSM_TOKEN_ARGUMENT32', BSM_TOKEN_ARGUMENT32),
      47: ('BSM_TOKEN_SEQUENCE', BSM_TOKEN_SEQUENCE),
      96: ('BSM_TOKEN_ZONENAME', BSM_TOKEN_ZONENAME),
      113: ('BSM_TOKEN_ARGUMENT64', BSM_TOKEN_ARGUMENT64),
      114: ('BSM_TOKEN_RETURN64', BSM_TOKEN_RETURN64),
      116: ('BSM_HEADER32_EX', BSM_HEADER32_EX),
      119: ('BSM_TOKEN_PROCESS64', BSM_TOKEN_PROCESS64),
      122: ('BSM_TOKEN_SUBJECT32_EX', BSM_TOKEN_SUBJECT32_EX),
      127: ('BSM_TOKEN_AUT_SOCKINET32_EX', BSM_TOKEN_AUT_SOCKINET32_EX),
      128: ('BSM_TOKEN_AUT_SOCKINET32', BSM_TOKEN_AUT_SOCKINET32)}

  # Untested structures.
  # When an untested structure is found, we also try to parse it using
  # these structures.
  BSM_TYPE_LIST_NOT_TESTED = {
      49: ('BSM_TOKEN_ATTR', BSM_TOKEN_ATTR32),
      50: ('BSM_TOKEN_IPC_PERM', BSM_TOKEN_IPC_PERM),
      52: ('BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS),
      59: ('BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS),
      60: ('BSM_TOKEN_EXEC_ARGUMENTS', BSM_TOKEN_EXEC_ARGUMENTS),
      61: ('BSM_TOKEN_EXEC_ENV', BSM_TOKEN_EXEC_ENV),
      62: ('BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32),
      82: ('BSM_TOKEN_EXIT', BSM_TOKEN_EXIT),
      115: ('BSM_TOKEN_ATTR64', BSM_TOKEN_ATTR64),
      117: ('BSM_TOKEN_SUBJECT64', BSM_TOKEN_SUBJECT64),
      123: ('BSM_TOKEN_PROCESS32_EX', BSM_TOKEN_PROCESS32_EX),
      124: ('BSM_TOKEN_PROCESS64_EX', BSM_TOKEN_PROCESS64_EX),
      125: ('BSM_TOKEN_SUBJECT64_EX', BSM_TOKEN_SUBJECT64_EX),
      126: ('BSM_TOKEN_ADDR_EXT', BSM_TOKEN_ADDR_EXT),
      129: ('BSM_TOKEN_AUT_SOCKINET128', BSM_TOKEN_AUT_SOCKINET128),
      130: ('BSM_TOKEN_SOCKET_UNIX', BSM_TOKEN_SOCKET_UNIX)}

  MESSAGE_CAN_NOT_SAVE = (
      'Plaso: some tokens from this entry can not be saved. Entry at 0x{0:X} '
      'with unknown token id "0x{1:X}".')

  # BSM token types:
  # https://github.com/openbsm/openbsm/blob/master/sys/bsm/audit_record.h
  _BSM_TOKEN_TYPE_ARGUMENT32 = 45
  _BSM_TOKEN_TYPE_ARGUMENT64 = 113
  _BSM_TOKEN_TYPE_ATTR = 49
  _BSM_TOKEN_TYPE_ATTR32 = 62
  _BSM_TOKEN_TYPE_ATTR64 = 115
  _BSM_TOKEN_TYPE_EXEC_ARGUMENTS = 60
  _BSM_TOKEN_TYPE_EXEC_ENV = 61
  _BSM_TOKEN_TYPE_EXIT = 82
  _BSM_TOKEN_TYPE_HEADER32 = 20
  _BSM_TOKEN_TYPE_HEADER32_EX = 116
  _BSM_TOKEN_TYPE_HEADER64 = 21
  _BSM_TOKEN_TYPE_PATH = 35
  _BSM_TOKEN_TYPE_PROCESS32 = 38
  _BSM_TOKEN_TYPE_PROCESS32_EX = 123
  _BSM_TOKEN_TYPE_PROCESS64 = 119
  _BSM_TOKEN_TYPE_PROCESS64_EX = 124
  _BSM_TOKEN_TYPE_RETURN32 = 39
  _BSM_TOKEN_TYPE_RETURN64 = 114
  _BSM_TOKEN_TYPE_SUBJECT32 = 36
  _BSM_TOKEN_TYPE_SUBJECT32_EX = 122
  _BSM_TOKEN_TYPE_SUBJECT64 = 117
  _BSM_TOKEN_TYPE_SUBJECT64_EX = 125
  _BSM_TOKEN_TYPE_TEXT = 40
  _BSM_TOKEN_TYPE_ZONENAME = 96

  _BSM_ARGUMENT_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_ARGUMENT32,
      _BSM_TOKEN_TYPE_ARGUMENT64)

  _BSM_ATTR_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_ATTR,
      _BSM_TOKEN_TYPE_ATTR32,
      _BSM_TOKEN_TYPE_ATTR64)

  _BSM_EXEV_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_EXEC_ARGUMENTS,
      _BSM_TOKEN_TYPE_EXEC_ENV)

  _BSM_HEADER_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_HEADER32,
      _BSM_TOKEN_TYPE_HEADER32_EX,
      _BSM_TOKEN_TYPE_HEADER64)

  _BSM_PROCESS_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_PROCESS32,
      _BSM_TOKEN_TYPE_PROCESS64)

  _BSM_PROCESS_EX_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_PROCESS32_EX,
      _BSM_TOKEN_TYPE_PROCESS64_EX)

  _BSM_RETURN_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_EXIT,
      _BSM_TOKEN_TYPE_RETURN32,
      _BSM_TOKEN_TYPE_RETURN64)

  _BSM_SUBJECT_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_SUBJECT32,
      _BSM_TOKEN_TYPE_SUBJECT64)

  _BSM_SUBJECT_EX_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_SUBJECT32_EX,
      _BSM_TOKEN_TYPE_SUBJECT64_EX)

  _BSM_UTF8_BYTE_ARRAY_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_PATH,
      _BSM_TOKEN_TYPE_TEXT,
      _BSM_TOKEN_TYPE_ZONENAME)

  def __init__(self):
    """Initializes a parser object."""
    super(BSMParser, self).__init__()
    # Create the dictionary with all token IDs: tested and untested.
    self._bsm_type_list_all = self._BSM_TOKEN_TYPES.copy()
    self._bsm_type_list_all.update(self.BSM_TYPE_LIST_NOT_TESTED)

  def _CopyByteArrayToBase16String(self, byte_array):
    """Copies a byte array into a base-16 encoded Unicode string.

    Args:
      byte_array (bytes): A byte array.

    Returns:
      str: a base-16 encoded Unicode string.
    """
    return ''.join(['{0:02x}'.format(byte) for byte in byte_array])
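
  # Illustrative only: _CopyByteArrayToBase16String turns a list of byte
  # values, as produced by the construct.Array fields above, into a hex
  # string; this is the form in which a parsed trailer magic can be
  # compared against BSM_TOKEN_TRAILER_MAGIC. For example:
  #
  #   self._CopyByteArrayToBase16String([0xb1, 0x05])  # returns 'b105'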
""" byte_stream = b''.join(map(chr, byte_array)) try: string = byte_stream.decode('utf-8') except UnicodeDecodeError: logging.warning('Unable to decode UTF-8 formatted byte array.') string = byte_stream.decode('utf-8', errors='ignore') string, _, _ = string.partition(b'\x00') return string def _IPv4Format(self, address): """Formats an IPv4 address as a human readable string. Args: address (int): IPv4 address. Returns: str: human readable string of IPv4 address in 4 octet representation: "1.2.3.4". """ ipv4_string = self.IPV4_STRUCT.build(address) return socket.inet_ntoa(ipv4_string) def _IPv6Format(self, high, low): """Formats an IPv6 address as a human readable string. Args: high (int): upper 64-bit part of the IPv6 address. low (int): lower 64-bit part of the IPv6 address. Returns: str: human readable string of IPv6 address. """ ipv6_string = self.IPV6_STRUCT.build( construct.Container(high=high, low=low)) # socket.inet_ntop not supported in Windows. if hasattr(socket, 'inet_ntop'): return socket.inet_ntop(socket.AF_INET6, ipv6_string) # TODO: this approach returns double "::", illegal IPv6 addr. str_address = binascii.hexlify(ipv6_string) address = [] blank = False for pos in range(0, len(str_address), 4): if str_address[pos:pos + 4] == '0000': if not blank: address.append('') blank = True else: blank = False address.append(str_address[pos:pos + 4].lstrip('0')) return ':'.join(address) def _ParseBSMEvent(self, parser_mediator, file_object): """Parses a BSM entry (BSMEvent) from the file-like object. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. file_object (dfvfs.FileIO): a file-like object. Returns: bool: True if the BSM entry was parsed. """ record_start_offset = file_object.tell() try: token_type = self._BSM_TOKEN.parse_stream(file_object) except (IOError, construct.FieldError) as exception: parser_mediator.ProduceExtractionError(( 'unable to parse BSM token type at offset: 0x{0:08x} with error: ' '{1:s}.').format(record_start_offset, exception)) return False if token_type not in self._BSM_HEADER_TOKEN_TYPES: parser_mediator.ProduceExtractionError( 'unsupported token type: {0:d} at offset: 0x{1:08x}.'.format( token_type, record_start_offset)) # TODO: if it is a Mac OS X, search for the trailer magic value # as a end of the entry can be a possibility to continue. return False _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None)) try: token = record_structure.parse_stream(file_object) except (IOError, construct.FieldError) as exception: parser_mediator.ProduceExtractionError(( 'unable to parse BSM record at offset: 0x{0:08x} with error: ' '{1:s}.').format(record_start_offset, exception)) return False event_type = bsmtoken.BSM_AUDIT_EVENT.get( token.bsm_header.event_type, 'UNKNOWN') event_type = '{0:s} ({1:d})'.format( event_type, token.bsm_header.event_type) timestamp = (token.timestamp * 1000000) + token.microseconds date_time = dfdatetime_posix_time.PosixTimeInMicroseconds( timestamp=timestamp) record_length = token.bsm_header.length record_end_offset = record_start_offset + record_length # A dict of tokens that has the entry. extra_tokens = {} # Read until we reach the end of the record. while file_object.tell() < record_end_offset: # Check if it is a known token. 
      try:
        token_type = self._BSM_TOKEN.parse_stream(file_object)
      except (IOError, construct.FieldError):
        logging.warning(
            'Unable to parse the Token ID at position: {0:d}'.format(
                file_object.tell()))
        return False

      _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None))

      if not record_structure:
        pending = record_end_offset - file_object.tell()
        new_extra_tokens = self.TryWithUntestedStructures(
            file_object, token_type, pending)
        extra_tokens.update(new_extra_tokens)
      else:
        token = record_structure.parse_stream(file_object)
        new_extra_tokens = self.FormatToken(token_type, token, file_object)
        extra_tokens.update(new_extra_tokens)

    if file_object.tell() > record_end_offset:
      logging.warning(
          'Token ID {0:d} not expected at position 0x{1:08x}. '
          'Jumping to the next entry.'.format(
              token_type, file_object.tell()))
      try:
        file_object.seek(
            record_end_offset - file_object.tell(), os.SEEK_CUR)
      except (IOError, construct.FieldError) as exception:
        logging.warning(
            'Unable to jump to next entry with error: {0:s}'.format(
                exception))
        return False

    # BSM can occur on more than one OS: BSD, Solaris and Mac OS X.
    if parser_mediator.platform != 'MacOSX':
      event_data = BSMEventData()
    else:
      event_data = MacBSMEventData()

      # In Mac OS X the last two tokens are the return status and the
      # trailer.
      return_value = extra_tokens.get('BSM_TOKEN_RETURN32')
      if not return_value:
        return_value = extra_tokens.get('BSM_TOKEN_RETURN64')
      if not return_value:
        return_value = 'UNKNOWN'

      event_data.return_value = return_value

    event_data.event_type = event_type
    event_data.extra_tokens = extra_tokens
    event_data.offset = record_start_offset
    event_data.record_length = record_length

    # TODO: check why trailer was passed to event in original while
    # event was expecting record length.
    # if extra_tokens:
    #   trailer = extra_tokens.get('BSM_TOKEN_TRAILER', 'unknown')

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_CREATION)
    parser_mediator.ProduceEventWithEventData(event, event_data)

    return True

  def _RawToUTF8(self, byte_stream):
    """Copies a UTF-8 byte stream into a Unicode string.

    Args:
      byte_stream (bytes): byte stream containing an UTF-8 encoded string.

    Returns:
      str: A Unicode string.
    """
    try:
      string = byte_stream.decode('utf-8')
    except UnicodeDecodeError:
      logging.warning(
          'Decode UTF8 failed, the message string may be cut short.')
      string = byte_stream.decode('utf-8', errors='ignore')
    return string.partition(b'\x00')[0]

  def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses a BSM file-like object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    try:
      is_bsm = self.VerifyFile(parser_mediator, file_object)
    except (IOError, construct.FieldError) as exception:
      raise errors.UnableToParseFile(
          'Unable to parse BSM file with error: {0:s}'.format(exception))

    if not is_bsm:
      raise errors.UnableToParseFile('Not a BSM File, unable to parse.')

    file_object.seek(0, os.SEEK_SET)

    while self._ParseBSMEvent(parser_mediator, file_object):
      pass

  def VerifyFile(self, parser_mediator, file_object):
    """Checks if the file is a BSM file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Returns:
      bool: True if this is a valid BSM file, False otherwise.
    """
    # First part of the entry is always a Header.
    try:
      token_type = self._BSM_TOKEN.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False

    if token_type not in self._BSM_HEADER_TOKEN_TYPES:
      return False

    _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None))

    try:
      header = record_structure.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False

    if header.bsm_header.version != self.AUDIT_HEADER_VERSION:
      return False

    try:
      token_identifier = self._BSM_TOKEN.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False

    # If this is a Mac OS X BSM file, the next entry is a text token
    # indicating whether it is a normal start or a recovery track.
    if parser_mediator.platform == 'MacOSX':
      token_type, record_structure = self._BSM_TOKEN_TYPES.get(
          token_identifier, ('', None))
      if not record_structure:
        return False

      if token_type != 'BSM_TOKEN_TEXT':
        logging.warning('It is not a valid first entry for Mac OS X BSM.')
        return False

      try:
        token = record_structure.parse_stream(file_object)
      except (IOError, construct.FieldError):
        return False

      text = self._CopyUtf8ByteArrayToString(token.text)
      if (text != 'launchctl::Audit startup' and
          text != 'launchctl::Audit recovery'):
        logging.warning('It is not a valid first entry for Mac OS X BSM.')
        return False

    return True

  def TryWithUntestedStructures(self, file_object, token_id, pending):
    """Tries to parse the pending part of the entry using untested structures.

    Args:
      file_object: BSM file.
      token_id: integer with the id that comes from the unknown token.
      pending: pending length of the entry.

    Returns:
      A dict of extra token data that could be parsed using the untested
      structures. A message indicating that a structure cannot be parsed
      is added for unparsed structures.
    """
    # Data from the unknown structure.
    start_position = file_object.tell()
    start_token_id = token_id
    extra_tokens = {}

    # Read all the "pending" bytes.
    try:
      if token_id in self._bsm_type_list_all:
        token = self._bsm_type_list_all[token_id][1].parse_stream(file_object)
        new_extra_tokens = self.FormatToken(token_id, token, file_object)
        extra_tokens.update(new_extra_tokens)
        while file_object.tell() < (start_position + pending):
          # Check if it is a known token.
          try:
            token_id = self._BSM_TOKEN.parse_stream(file_object)
          except (IOError, construct.FieldError):
            logging.warning(
                'Unable to parse the Token ID at position: {0:d}'.format(
                    file_object.tell()))
            return {}
          if token_id not in self._bsm_type_list_all:
            break
          token = self._bsm_type_list_all[token_id][1].parse_stream(
              file_object)
          new_extra_tokens = self.FormatToken(token_id, token, file_object)
          extra_tokens.update(new_extra_tokens)
    except (IOError, construct.FieldError):
      token_id = 255

    next_entry = (start_position + pending)
    if file_object.tell() != next_entry:
      # Unknown structure.
      logging.warning('Unknown Token at "0x{0:X}", ID: {1} (0x{2:X})'.format(
          start_position - 1, token_id, token_id))
      # TODO: another way to save this information must be found.
      extra_tokens.update(
          {'message': self.MESSAGE_CAN_NOT_SAVE.format(
              start_position - 1, start_token_id)})
      # Move to next entry.
      file_object.seek(next_entry - file_object.tell(), os.SEEK_CUR)
      # An empty dict is returned because it is not known which structure
      # was the incorrect one that prevented reaching the expected end of
      # the entry.
      return {}
    return extra_tokens

  def FormatToken(self, token_id, token, file_object):
    """Parses the token depending on the type of the structure.

    Args:
      token_id (int): identification of the token_type.
      token (structure): token struct to parse.
      file_object: BSM file.
    Returns:
      dict: parsed token values. Keys of the returned dictionary are token
          names like BSM_TOKEN_SUBJECT32. Values of this dictionary are
          key-value pairs like terminal_ip: 127.0.0.1.
    """
    if token_id not in self._bsm_type_list_all:
      return {}

    bsm_type, _ = self._bsm_type_list_all.get(token_id, ['', ''])

    if token_id in self._BSM_UTF8_BYTE_ARRAY_TOKEN_TYPES:
      try:
        string = self._CopyUtf8ByteArrayToString(token.text)
      except TypeError:
        string = 'Unknown'
      return {bsm_type: string}

    elif token_id in self._BSM_RETURN_TOKEN_TYPES:
      return {bsm_type: {
          'error': bsmtoken.BSM_ERRORS.get(token.status, 'Unknown'),
          'token_status': token.status,
          'call_status': token.return_value}}

    elif token_id in self._BSM_SUBJECT_TOKEN_TYPES:
      return {bsm_type: {
          'aid': token.subject_data.audit_uid,
          'euid': token.subject_data.effective_uid,
          'egid': token.subject_data.effective_gid,
          'uid': token.subject_data.real_uid,
          'gid': token.subject_data.real_gid,
          'pid': token.subject_data.pid,
          'session_id': token.subject_data.session_id,
          'terminal_port': token.terminal_port,
          'terminal_ip': self._IPv4Format(token.ipv4)}}

    elif token_id in self._BSM_SUBJECT_EX_TOKEN_TYPES:
      if token.bsm_ip_type_short.net_type == self.AU_IPv6:
        ip = self._IPv6Format(
            token.bsm_ip_type_short.ip_addr.high,
            token.bsm_ip_type_short.ip_addr.low)
      elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
        ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
      else:
        ip = 'unknown'
      return {bsm_type: {
          'aid': token.subject_data.audit_uid,
          'euid': token.subject_data.effective_uid,
          'egid': token.subject_data.effective_gid,
          'uid': token.subject_data.real_uid,
          'gid': token.subject_data.real_gid,
          'pid': token.subject_data.pid,
          'session_id': token.subject_data.session_id,
          'terminal_port': token.terminal_port,
          'terminal_ip': ip}}

    elif token_id in self._BSM_ARGUMENT_TOKEN_TYPES:
      string = self._CopyUtf8ByteArrayToString(token.text)
      return {bsm_type: {
          'string': string,
          'num_arg': token.num_arg,
          'is': token.name_arg}}

    elif token_id in self._BSM_EXEV_TOKEN_TYPES:
      arguments = []
      for _ in range(0, token):
        sub_token = self.BSM_TOKEN_EXEC_ARGUMENT.parse_stream(file_object)
        string = self._CopyUtf8ByteArrayToString(sub_token.text)
        arguments.append(string)
      return {bsm_type: ' '.join(arguments)}

    elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET32':
      return {bsm_type: {
          'protocols': bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'),
          'net_type': token.net_type,
          'port': token.port_number,
          'address': self._IPv4Format(token.ipv4)}}

    elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET128':
      return {bsm_type: {
          'protocols': bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'),
          'net_type': token.net_type,
          'port': token.port_number,
          'address': self._IPv6Format(token.ipv6.high, token.ipv6.low)}}

    elif bsm_type == 'BSM_TOKEN_ADDR':
      return {bsm_type: self._IPv4Format(token)}

    elif bsm_type == 'BSM_TOKEN_IP':
      return {'IPv4_Header': '0x{0:s}'.format(token.encode('hex'))}

    elif bsm_type == 'BSM_TOKEN_ADDR_EXT':
      return {bsm_type: {
          'protocols': bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'),
          'net_type': token.net_type,
          'address': self._IPv6Format(token.ipv6.high, token.ipv6.low)}}

    elif bsm_type == 'BSM_TOKEN_PORT':
      return {bsm_type: token}

    elif bsm_type == 'BSM_TOKEN_TRAILER':
      return {bsm_type: token.record_length}

    elif bsm_type == 'BSM_TOKEN_FILE':
      # TODO: if this timestamp is useful, it must be extracted as a
      # separate event object.
      timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
          token.timestamp, token.microseconds)
      date_time = timelib.Timestamp.CopyToDatetime(timestamp, pytz.UTC)
      date_time_string = date_time.strftime('%Y-%m-%d %H:%M:%S')

      string = self._CopyUtf8ByteArrayToString(token.text)
      return {bsm_type: {'string': string, 'timestamp': date_time_string}}

    elif bsm_type == 'BSM_TOKEN_IPC':
      return {bsm_type: {
          'object_type': token.object_type,
          'object_id': token.object_id}}

    elif token_id in self._BSM_PROCESS_TOKEN_TYPES:
      return {bsm_type: {
          'aid': token.subject_data.audit_uid,
          'euid': token.subject_data.effective_uid,
          'egid': token.subject_data.effective_gid,
          'uid': token.subject_data.real_uid,
          'gid': token.subject_data.real_gid,
          'pid': token.subject_data.pid,
          'session_id': token.subject_data.session_id,
          'terminal_port': token.terminal_port,
          'terminal_ip': self._IPv4Format(token.ipv4)}}

    elif token_id in self._BSM_PROCESS_EX_TOKEN_TYPES:
      if token.bsm_ip_type_short.net_type == self.AU_IPv6:
        ip = self._IPv6Format(
            token.bsm_ip_type_short.ip_addr.high,
            token.bsm_ip_type_short.ip_addr.low)
      elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
        ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
      else:
        ip = 'unknown'
      return {bsm_type: {
          'aid': token.subject_data.audit_uid,
          'euid': token.subject_data.effective_uid,
          'egid': token.subject_data.effective_gid,
          'uid': token.subject_data.real_uid,
          'gid': token.subject_data.real_gid,
          'pid': token.subject_data.pid,
          'session_id': token.subject_data.session_id,
          'terminal_port': token.terminal_port,
          'terminal_ip': ip}}

    elif bsm_type == 'BSM_TOKEN_DATA':
      data = []
      data_type = bsmtoken.BSM_TOKEN_DATA_TYPE.get(token.data_type, '')
      if data_type == 'AUR_CHAR':
        for _ in range(token.unit_count):
          data.append(self.BSM_TOKEN_DATA_CHAR.parse_stream(file_object))
      elif data_type == 'AUR_SHORT':
        for _ in range(token.unit_count):
          data.append(self.BSM_TOKEN_DATA_SHORT.parse_stream(file_object))
      elif data_type == 'AUR_INT32':
        for _ in range(token.unit_count):
          data.append(self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object))
      else:
        data.append('Unknown type data')
      # TODO: when the data is a string it ends with "."; however a space
      # is returned after the UTF-8 conversion.
      return {bsm_type: {
          'format': bsmtoken.BSM_TOKEN_DATA_PRINT[token.how_to_print],
          'data': '{0}'.format(self._RawToUTF8(''.join(map(str, data))))}}

    elif token_id in self._BSM_ATTR_TOKEN_TYPES:
      return {bsm_type: {
          'mode': token.file_mode,
          'uid': token.uid,
          'gid': token.gid,
          'system_id': token.file_system_id,
          'node_id': token.file_system_node_id,
          'device': token.device}}

    elif bsm_type == 'BSM_TOKEN_GROUPS':
      arguments = []
      for _ in range(token):
        arguments.append(
            self._RawToUTF8(
                self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object)))
      return {bsm_type: ','.join(arguments)}

    elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET32_EX':
      if bsmtoken.BSM_PROTOCOLS.get(token.socket_domain, '') == 'INET6':
        saddr = self._IPv6Format(
            token.structure_addr_port.saddr_high,
            token.structure_addr_port.saddr_low)
        daddr = self._IPv6Format(
            token.structure_addr_port.daddr_high,
            token.structure_addr_port.daddr_low)
      else:
        saddr = self._IPv4Format(token.structure_addr_port.source_address)
        daddr = self._IPv4Format(token.structure_addr_port.destination_address)

      return {bsm_type: {
          'from': saddr,
          'from_port': token.structure_addr_port.source_port,
          'to': daddr,
          'to_port': token.structure_addr_port.destination_port}}

    elif bsm_type == 'BSM_TOKEN_IPC_PERM':
      return {bsm_type: {
          'user_id': token.user_id,
          'group_id': token.group_id,
          'creator_user_id': token.creator_user_id,
          'creator_group_id': token.creator_group_id,
          'access': token.access_mode}}

    elif bsm_type == 'BSM_TOKEN_SOCKET_UNIX':
      string = self._CopyUtf8ByteArrayToString(token.path)
      return {bsm_type: {'family': token.family, 'path': string}}

    elif bsm_type == 'BSM_TOKEN_OPAQUE':
      string = self._CopyByteArrayToBase16String(token.text)
      return {bsm_type: string}

    elif bsm_type == 'BSM_TOKEN_SEQUENCE':
      return {bsm_type: token}
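
# Illustrative only: a minimal sketch (not part of the parser) of the
# ID-then-structure dispatch used throughout BSMParser, applied to a
# hand-built trailer token (ID 19, followed by the 6-byte trailer
# structure). The byte values are hypothetical sample data.
import io

_token_data = io.BytesIO(b'\x13\xb1\x05\x00\x00\x00\x25')
_token_id = BSMParser._BSM_TOKEN.parse_stream(_token_data)
_, _structure = BSMParser._BSM_TOKEN_TYPES[_token_id]
_trailer = _structure.parse_stream(_token_data)
assert _token_id == 19
assert _trailer.magic == 0xb105 and _trailer.record_length == 0x25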
    c.UBInt32('event'),
    c.UBInt32('ip_addr'),
    c.UBInt32('key'),
    c.SBInt32('num_want'),
    c.UBInt16('port'),
)

announce_resp = c.Struct('response',
    c.UBInt32('action'),
    c.UBInt32('transaction_id'),
    c.UBInt32('interval'),
    c.UBInt32('leechers'),
    c.UBInt32('seeders'),
    c.GreedyRange(
        c.Struct('peer',
            c.Array(4, c.UBInt8('addr')),
            c.UBInt16('port')
        )
    )
)

scrape_req = c.Struct('request',
    c.UBInt64('connection_id'),
    c.UBInt32('action'),
    c.UBInt32('transaction_id'),
    c.GreedyRange(
        c.Struct('hashes',
            c.Bytes('info_hash', 20),
        )
    )
)
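
# Illustrative only: a minimal sketch (not part of the original module)
# parsing a hand-built announce response that carries a single peer,
# assuming construct 2.x GreedyRange collects the repeated 'peer' structs
# into a list named after the struct.
import struct

_payload = struct.pack('>IIIII', 1, 0xdead, 1800, 3, 7)  # fixed fields
_payload += struct.pack('>BBBBH', 192, 168, 0, 1, 6881)  # one peer entry

_response = announce_resp.parse(_payload)
assert _response.action == 1 and _response.interval == 1800
_peer = _response.peer[0]
assert '.'.join(str(octet) for octet in _peer.addr) == '192.168.0.1'
assert _peer.port == 6881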
class JavaIDXParser(interface.BaseParser):
  """Parse Java IDX files for download events.

  There are five structures defined. 6.02 files had one generic section
  that retained all data. From 6.03, the file went to a multi-section
  format where later sections were optional and had variable lengths.
  6.03, 6.04, and 6.05 files all have their main data section (#2) begin
  at offset 128.

  The short structure is used because 6.05 files deviate after the 8th
  byte. So, grab the first 8 bytes to ensure the file is valid, get the
  file version, then continue with the correct structures.
  """

  NAME = 'java_idx'
  DESCRIPTION = u'Parser for Java IDX files.'

  IDX_SHORT_STRUCT = construct.Struct(
      'magic',
      construct.UBInt8('busy'),
      construct.UBInt8('incomplete'),
      construct.UBInt32('idx_version'))

  IDX_602_STRUCT = construct.Struct(
      'IDX_602_Full',
      construct.UBInt16('null_space'),
      construct.UBInt8('shortcut'),
      construct.UBInt32('content_length'),
      construct.UBInt64('last_modified_date'),
      construct.UBInt64('expiration_date'),
      construct.PascalString(
          'version_string', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'url', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'namespace', length_field=construct.UBInt16('length')),
      construct.UBInt32('FieldCount'))

  IDX_605_SECTION_ONE_STRUCT = construct.Struct(
      'IDX_605_Section1',
      construct.UBInt8('shortcut'),
      construct.UBInt32('content_length'),
      construct.UBInt64('last_modified_date'),
      construct.UBInt64('expiration_date'),
      construct.UBInt64('validation_date'),
      construct.UBInt8('signed'),
      construct.UBInt32('sec2len'),
      construct.UBInt32('sec3len'),
      construct.UBInt32('sec4len'))

  IDX_605_SECTION_TWO_STRUCT = construct.Struct(
      'IDX_605_Section2',
      construct.PascalString(
          'version', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'url', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'namespec', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'ip_address', length_field=construct.UBInt16('length')),
      construct.UBInt32('FieldCount'))

  # Java uses Pascal-style strings, but with a 2-byte length field.
  JAVA_READUTF_STRING = construct.Struct(
      'Java.ReadUTF',
      construct.PascalString(
          'string', length_field=construct.UBInt16('length')))

  def Parse(self, parser_context, file_entry):
    """Extract data from a Java cache IDX file.

    This is the main parsing engine for the parser. It determines if
    the selected file is a proper IDX file. It then checks the file
    version to determine the correct structure to apply to extract
    data.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
    """
    file_object = file_entry.GetFileObject()
    try:
      magic = self.IDX_SHORT_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      raise errors.UnableToParseFile(
          u'Unable to parse Java IDX file with error: {0:s}.'.format(
              exception))

    # Fields magic.busy and magic.incomplete are normally 0x00. They
    # are set to 0x01 if the file is currently being downloaded. Logic
    # checks for > 1 to avoid a race condition and still reject any
    # file with other data.
    # Field magic.idx_version is the file version, of which only
    # certain versions are supported.
    if magic.busy > 1 or magic.incomplete > 1:
      raise errors.UnableToParseFile(u'Not a valid Java IDX file')

    if magic.idx_version not in [602, 603, 604, 605]:
      raise errors.UnableToParseFile(u'Not a valid Java IDX file')

    # Obtain the relevant values from the file. The last modified date
    # denotes when the file was last modified on the HOST. For example,
    # when the file was uploaded to a web server.
    if magic.idx_version == 602:
      section_one = self.IDX_602_STRUCT.parse_stream(file_object)
      last_modified_date = section_one.last_modified_date
      url = section_one.url
      ip_address = 'Unknown'
      http_header_count = section_one.FieldCount
    elif magic.idx_version in [603, 604, 605]:
      # IDX 6.03 and 6.04 have two unused bytes before the structure.
      if magic.idx_version in [603, 604]:
        file_object.read(2)

      # IDX 6.03, 6.04, and 6.05 files use the same structures for the
      # remaining data.
      section_one = self.IDX_605_SECTION_ONE_STRUCT.parse_stream(file_object)
      last_modified_date = section_one.last_modified_date
      if file_object.get_size() > 128:
        file_object.seek(128)  # Static offset for section 2.
        section_two = self.IDX_605_SECTION_TWO_STRUCT.parse_stream(
            file_object)
        url = section_two.url
        ip_address = section_two.ip_address
        http_header_count = section_two.FieldCount
      else:
        url = 'Unknown'
        ip_address = 'Unknown'
        http_header_count = 0

    # File offset is now just prior to HTTP headers. Make sure there
    # are headers, and then parse them to retrieve the download date.
    download_date = None
    for _ in range(0, http_header_count):
      field = self.JAVA_READUTF_STRING.parse_stream(file_object)
      value = self.JAVA_READUTF_STRING.parse_stream(file_object)
      if field.string == 'date':
        # The time string "should" be in UTC or have associated time zone
        # information in the string itself. If that is not the case, then
        # there is no reliable method for plaso to determine the proper
        # time zone, so the assumption is that it is UTC.
        download_date = timelib.Timestamp.FromTimeString(
            value.string, gmt_as_timezone=False)

    if not url or not ip_address:
      raise errors.UnableToParseFile(
          u'Unexpected Error: URL or IP address not found in file.')

    last_modified_timestamp = timelib.Timestamp.FromJavaTime(
        last_modified_date)
    # TODO: Move the timestamp description fields into eventdata.
    event_object = JavaIDXEvent(
        last_modified_timestamp, 'File Hosted Date', magic.idx_version, url,
        ip_address)
    parser_context.ProduceEvent(
        event_object, parser_name=self.NAME, file_entry=file_entry)

    if section_one:
      expiration_date = section_one.get('expiration_date', None)
      if expiration_date:
        expiration_timestamp = timelib.Timestamp.FromJavaTime(expiration_date)
        event_object = JavaIDXEvent(
            expiration_timestamp, 'File Expiration Date', magic.idx_version,
            url, ip_address)
        parser_context.ProduceEvent(
            event_object, parser_name=self.NAME, file_entry=file_entry)

    if download_date:
      event_object = JavaIDXEvent(
          download_date, eventdata.EventTimestamp.FILE_DOWNLOADED,
          magic.idx_version, url, ip_address)
      parser_context.ProduceEvent(
          event_object, parser_name=self.NAME, file_entry=file_entry)
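
# Illustrative only: Java timestamps such as last_modified_date above are
# milliseconds since the POSIX epoch, which is what FromJavaTime is assumed
# to account for. A standard-library cross-check with a hypothetical value:
import datetime

_java_time = 1325376000000  # 2012-01-01 00:00:00 UTC, in milliseconds.
_date_time = datetime.datetime.utcfromtimestamp(_java_time / 1000)
assert _date_time == datetime.datetime(2012, 1, 1)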
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Reads CUPS control files."""

# IMPORTANT: DIRTY PARSE...
# MSc Project in Royal Holloway, University of London.

__author__ = 'Joaquin Moreno Garijo ([email protected])'

import datetime
import construct
import sys

header = construct.Padding(11)
attr_id = construct.UBInt8('type')
attr_text = construct.CString('text')
attr_time = construct.Struct(
    'time',
    construct.UBInt32('timestamp'),
    construct.UBInt16('other'))


class ControlFile(object):
  def __init__(self):
    self.crt_time = 0
    self.proc_time = 0
    self.comp_time = 0
    self.data = []


def printValue(name, value):
  # print u'{}: {}'.format(name, value)
  pass
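
# Illustrative only: a minimal sketch (not part of the original script) of
# how the declarations above could be combined, assuming a stream made of
# one-byte type tags each followed by a C string. The sample bytes are
# hypothetical, not taken from a real control file.
import io

_sample = io.BytesIO(b'\x42printer-name\x00')
_attribute_type = attr_id.parse_stream(_sample)
_attribute_text = attr_text.parse_stream(_sample)
printValue(u'type 0x{0:02x}'.format(_attribute_type), _attribute_text)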