def _decode(self, obj, context): return '{:08x}-{:04x}-{:04x}-{:04x}-{:s}'.format( construct.ULInt32('foo').parse(obj[0:4]), construct.ULInt16('foo').parse(obj[4:6]), construct.ULInt16('foo').parse(obj[6:8]), construct.UBInt16('foo').parse(obj[8:10]), obj[10:16].encode('hex'))
def __init__(s):
    # NOTE(review): the original author uses 's' in place of the
    # conventional 'self'; kept unchanged.
    #
    # Builds the packet-header parsers and the command dispatch tables.

    # Header embedded for cmd_id 0 packets: big-endian command/response
    # preamble carrying ids, flags and an error code.
    s.header_cmd0 = construct.Struct('CMD0Header',
                                     construct.UBInt8('magic'),
                                     construct.UBInt8('unk_0'),
                                     construct.UBInt8('unk_1'),
                                     construct.UBInt8('unk_2'),
                                     construct.UBInt8('unk_3'),
                                     construct.UBInt8('flags'),
                                     construct.UBInt8('id_primary'),
                                     construct.UBInt8('id_secondary'),
                                     construct.UBInt16('error_code'),
                                     construct.UBInt16('payload_size_cmd0'))

    # Header embedded for cmd_id 1 packets: 48 opaque bytes.
    s.header_cmd1 = construct.Struct('CMD1Header',
                                     construct.Padding(48))

    # Header embedded for cmd_id 2 packets: little-endian time data
    # (Julian day number base plus a seconds counter).
    s.header_cmd2 = construct.Struct('CMD2Header',
                                     construct.ULInt16('JDN_base'),
                                     construct.Padding(2),
                                     construct.ULInt32('seconds'))

    # Common packet header.  The per-command sub-header is selected on
    # cmd_id and only embedded when payload_size is consistent with the
    # sub-header size (cmd0 may carry extra payload beyond its header,
    # hence '>=' there and '==' for the others); unknown cmd_ids parse
    # with no sub-header (construct.Pass).
    s.header = construct.Struct('CMDHeader',
                                construct.ULInt16('packet_type'),
                                construct.ULInt16('cmd_id'),
                                construct.ULInt16('payload_size'),
                                construct.ULInt16('seq_id'),
                                construct.Switch('cmd_hdr', lambda ctx: ctx.cmd_id, {
                                    0: construct.If(lambda ctx: ctx.payload_size >= s.header_cmd0.sizeof(),
                                                    construct.Embed(s.header_cmd0)),
                                    1: construct.If(lambda ctx: ctx.payload_size == s.header_cmd1.sizeof(),
                                                    construct.Embed(s.header_cmd1)),
                                    2: construct.If(lambda ctx: ctx.payload_size == s.header_cmd2.sizeof(),
                                                    construct.Embed(s.header_cmd2))
                                }, default=construct.Pass))

    # Top-level dispatch: cmd_id -> handler method (defined elsewhere in
    # this class).
    s.cmd_handlers = {0: s.cmd0, 1: s.cmd1, 2: s.cmd2}

    # Nested dispatch for cmd0 packets: id_primary -> id_secondary ->
    # handler method.
    s.cmd0_handlers = {5: {6: s.cmd0_5_6}, }
def _pmtu_probe_do(self):
    """
    Periodically probes PMTU.

    Runs forever (intended for a dedicated gevent greenlet): sends padded
    probe packets of several candidate sizes, waits for acknowledgements
    (counted elsewhere into num_pmtu_replies / probed_pmtu), and notifies
    the client when the detected PMTU changes.
    """
    if not self.manager.config.getboolean("broker", "pmtu_discovery"):
        return

    # Start probing quickly; the interval doubles after every round, up
    # to a 10 minute ceiling (see the end of the loop).
    probe_interval = 15
    while True:
        gevent.sleep(probe_interval)

        # Reset measured PMTU
        self.probed_pmtu = 0
        self.num_pmtu_probes = 0
        self.num_pmtu_replies = 0

        # Transmit PMTU probes of different sizes multiple times
        for _ in xrange(4):
            for size in [1334, 1400, 1450, 1476, 1492, 1500]:
                try:
                    msg = ControlMessage.build(cs.Container(
                        magic1=0x80,
                        magic2=0x73A7,
                        version=1,
                        type=CONTROL_TYPE_PMTUD,
                        data=""
                    ))
                    # We need to subtract 6 because ControlMessage gets auto-padded to 12 bytes
                    msg += '\x00' * (size - IPV4_HDR_OVERHEAD - L2TP_CONTROL_SIZE - 6)
                    self.socket.send(msg)
                    self.num_pmtu_probes += 1
                except gsocket.error:
                    # Best-effort: send failures (e.g. EMSGSIZE) simply
                    # mean this size is not probed in this iteration.
                    pass

            gevent.sleep(1)

        # Collect all acknowledgements; give stragglers extra time only
        # when some probes are still unanswered.
        if self.num_pmtu_probes != self.num_pmtu_replies:
            gevent.sleep(3)

        # probed_pmtu is the largest acknowledged probe (including IP
        # header overhead); clamp the usable tunnel MTU to at least 1280.
        detected_pmtu = max(self.probed_pmtu - L2TP_TUN_OVERHEAD, 1280)
        if not self.probed_pmtu or not self.num_pmtu_replies:
            logger.warning("Got no replies to any PMTU probes for tunnel %d." % self.id)
            # NOTE(review): 'continue' skips the interval back-off below,
            # so probing retries at the current interval.
            continue
        elif detected_pmtu > 0 and detected_pmtu != self.pmtu:
            self.pmtu = detected_pmtu
            self._update_mtu()
            # Notify the client of the detected PMTU
            self.handler.send_message(self.socket, CONTROL_TYPE_PMTU_NTFY,
                                      cs.UBInt16("mtu").build(self.pmtu))

        # Increase probe interval until it reaches 10 minutes
        probe_interval = min(600, probe_interval * 2)
def Opaque(subcon): # noqa
    """
    Wrap *subcon* in an `opaque`_ byte sequence.

    On the wire this is a 16 bit length immediately followed by that many
    bytes.  Unlike :py:class:`TLSPrefixedArray`, the wrapped construct
    parses to a single instance rather than a sequence of them.

    :param subcon: The construct to wrap.
    :type subcon: :py:class:`construct.Construct`

    .. _opaque: https://tools.ietf.org/html/rfc5246#section-4.3
    """
    size_prefix = construct.UBInt16(subcon.name + "_opaque_length")
    wrapped = PrefixedBytes(subcon.name, size_prefix)
    return construct.TunnelAdapter(wrapped, subcon)
construct.ULInt32('offset'), # offset to "FirstToC Offset" _POINTER( lambda ctx: ctx.offset, construct.Struct( 'ftoc_offset', construct.Anchor('abs_offset'), construct.ULInt32('offset'), _POINTER( lambda ctx: ctx.abs_offset + ctx.offset, _REPEAT( lambda obj, ctx: obj.offset == 0x00000000, construct.Struct( 'toc', construct.ULInt32('length'), construct.SLInt16('type'), construct.UBInt16('flag'), construct.ULInt32('level'), construct.ULInt32( 'offset'), # offset to next ToC (0 if none) construct.ULInt32('count'), _ARRAY( lambda ctx: ctx.count, construct.Struct( 'record', construct.ULInt16('type'), construct.ULInt16('flag'), construct.ULInt64( 'offset'), # offset to data record _POINTER( lambda ctx: ctx._._.abs_offset + ctx. offset,
construct.UBInt64('last_offset'), construct.Padding(36)) # Record = [Heap][Record_Struct][Values] # Heap = [Group of Dyn_Value]* # Values = [ADDR_TXT][ADDR_TXT][ADDR_TXT][ADDR_TXT](2x[ADDR_TXT])* # (Host) (Sender) (Facility) (message) # Record Struct ASL_RECORD_STRUCT = construct.Struct('asl_record_struct', construct.Padding(2), construct.UBInt32('tam_entry'), construct.UBInt64('next_offset'), construct.UBInt64('ASLMessageID'), construct.UBInt64('timestamp'), construct.UBInt32('nanosec'), construct.UBInt16('level'), construct.UBInt16('flags'), construct.UBInt32('pid'), construct.UBInt32('uid'), construct.UBInt32('gid'), construct.UBInt32('read_uid'), construct.UBInt32('read_gid'), construct.UBInt64('ref_pid')) # Pointer Values ASL_RECORD_ADDR_TXT = construct.Struct('addr_or_text', construct.String('addr_txt', 8)) # Pointer Dynamic Value ASL_RECORD_DYN_VALUE = construct.Struct( 'asl_record_text_header', construct.Padding(2),
def _run(self):
    """
    Starts listening for control messages via the tunnel socket.

    Runs forever (intended for a dedicated greenlet) until the socket is
    closed or an error control message tears the tunnel down.
    """
    while True:
        # Receive control messages from the socket
        try:
            data, address = self.socket.recvfrom(2048)
        except gsocket.error, e:
            if e.errno in (90, 97):
                # Ignore EMSGSIZE errors as they ocurr when performing PMTU discovery
                # and remote nodes send us ICMP fragmentation needed messages
                continue
            elif e.errno != 9:
                if self.manager.config.getboolean('log', 'log_ip_addresses'):
                    logger.error("Socket error %d (%s) in tunnel %d with %s:%d!" % (
                        e.errno, e.strerror, self.id, self.endpoint[0], self.endpoint[1]))
                else:
                    logger.error("Socket error %d (%s) in tunnel %d!" % (
                        e.errno, e.strerror, self.id))
                # NOTE(review): execution falls through to the address
                # check below even though recvfrom raised, so 'data' and
                # 'address' hold values from a previous iteration (or are
                # unbound on the first one) — confirm whether a 'continue'
                # was intended here.
            else:
                # errno 9 (EBADF): the socket was closed underneath us.
                logger.info("Closing control channel for tunnel %d." % self.id)
                return

        if address != self.endpoint:
            # Ignore messages from unknown sources
            continue

        # All packets count as liveness indicators
        self.keep_alive()

        msg = self.handler.handle(self.socket, data, address)
        if msg is None:
            # Message has been handled or is invalid
            continue
        elif msg.type == CONTROL_TYPE_ERROR:
            logger.warning("Error message received from client, tearing down tunnel %d." % self.id)
            gevent.spawn(self.manager.close_tunnel, self)
            return
        elif msg.type == CONTROL_TYPE_PMTUD:
            if not self.manager.config.getboolean("broker", "pmtu_discovery"):
                continue

            # Reply with ACK packet
            self.handler.send_message(self.socket, CONTROL_TYPE_PMTUD_ACK,
                                      cs.UBInt16("size").build(len(data)))
        elif msg.type == CONTROL_TYPE_PMTUD_ACK:
            # Decode ACK packet and extract size
            psize = cs.UBInt16("size").parse(msg.data) + IPV4_HDR_OVERHEAD
            if psize > self.probed_pmtu:
                self.probed_pmtu = psize
        elif msg.type & MASK_CONTROL_TYPE_RELIABLE:
            # Reliable messages that require ACK, transmit one now
            # (first two bytes of the payload carry the sequence id).
            data = msg.data[2:]
            self.handler.send_message(self.socket, CONTROL_TYPE_REL_ACK, msg.data[:2])

            if msg.type == CONTROL_TYPE_LIMIT:
                # Client requests limit configuration
                try:
                    limit = LimitMessage.parse(data)
                except cs.ConstructError:
                    logger.warning("Invalid limit control message received on tunnel %d." % self.id)
                    return

                if not self.limits.configure(limit):
                    logger.warning("Unknown type of limit (%d) requested on tunnel %d." % (
                        limit.type, self.id))
                    return
import signal
import struct
import sys
import traceback

import traffic_control

# Control message for our protocol; first few bits are special as we have to
# maintain compatibility with LTPv3 in the kernel (first bit must be 1); also
# the packet must be at least 12 bytes in length, otherwise some firewalls
# may filter it when used over port 53
# NOTE(review): 'cs' is presumably an alias for the construct library
# imported elsewhere in this file — confirm.
ControlMessage = cs.Struct(
    "control",
    # Ensure that the first bit is 1 (L2TP control packet)
    cs.Const(cs.UBInt8("magic1"), 0x80),
    # Reduce conflict matching to other protocols as we run on port 53
    cs.Const(cs.UBInt16("magic2"), 0x73A7),
    # Protocol version to allow future upgrades
    cs.UBInt8("version"),
    # Message type
    cs.UBInt8("type"),
    # Message data (with length prefix)
    cs.PascalString("data"),
    # Pad the message so it is at least 12 bytes long
    # (header is 6 bytes, so pad the data portion up to 6).
    cs.Padding(lambda ctx: max(0, 6 - len(ctx["data"]))),
)

# Unreliable messages (0x00 - 0x7F)
CONTROL_TYPE_COOKIE = 0x01
CONTROL_TYPE_PREPARE = 0x02
CONTROL_TYPE_ERROR = 0x03
CONTROL_TYPE_TUNNEL = 0x04
"""Cups Reading Control Files.""" # IMPORTANT: DIRTY PARSE... # MSc Project in Royal Holloway, University of London. __author__ = 'Joaquin Moreno Garijo ([email protected])' import datetime import construct import sys header = construct.Padding(11) attr_id = construct.UBInt8('type') attr_text = construct.CString('text') attr_time = construct.Struct('time', construct.UBInt32('timestamp'), construct.UBInt16('other')) class ControlFile(object): def __init__(self): self.crt_time = 0 self.proc_time = 0 self.comp_time = 0 self.data = [] def printValue(name, value): # print u'{}: {}'.format(name, value) if type(name) != str and type(name) != unicode: return elif name == u'printer-uri':
class CupsIppParser(interface.FileObjectParser):
    """Parser for CUPS IPP files. """

    NAME = u'cups_ipp'
    DESCRIPTION = u'Parser for CUPS IPP files.'

    # INFO:
    # For each file, we have only one document with three different timestamps:
    # Created, process and finished.
    # Format:
    # [HEADER: MAGIC + KNOWN_TYPE][GROUP A]...[GROUP Z][GROUP_END: 0x03]
    # GROUP: [GROUP ID][PAIR A]...[PAIR Z] where [PAIR: NAME + VALUE]
    #   GROUP ID: [1byte ID]
    #   PAIR: [TagID][\x00][Name][Value])
    #     TagID: 1 byte integer with the type of "Value".
    #     Name: [Length][Text][\00]
    #       Name can be empty when the name has more than one value.
    #       Example: family name "lopez mata" with more than one surname.
    #       Type_Text + [0x06, family, 0x00] + [0x05, lopez, 0x00] +
    #       Type_Text + [0x00, 0x00] + [0x04, mata, 0x00]
    #     Value: can be integer, boolean, or text provided by TagID.
    #       If boolean, Value: [\x01][0x00(False)] or [\x01(True)]
    #       If integer, Value: [\x04][Integer]
    #       If text, Value: [Length text][Text][\00]

    # Magic number that identify the CUPS IPP supported version.
    IPP_MAJOR_VERSION = 2
    IPP_MINOR_VERSION = 0

    # Supported Operation ID.
    IPP_OP_ID = 5

    # CUPS IPP File header.
    CUPS_IPP_HEADER = construct.Struct(
        u'cups_ipp_header_struct',
        construct.UBInt8(u'major_version'),
        construct.UBInt8(u'minor_version'),
        construct.UBInt16(u'operation_id'),
        construct.UBInt32(u'request_id'))

    # Group ID that indicates the end of the IPP Control file.
    GROUP_END = 3

    # Identification Groups.
    GROUP_LIST = [1, 2, 4, 5, 6, 7]

    # Type ID, per cups source file ipp-support.c.
    TYPE_GENERAL_INTEGER = 0x20
    TYPE_INTEGER = 0x21
    TYPE_BOOL = 0x22
    TYPE_ENUMERATION = 0x23
    TYPE_DATETIME = 0x31

    # Type of values that can be extracted.
    INTEGER_8 = construct.UBInt8(u'integer')
    INTEGER_32 = construct.UBInt32(u'integer')
    TEXT = construct.PascalString(
        u'text',
        encoding='utf-8',
        length_field=construct.UBInt8(u'length'))
    BOOLEAN = construct.Struct(
        u'boolean_value',
        construct.Padding(1),
        INTEGER_8)
    INTEGER = construct.Struct(
        u'integer_value',
        construct.Padding(1),
        INTEGER_32)

    # This is an RFC 2579 datetime.
    DATETIME = construct.Struct(
        u'datetime',
        construct.Padding(1),
        construct.UBInt16(u'year'),
        construct.UBInt8(u'month'),
        construct.UBInt8(u'day'),
        construct.UBInt8(u'hour'),
        construct.UBInt8(u'minutes'),
        construct.UBInt8(u'seconds'),
        construct.UBInt8(u'deciseconds'),
        construct.String(u'direction_from_utc', length=1, encoding='ascii'),
        construct.UBInt8(u'hours_from_utc'),
        construct.UBInt8(u'minutes_from_utc'),
    )

    # Name of the pair.
    PAIR_NAME = construct.Struct(
        u'pair_name',
        TEXT,
        construct.Padding(1))

    # Specific CUPS IPP to generic name.
    NAME_PAIR_TRANSLATION = {
        u'printer-uri': u'uri',
        u'job-uuid': u'job_id',
        u'DestinationPrinterID': u'printer_id',
        u'job-originating-user-name': u'user',
        u'job-name': u'job_name',
        u'document-format': u'doc_type',
        u'job-originating-host-name': u'computer_name',
        u'com.apple.print.JobInfo.PMApplicationName': u'application',
        u'com.apple.print.JobInfo.PMJobOwner': u'owner'}

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a CUPS IPP file-like object.

        Args:
            parser_mediator: A parser mediator object (instance of
                ParserMediator).
            file_object: A file-like object.

        Raises:
            UnableToParseFile: when the file cannot be parsed.
        """
        try:
            header = self.CUPS_IPP_HEADER.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                u'Unable to parse CUPS IPP Header with error: {0:s}'.format(
                    exception))

        if (header.major_version != self.IPP_MAJOR_VERSION or
                header.minor_version != self.IPP_MINOR_VERSION):
            raise errors.UnableToParseFile(
                u'[{0:s}] Unsupported version number.'.format(self.NAME))

        if header.operation_id != self.IPP_OP_ID:
            # Warn if the operation ID differs from the standard one. We should be
            # able to parse the file nonetheless.
            logging.debug(
                u'[{0:s}] Unsupported operation identifier in file: {1:s}.'.format(
                    self.NAME, parser_mediator.GetDisplayName()))

        # Read the pairs extracting the name and the value.
        data_dict = {}
        name, value = self.ReadPair(parser_mediator, file_object)
        while name or value:
            # Translate the known "name" CUPS IPP to a generic name value.
            pretty_name = self.NAME_PAIR_TRANSLATION.get(name, name)
            data_dict.setdefault(pretty_name, []).append(value)
            name, value = self.ReadPair(parser_mediator, file_object)

        # TODO: Refactor to use a lookup table to do event production.
        # Split the timestamp-bearing attributes out of data_dict.
        # NOTE(review): deleting keys while iterating items() is safe in
        # Python 2, where items() returns a list; this would raise under
        # Python 3.
        time_dict = {}
        for key, value in data_dict.items():
            if key.startswith(u'date-time-') or key.startswith(u'time-'):
                time_dict[key] = value
                del data_dict[key]

        if u'date-time-at-creation' in time_dict:
            event_object = CupsIppEvent(
                time_dict[u'date-time-at-creation'][0],
                eventdata.EventTimestamp.CREATION_TIME, data_dict)
            parser_mediator.ProduceEvent(event_object)

        if u'date-time-at-processing' in time_dict:
            event_object = CupsIppEvent(
                time_dict[u'date-time-at-processing'][0],
                eventdata.EventTimestamp.START_TIME, data_dict)
            parser_mediator.ProduceEvent(event_object)

        if u'date-time-at-completed' in time_dict:
            event_object = CupsIppEvent(
                time_dict[u'date-time-at-completed'][0],
                eventdata.EventTimestamp.END_TIME, data_dict)
            parser_mediator.ProduceEvent(event_object)

        if u'time-at-creation' in time_dict:
            time_value = time_dict[u'time-at-creation'][0]
            timestamp = timelib.Timestamp.FromPosixTime(time_value)
            event_object = CupsIppEvent(
                timestamp, eventdata.EventTimestamp.CREATION_TIME, data_dict)
            parser_mediator.ProduceEvent(event_object)

        if u'time-at-processing' in time_dict:
            time_value = time_dict[u'time-at-processing'][0]
            timestamp = timelib.Timestamp.FromPosixTime(time_value)
            event_object = CupsIppEvent(
                timestamp, eventdata.EventTimestamp.START_TIME, data_dict)
            parser_mediator.ProduceEvent(event_object)

        if u'time-at-completed' in time_dict:
            time_value = time_dict[u'time-at-completed'][0]
            timestamp = timelib.Timestamp.FromPosixTime(time_value)
            event_object = CupsIppEvent(
                timestamp, eventdata.EventTimestamp.END_TIME, data_dict)
            parser_mediator.ProduceEvent(event_object)

    def ReadPair(self, parser_mediator, file_object):
        """Reads an attribute name and value pair from a CUPS IPP event.

        Args:
            parser_mediator: A parser mediator object (instance of
                ParserMediator).
            file_object: a file-like object that points to a file.

        Returns:
            A list of name and value. If name and value cannot be read both
            are set to None.
        """
        # Pair = Type ID + Name + Value.
        try:
            # Can be:
            #   Group ID + IDtag = Group ID (1byte) + Tag ID (1byte) + '0x00'.
            #   IDtag = Tag ID (1byte) + '0x00'.
            type_id = self.INTEGER_8.parse_stream(file_object)
            if type_id == self.GROUP_END:
                return None, None
            elif type_id in self.GROUP_LIST:
                # If it is a group ID we must read the next byte that contains
                # the first TagID.
                type_id = self.INTEGER_8.parse_stream(file_object)

            # 0x00 separator character.
            _ = self.INTEGER_8.parse_stream(file_object)
        except (IOError, construct.FieldError):
            logging.warning(
                u'[{0:s}] Unsupported identifier in file: {1:s}.'.format(
                    self.NAME, parser_mediator.GetDisplayName()))
            return None, None

        # Name = Length name + name + 0x00
        try:
            name = self.PAIR_NAME.parse_stream(file_object).text
        except (IOError, construct.FieldError):
            logging.warning(u'[{0:s}] Unsupported name in file: {1:s}.'.format(
                self.NAME, parser_mediator.GetDisplayName()))
            return None, None

        # Value: can be integer, boolean or text select by Type ID.
        try:
            if type_id in [
                    self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER,
                    self.TYPE_ENUMERATION]:
                value = self.INTEGER.parse_stream(file_object).integer
            elif type_id == self.TYPE_BOOL:
                value = bool(self.BOOLEAN.parse_stream(file_object).integer)
            elif type_id == self.TYPE_DATETIME:
                datetime = self.DATETIME.parse_stream(file_object)
                value = timelib.Timestamp.FromRFC2579Datetime(
                    datetime.year, datetime.month, datetime.day,
                    datetime.hour, datetime.minutes, datetime.seconds,
                    datetime.deciseconds, datetime.direction_from_utc,
                    datetime.hours_from_utc, datetime.minutes_from_utc)
            else:
                # Any other tag is treated as a length-prefixed text value.
                value = self.TEXT.parse_stream(file_object)
        except (IOError, UnicodeDecodeError, construct.FieldError):
            logging.warning(
                u'[{0:s}] Unsupported value in file: {1:s}.'.format(
                    self.NAME, parser_mediator.GetDisplayName()))
            return None, None

        return name, value
class CPIOArchiveFile(object):
  """Class that contains a CPIO archive file.

  Attributes:
    file_format (str): CPIO file format.
  """

  _CPIO_SIGNATURE_BINARY_BIG_ENDIAN = b'\x71\xc7'
  _CPIO_SIGNATURE_BINARY_LITTLE_ENDIAN = b'\xc7\x71'
  _CPIO_SIGNATURE_PORTABLE_ASCII = b'070707'
  _CPIO_SIGNATURE_NEW_ASCII = b'070701'
  _CPIO_SIGNATURE_NEW_ASCII_WITH_CHECKSUM = b'070702'

  _CPIO_BINARY_BIG_ENDIAN_FILE_ENTRY_STRUCT = construct.Struct(
      u'cpio_binary_big_endian_file_entry',
      construct.UBInt16(u'signature'),
      construct.UBInt16(u'device_number'),
      construct.UBInt16(u'inode_number'),
      construct.UBInt16(u'mode'),
      construct.UBInt16(u'user_identifier'),
      construct.UBInt16(u'group_identifier'),
      construct.UBInt16(u'number_of_links'),
      construct.UBInt16(u'special_device_number'),
      construct.UBInt16(u'modification_time_upper'),
      construct.UBInt16(u'modification_time_lower'),
      construct.UBInt16(u'path_string_size'),
      construct.UBInt16(u'file_size_upper'),
      construct.UBInt16(u'file_size_lower'))

  _CPIO_BINARY_LITTLE_ENDIAN_FILE_ENTRY_STRUCT = construct.Struct(
      u'cpio_binary_little_endian_file_entry',
      construct.ULInt16(u'signature'),
      construct.ULInt16(u'device_number'),
      construct.ULInt16(u'inode_number'),
      construct.ULInt16(u'mode'),
      construct.ULInt16(u'user_identifier'),
      construct.ULInt16(u'group_identifier'),
      construct.ULInt16(u'number_of_links'),
      construct.ULInt16(u'special_device_number'),
      construct.ULInt16(u'modification_time_upper'),
      construct.ULInt16(u'modification_time_lower'),
      construct.ULInt16(u'path_string_size'),
      construct.ULInt16(u'file_size_upper'),
      construct.ULInt16(u'file_size_lower'))

  # Fields are octal numbers encoded as ASCII digits.
  _CPIO_PORTABLE_ASCII_FILE_ENTRY_STRUCT = construct.Struct(
      u'cpio_portable_ascii_file_entry',
      construct.Bytes(u'signature', 6),
      construct.Bytes(u'device_number', 6),
      construct.Bytes(u'inode_number', 6),
      construct.Bytes(u'mode', 6),
      construct.Bytes(u'user_identifier', 6),
      construct.Bytes(u'group_identifier', 6),
      construct.Bytes(u'number_of_links', 6),
      construct.Bytes(u'special_device_number', 6),
      construct.Bytes(u'modification_time', 11),
      construct.Bytes(u'path_string_size', 6),
      construct.Bytes(u'file_size', 11))

  # Fields are hexadecimal numbers encoded as ASCII digits.
  # NOTE(review): the construct name below duplicates the portable ASCII
  # one in the original code; kept unchanged since the name may surface in
  # parse errors relied upon elsewhere.
  _CPIO_NEW_ASCII_FILE_ENTRY_STRUCT = construct.Struct(
      u'cpio_portable_ascii_file_entry',
      construct.Bytes(u'signature', 6),
      construct.Bytes(u'inode_number', 8),
      construct.Bytes(u'mode', 8),
      construct.Bytes(u'user_identifier', 8),
      construct.Bytes(u'group_identifier', 8),
      construct.Bytes(u'number_of_links', 8),
      construct.Bytes(u'modification_time', 8),
      construct.Bytes(u'file_size', 8),
      construct.Bytes(u'device_major_number', 8),
      construct.Bytes(u'device_minor_number', 8),
      construct.Bytes(u'special_device_major_number', 8),
      construct.Bytes(u'special_device_minor_number', 8),
      construct.Bytes(u'path_string_size', 8),
      construct.Bytes(u'checksum', 8))

  def __init__(self, debug=False):
    """Initializes the CPIO archive file object.

    Args:
      debug (Optional[bool]): True if debug information should be printed.
    """
    super(CPIOArchiveFile, self).__init__()
    self._debug = debug
    self._file_entries = None
    self._file_object = None
    self._file_object_opened_in_object = False
    self._file_size = 0

    self.file_format = None
    self.size = None

  def _ReadFileEntry(self, file_offset):
    """Reads a file entry.

    Args:
      file_offset (int): current file offset.

    Returns:
      CPIOArchiveFileEntry: the file entry read at file_offset.

    Raises:
      IOError: if the file entry cannot be read.
    """
    if self._debug:
      print(u'Seeking file entry at offset: 0x{0:08x}'.format(file_offset))

    self._file_object.seek(file_offset, os.SEEK_SET)

    # Select the header layout matching the detected archive format.
    if self.file_format == u'bin-big-endian':
      file_entry_struct = self._CPIO_BINARY_BIG_ENDIAN_FILE_ENTRY_STRUCT
    elif self.file_format == u'bin-little-endian':
      file_entry_struct = self._CPIO_BINARY_LITTLE_ENDIAN_FILE_ENTRY_STRUCT
    elif self.file_format == u'odc':
      file_entry_struct = self._CPIO_PORTABLE_ASCII_FILE_ENTRY_STRUCT
    elif self.file_format in (u'crc', u'newc'):
      file_entry_struct = self._CPIO_NEW_ASCII_FILE_ENTRY_STRUCT

    file_entry_struct_size = file_entry_struct.sizeof()

    file_entry_data = self._file_object.read(file_entry_struct_size)
    file_offset += file_entry_struct_size

    if self._debug:
      print(u'File entry data:')
      print(hexdump.Hexdump(file_entry_data))

    try:
      file_entry_struct = file_entry_struct.parse(file_entry_data)
    except construct.FieldError as exception:
      # BUG FIX: the original code called str.file_format(exception), which
      # raised AttributeError instead of the intended IOError message.
      raise IOError((
          u'Unable to parse file entry data section with error: '
          u'{0:s}').format(exception))

    if self.file_format in (u'bin-big-endian', u'bin-little-endian'):
      inode_number = file_entry_struct.inode_number
      mode = file_entry_struct.mode
      user_identifier = file_entry_struct.user_identifier
      group_identifier = file_entry_struct.group_identifier

      # 32-bit values are stored as two 16-bit halves.
      modification_time = (
          (file_entry_struct.modification_time_upper << 16) |
          file_entry_struct.modification_time_lower)

      path_string_size = file_entry_struct.path_string_size

      file_size = (
          (file_entry_struct.file_size_upper << 16) |
          file_entry_struct.file_size_lower)

    elif self.file_format == u'odc':
      inode_number = int(file_entry_struct.inode_number, 8)
      mode = int(file_entry_struct.mode, 8)
      user_identifier = int(file_entry_struct.user_identifier, 8)
      group_identifier = int(file_entry_struct.group_identifier, 8)
      modification_time = int(file_entry_struct.modification_time, 8)
      path_string_size = int(file_entry_struct.path_string_size, 8)
      file_size = int(file_entry_struct.file_size, 8)

    elif self.file_format in (u'crc', u'newc'):
      inode_number = int(file_entry_struct.inode_number, 16)
      mode = int(file_entry_struct.mode, 16)
      user_identifier = int(file_entry_struct.user_identifier, 16)
      group_identifier = int(file_entry_struct.group_identifier, 16)
      modification_time = int(file_entry_struct.modification_time, 16)
      path_string_size = int(file_entry_struct.path_string_size, 16)
      file_size = int(file_entry_struct.file_size, 16)

    if self._debug:
      if self.file_format in (u'bin-big-endian', u'bin-little-endian'):
        print(u'Signature\t\t\t\t\t\t\t\t: 0x{0:04x}'.format(
            file_entry_struct.signature))
      else:
        print(u'Signature\t\t\t\t\t\t\t\t: {0!s}'.format(
            file_entry_struct.signature))

      if self.file_format not in (u'crc', u'newc'):
        if self.file_format in (u'bin-big-endian', u'bin-little-endian'):
          device_number = file_entry_struct.device_number
        elif self.file_format == u'odc':
          device_number = int(file_entry_struct.device_number, 8)

        print(u'Device number\t\t\t\t\t\t\t\t: {0:d}'.format(device_number))

      print(u'Inode number\t\t\t\t\t\t\t\t: {0:d}'.format(inode_number))
      print(u'Mode\t\t\t\t\t\t\t\t\t: {0:o}'.format(mode))

      print(u'User identifier (UID)\t\t\t\t\t\t\t: {0:d}'.format(
          user_identifier))

      print(u'Group identifier (GID)\t\t\t\t\t\t\t: {0:d}'.format(
          group_identifier))

      if self.file_format in (u'bin-big-endian', u'bin-little-endian'):
        number_of_links = file_entry_struct.number_of_links
      elif self.file_format == u'odc':
        number_of_links = int(file_entry_struct.number_of_links, 8)
      elif self.file_format in (u'crc', u'newc'):
        number_of_links = int(file_entry_struct.number_of_links, 16)

      print(u'Number of links\t\t\t\t\t\t\t\t: {0:d}'.format(number_of_links))

      if self.file_format not in (u'crc', u'newc'):
        if self.file_format in (u'bin-big-endian', u'bin-little-endian'):
          special_device_number = file_entry_struct.special_device_number
        elif self.file_format == u'odc':
          special_device_number = int(
              file_entry_struct.special_device_number, 8)

        print(u'Special device number\t\t\t\t\t\t\t\t: {0:d}'.format(
            special_device_number))

      print(u'Modification time\t\t\t\t\t\t\t: {0:d}'.format(
          modification_time))

      if self.file_format not in (u'crc', u'newc'):
        print(u'Path string size\t\t\t\t\t\t\t: {0:d}'.format(
            path_string_size))

      print(u'File size\t\t\t\t\t\t\t\t: {0:d}'.format(file_size))

      if self.file_format in (u'crc', u'newc'):
        device_major_number = int(file_entry_struct.device_major_number, 16)

        print(u'Device major number\t\t\t\t\t\t\t: {0:d}'.format(
            device_major_number))

        device_minor_number = int(file_entry_struct.device_minor_number, 16)

        print(u'Device minor number\t\t\t\t\t\t\t: {0:d}'.format(
            device_minor_number))

        special_device_major_number = int(
            file_entry_struct.special_device_major_number, 16)

        print(u'Special device major number\t\t\t\t\t\t: {0:d}'.format(
            special_device_major_number))

        special_device_minor_number = int(
            file_entry_struct.special_device_minor_number, 16)

        print(u'Special device minor number\t\t\t\t\t\t: {0:d}'.format(
            special_device_minor_number))

        print(u'Path string size\t\t\t\t\t\t\t: {0:d}'.format(
            path_string_size))

        checksum = int(file_entry_struct.checksum, 16)
        print(u'Checksum\t\t\t\t\t\t\t\t: 0x{0:08x}'.format(checksum))

    path_string_data = self._file_object.read(path_string_size)
    file_offset += path_string_size

    # TODO: should this be ASCII?
    path_string = path_string_data.decode(u'ascii')
    path_string, _, _ = path_string.partition(u'\x00')

    if self._debug:
      print(u'Path string\t\t\t\t\t\t\t\t: {0:s}'.format(path_string))

    # The path string is aligned depending on the format: 2-byte alignment
    # for the binary formats, 4-byte for newc/crc, none for odc.
    if self.file_format in (u'bin-big-endian', u'bin-little-endian'):
      padding_size = file_offset % 2
      if padding_size > 0:
        padding_size = 2 - padding_size

    elif self.file_format == u'odc':
      padding_size = 0

    elif self.file_format in (u'crc', u'newc'):
      padding_size = file_offset % 4
      if padding_size > 0:
        padding_size = 4 - padding_size

    if self._debug:
      padding_data = self._file_object.read(padding_size)
      print(u'Path string alignment padding:')
      print(hexdump.Hexdump(padding_data))

    file_offset += padding_size

    file_entry = CPIOArchiveFileEntry(self._file_object)
    file_entry.data_offset = file_offset
    file_entry.data_size = file_size
    file_entry.group_identifier = group_identifier
    file_entry.inode_number = inode_number
    file_entry.modification_time = modification_time
    file_entry.path = path_string
    file_entry.mode = mode
    file_entry.size = (
        file_entry_struct_size + path_string_size + padding_size + file_size)
    file_entry.user_identifier = user_identifier

    if self.file_format in (u'crc', u'newc'):
      # The file data itself is also 4-byte aligned in newc/crc archives.
      file_offset += file_size

      padding_size = file_offset % 4
      if padding_size > 0:
        padding_size = 4 - padding_size

      if self._debug:
        self._file_object.seek(file_offset, os.SEEK_SET)
        padding_data = self._file_object.read(padding_size)
        print(u'File data alignment padding:')
        print(hexdump.Hexdump(padding_data))

      file_entry.size += padding_size

    if self._debug:
      print(u'')

    return file_entry

  def _ReadFileEntries(self):
    """Reads the file entries from the cpio archive."""
    file_offset = 0
    # When _file_size is 0 (unknown), read until the trailer entry.
    while file_offset < self._file_size or self._file_size == 0:
      file_entry = self._ReadFileEntry(file_offset)
      file_offset += file_entry.size
      if file_entry.path == u'TRAILER!!!':
        break

      if file_entry.path in self._file_entries:
        # Keep the first occurrence of a duplicate path.
        continue

      self._file_entries[file_entry.path] = file_entry

    self.size = file_offset

  def Close(self):
    """Closes the CPIO archive file."""
    if not self._file_object:
      return

    if self._file_object_opened_in_object:
      self._file_object.close()
      self._file_object_opened_in_object = False

    self._file_entries = None
    self._file_object = None

  def FileEntryExistsByPath(self, path):
    """Determines if file entry for a specific path exists.

    Args:
      path (str): path of the file entry.

    Returns:
      bool: True if the file entry exists.
    """
    if self._file_entries is None:
      return False

    return path in self._file_entries

  def GetFileEntries(self, path_prefix=u''):
    """Retrieves the file entries.

    Args:
      path_prefix (Optional[str]): path prefix.

    Yields:
      CPIOArchiveFileEntry: CPIO archive file entry.
    """
    for path, file_entry in iter(self._file_entries.items()):
      if path.startswith(path_prefix):
        yield file_entry

  def GetFileEntryByPath(self, path):
    """Retrieves a file entry for a specific path.

    Args:
      path (str): path of the file entry.

    Returns:
      CPIOArchiveFileEntry: CPIO archive file entry or None.
    """
    if self._file_entries is None:
      return

    return self._file_entries.get(path, None)

  def Open(self, filename):
    """Opens the CPIO archive file.

    Args:
      filename (str): path of the file.

    Raises:
      IOError: if the file format signature is not supported.
    """
    stat_object = os.stat(filename)

    file_object = open(filename, 'rb')

    self.OpenFileObject(file_object)

    self._file_size = stat_object.st_size

    self._file_object_opened_in_object = True

  def OpenFileObject(self, file_object):
    """Opens the CPIO archive file.

    Args:
      file_object (file): file-like object.

    Raises:
      IOError: if the file is already opened or the format signature is
          not supported.
    """
    if self._file_object:
      raise IOError(u'Already open')

    file_object.seek(0, os.SEEK_SET)
    signature_data = file_object.read(6)

    self.file_format = None
    if len(signature_data) > 2:
      # The 2-byte binary signatures are checked first, then the 6-byte
      # ASCII signatures.
      if signature_data[:2] == self._CPIO_SIGNATURE_BINARY_BIG_ENDIAN:
        self.file_format = u'bin-big-endian'
      elif signature_data[:2] == self._CPIO_SIGNATURE_BINARY_LITTLE_ENDIAN:
        self.file_format = u'bin-little-endian'
      elif signature_data == self._CPIO_SIGNATURE_PORTABLE_ASCII:
        self.file_format = u'odc'
      elif signature_data == self._CPIO_SIGNATURE_NEW_ASCII:
        self.file_format = u'newc'
      elif signature_data == self._CPIO_SIGNATURE_NEW_ASCII_WITH_CHECKSUM:
        self.file_format = u'crc'

    if self.file_format is None:
      raise IOError(u'Unsupported CPIO format.')

    self._file_entries = {}
    self._file_object = file_object

    self._ReadFileEntries()
class KeychainParser(interface.BaseParser):
  """Parser for Mac OS X Keychain files.

  Layout (big-endian): [DB HEADER][DB SCHEMA][TABLE OFFSET]*[TABLE]*, where
  each table contains records whose attribute fields are stored as offsets
  relative to the first byte of the record.
  """

  NAME = 'mac_keychain'
  DESCRIPTION = u'Parser for Mac OS X Keychain files.'

  KEYCHAIN_MAGIC_HEADER = 'kych'
  KEYCHAIN_MAJOR_VERSION = 1
  KEYCHAIN_MINOR_VERSION = 0

  RECORD_TYPE_APPLICATION = 0x80000000
  RECORD_TYPE_INTERNET = 0x80000001

  # DB HEADER.
  KEYCHAIN_DB_HEADER = construct.Struct(
      'db_header',
      construct.String('magic', 4),
      construct.UBInt16('major_version'),
      construct.UBInt16('minor_version'),
      construct.UBInt32('header_size'),
      construct.UBInt32('schema_offset'),
      construct.Padding(4))

  # DB SCHEMA.
  KEYCHAIN_DB_SCHEMA = construct.Struct(
      'db_schema',
      construct.UBInt32('size'),
      construct.UBInt32('number_of_tables'))

  # For each number_of_tables, the schema has a TABLE_OFFSET with the
  # offset starting in the DB_SCHEMA.
  TABLE_OFFSET = construct.UBInt32('table_offset')

  TABLE_HEADER = construct.Struct(
      'table_header',
      construct.UBInt32('table_size'),
      construct.UBInt32('record_type'),
      construct.UBInt32('number_of_records'),
      construct.UBInt32('first_record'),
      construct.UBInt32('index_offset'),
      construct.Padding(4),
      construct.UBInt32('recordnumbercount'))

  # Common record header; every UBInt32 attribute below (creation_time,
  # comments, ...) is an offset relative to the record start.
  RECORD_HEADER = construct.Struct(
      'record_header',
      construct.UBInt32('entry_length'),
      construct.Padding(12),
      construct.UBInt32('ssgp_length'),
      construct.Padding(4),
      construct.UBInt32('creation_time'),
      construct.UBInt32('last_mod_time'),
      construct.UBInt32('text_description'),
      construct.Padding(4),
      construct.UBInt32('comments'),
      construct.Padding(8),
      construct.UBInt32('entry_name'),
      construct.Padding(20),
      construct.UBInt32('account_name'),
      construct.Padding(4))

  RECORD_HEADER_APP = construct.Struct(
      'record_entry_app',
      RECORD_HEADER,
      construct.Padding(4))

  RECORD_HEADER_INET = construct.Struct(
      'record_entry_inet',
      RECORD_HEADER,
      construct.UBInt32('where'),
      construct.UBInt32('protocol'),
      construct.UBInt32('type'),
      construct.Padding(4),
      construct.UBInt32('url'))

  TEXT = construct.PascalString(
      'text', length_field=construct.UBInt32('length'))

  TIME = construct.Struct(
      'timestamp',
      construct.String('year', 4),
      construct.String('month', 2),
      construct.String('day', 2),
      construct.String('hour', 2),
      construct.String('minute', 2),
      construct.String('second', 2),
      construct.Padding(2))

  TYPE_TEXT = construct.String('type', 4)

  # TODO: add more protocols.
  _PROTOCOL_TRANSLATION_DICT = {
      u'htps': u'https',
      u'smtp': u'smtp',
      u'imap': u'imap',
      u'http': u'http'}

  def _GetTimestampFromEntry(self, parser_context, file_entry, structure):
    """Parses a TIME entry structure into microseconds since Epoch in UTC.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      structure: TIME entry structure with string attributes year, month,
          day, hour, minute and second.

    Returns:
      Microseconds since Epoch in UTC, or 0 if the structure contains an
      invalid date or time.
    """
    try:
      return timelib.Timestamp.FromTimeParts(
          int(structure.year, 10), int(structure.month, 10),
          int(structure.day, 10), int(structure.hour, 10),
          int(structure.minute, 10), int(structure.second, 10))
    except ValueError:
      # Fixed: the original swapped the structure and display name arguments.
      logging.warning(
          u'[{0:s}] Invalid keychain time {1!s} in file: {2:s}'.format(
              self.NAME, structure,
              parser_context.GetDisplayName(file_entry)))
      return 0

  def _ReadEntryApplication(self, parser_context, file_object, file_entry=None):
    """Extracts the information from an application password entry.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_object: A file-like object that points to an Keychain file.
      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
          The default is None.
    """
    offset = file_object.tell()
    try:
      record = self.RECORD_HEADER_APP.parse_stream(file_object)
    except (IOError, construct.FieldError):
      logging.warning((
          u'[{0:s}] Unsupported record header at 0x{1:08x} in file: '
          u'{2:s}').format(
              self.NAME, offset, parser_context.GetDisplayName(file_entry)))
      return

    (ssgp_hash, creation_time, last_mod_time, text_description,
     comments, entry_name, account_name) = self._ReadEntryHeader(
         parser_context, file_entry, file_object, record.record_header,
         offset)

    # Move to the end of the record so the stream is positioned for the
    # next record.
    file_object.seek(
        record.record_header.entry_length + offset - file_object.tell(),
        os.SEEK_CUR)

    event_object = KeychainApplicationRecordEvent(
        creation_time, eventdata.EventTimestamp.CREATION_TIME,
        entry_name, account_name, text_description, comments, ssgp_hash)
    parser_context.ProduceEvent(
        event_object, parser_name=self.NAME, file_entry=file_entry)

    if creation_time != last_mod_time:
      event_object = KeychainApplicationRecordEvent(
          last_mod_time, eventdata.EventTimestamp.MODIFICATION_TIME,
          entry_name, account_name, text_description, comments, ssgp_hash)
      parser_context.ProduceEvent(
          event_object, parser_name=self.NAME, file_entry=file_entry)

  def _ReadEntryHeader(
      self, parser_context, file_entry, file_object, record, offset):
    """Reads the common record attributes.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      file_object: A file-like object that points to an Keychain file.
      record: Structure with the header of the record.
      offset: First byte of the record.

    Returns:
      A tuple of:
        ssgp_hash: Hash of the encrypted data (passwd, cert, note).
        creation_time: When the entry was created.
        last_mod_time: Last time the entry was updated.
        text_description: A brief description of the entry.
        comments: Comments attached to the entry or u'N/A'.
        entry_name: Name of the entry.
        account_name: Name of the account.
    """
    # Info: The hash header always starts with the string "ssgp" followed
    # by the hash. Fields are always a multiple of four bytes; if not,
    # the value is padded with 0x00.
    ssgp_hash = binascii.hexlify(file_object.read(record.ssgp_length)[4:])

    file_object.seek(
        record.creation_time - file_object.tell() + offset - 1, os.SEEK_CUR)
    creation_time = self._GetTimestampFromEntry(
        parser_context, file_entry, self.TIME.parse_stream(file_object))

    file_object.seek(
        record.last_mod_time - file_object.tell() + offset - 1, os.SEEK_CUR)
    last_mod_time = self._GetTimestampFromEntry(
        parser_context, file_entry, self.TIME.parse_stream(file_object))

    # The description field does not always contain data.
    if record.text_description:
      file_object.seek(
          record.text_description - file_object.tell() + offset - 1,
          os.SEEK_CUR)
      text_description = self.TEXT.parse_stream(file_object)
    else:
      text_description = u'N/A'

    # The comment field does not always contain data.
    if record.comments:
      # Fixed: the original seeked to record.text_description here, reading
      # the comments from the wrong file position.
      file_object.seek(
          record.comments - file_object.tell() + offset - 1, os.SEEK_CUR)
      comments = self.TEXT.parse_stream(file_object)
    else:
      comments = u'N/A'

    file_object.seek(
        record.entry_name - file_object.tell() + offset - 1, os.SEEK_CUR)
    entry_name = self.TEXT.parse_stream(file_object)

    file_object.seek(
        record.account_name - file_object.tell() + offset - 1, os.SEEK_CUR)
    account_name = self.TEXT.parse_stream(file_object)

    return (
        ssgp_hash, creation_time, last_mod_time, text_description,
        comments, entry_name, account_name)

  def _ReadEntryInternet(self, parser_context, file_object, file_entry=None):
    """Extracts the information from an Internet password entry.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_object: A file-like object that points to an Keychain file.
      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
          The default is None.
    """
    offset = file_object.tell()
    try:
      record = self.RECORD_HEADER_INET.parse_stream(file_object)
    except (IOError, construct.FieldError):
      logging.warning((
          u'[{0:s}] Unsupported record header at 0x{1:08x} in file: '
          u'{2:s}').format(
              self.NAME, offset, parser_context.GetDisplayName(file_entry)))
      return

    (ssgp_hash, creation_time, last_mod_time, text_description,
     comments, entry_name, account_name) = self._ReadEntryHeader(
         parser_context, file_entry, file_object, record.record_header,
         offset)

    if not record.where:
      where = u'N/A'
      protocol = u'N/A'
      type_protocol = u'N/A'
    else:
      file_object.seek(
          record.where - file_object.tell() + offset - 1, os.SEEK_CUR)
      where = self.TEXT.parse_stream(file_object)

      file_object.seek(
          record.protocol - file_object.tell() + offset - 1, os.SEEK_CUR)
      protocol = self.TYPE_TEXT.parse_stream(file_object)

      file_object.seek(
          record.type - file_object.tell() + offset - 1, os.SEEK_CUR)
      type_protocol = self.TEXT.parse_stream(file_object)
      type_protocol = self._PROTOCOL_TRANSLATION_DICT.get(
          type_protocol, type_protocol)

      if record.url:
        file_object.seek(
            record.url - file_object.tell() + offset - 1, os.SEEK_CUR)
        url = self.TEXT.parse_stream(file_object)
        where = u'{0:s}{1:s}'.format(where, url)

    # Move to the end of the record so the stream is positioned for the
    # next record.
    file_object.seek(
        record.record_header.entry_length + offset - file_object.tell(),
        os.SEEK_CUR)

    event_object = KeychainInternetRecordEvent(
        creation_time, eventdata.EventTimestamp.CREATION_TIME,
        entry_name, account_name, text_description, comments, where,
        protocol, type_protocol, ssgp_hash)
    parser_context.ProduceEvent(
        event_object, parser_name=self.NAME, file_entry=file_entry)

    if creation_time != last_mod_time:
      # Fixed: the original omitted ssgp_hash from the modification event.
      event_object = KeychainInternetRecordEvent(
          last_mod_time, eventdata.EventTimestamp.MODIFICATION_TIME,
          entry_name, account_name, text_description, comments, where,
          protocol, type_protocol, ssgp_hash)
      parser_context.ProduceEvent(
          event_object, parser_name=self.NAME, file_entry=file_entry)

  def _VerifyStructure(self, file_object):
    """Verifies that we are dealing with a Keychain file.

    Args:
      file_object: A file-like object that points to an Keychain file.

    Returns:
      A list of table positions if it is a keychain, None otherwise.
    """
    # INFO: The HEADER KEYCHAIN:
    # [DBHEADER] + [DBSCHEMA] + [OFFSET TABLE A] + ... + [OFFSET TABLE Z]
    # Where the table offset is relative to the first byte of the DB Schema,
    # then we must add to this offset the size of the [DBHEADER].
    try:
      db_header = self.KEYCHAIN_DB_HEADER.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return
    if (db_header.minor_version != self.KEYCHAIN_MINOR_VERSION or
        db_header.major_version != self.KEYCHAIN_MAJOR_VERSION or
        db_header.magic != self.KEYCHAIN_MAGIC_HEADER):
      return

    # Read the database schema and extract the offset for all the tables.
    # They are ordered by file position from the top to the bottom of the
    # file.
    try:
      db_schema = self.KEYCHAIN_DB_SCHEMA.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return
    table_offsets = []
    for _ in range(db_schema.number_of_tables):
      try:
        table_offset = self.TABLE_OFFSET.parse_stream(file_object)
      except (IOError, construct.FieldError):
        return
      table_offsets.append(table_offset + self.KEYCHAIN_DB_HEADER.sizeof())
    return table_offsets

  def Parse(self, parser_context, file_entry):
    """Extracts data from a Keychain file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).

    Raises:
      errors.UnableToParseFile: if the file is not a Keychain file.
    """
    file_object = file_entry.GetFileObject()
    table_offsets = self._VerifyStructure(file_object)
    if not table_offsets:
      file_object.close()
      raise errors.UnableToParseFile(u'The file is not a Keychain file.')

    for table_offset in table_offsets:
      # Skipping X bytes, unknown data at this point.
      file_object.seek(table_offset - file_object.tell(), os.SEEK_CUR)
      try:
        table = self.TABLE_HEADER.parse_stream(file_object)
      except construct.FieldError as exception:
        logging.warning((
            u'[{0:s}] Unable to parse table header in file: {1:s} '
            u'with error: {2:s}.').format(
                self.NAME, parser_context.GetDisplayName(file_entry),
                exception))
        continue

      # table_offset: absolute byte in the file where the table starts.
      # table.first_record: first record in the table, relative to the
      # first byte of the table.
      file_object.seek(
          table_offset + table.first_record - file_object.tell(),
          os.SEEK_CUR)

      if table.record_type == self.RECORD_TYPE_INTERNET:
        for _ in range(table.number_of_records):
          self._ReadEntryInternet(
              parser_context, file_object, file_entry=file_entry)
      elif table.record_type == self.RECORD_TYPE_APPLICATION:
        for _ in range(table.number_of_records):
          self._ReadEntryApplication(
              parser_context, file_object, file_entry=file_entry)

    file_object.close()
class CPIOArchiveFile(object):
  """CPIO archive file.

  Supports the binary (big/little endian), portable ASCII (odc) and new
  ASCII (newc, crc) CPIO variants.

  Attributes:
    file_format (str): CPIO file format.
  """
  # pylint: disable=no-member

  _CPIO_SIGNATURE_BINARY_BIG_ENDIAN = b'\x71\xc7'
  _CPIO_SIGNATURE_BINARY_LITTLE_ENDIAN = b'\xc7\x71'
  _CPIO_SIGNATURE_PORTABLE_ASCII = b'070707'
  _CPIO_SIGNATURE_NEW_ASCII = b'070701'
  _CPIO_SIGNATURE_NEW_ASCII_WITH_CHECKSUM = b'070702'

  _CPIO_BINARY_BIG_ENDIAN_FILE_ENTRY_STRUCT = construct.Struct(
      'cpio_binary_big_endian_file_entry',
      construct.UBInt16('signature'),
      construct.UBInt16('device_number'),
      construct.UBInt16('inode_number'),
      construct.UBInt16('mode'),
      construct.UBInt16('user_identifier'),
      construct.UBInt16('group_identifier'),
      construct.UBInt16('number_of_links'),
      construct.UBInt16('special_device_number'),
      construct.UBInt16('modification_time_upper'),
      construct.UBInt16('modification_time_lower'),
      construct.UBInt16('path_string_size'),
      construct.UBInt16('file_size_upper'),
      construct.UBInt16('file_size_lower'))

  _CPIO_BINARY_LITTLE_ENDIAN_FILE_ENTRY_STRUCT = construct.Struct(
      'cpio_binary_little_endian_file_entry',
      construct.ULInt16('signature'),
      construct.ULInt16('device_number'),
      construct.ULInt16('inode_number'),
      construct.ULInt16('mode'),
      construct.ULInt16('user_identifier'),
      construct.ULInt16('group_identifier'),
      construct.ULInt16('number_of_links'),
      construct.ULInt16('special_device_number'),
      construct.ULInt16('modification_time_upper'),
      construct.ULInt16('modification_time_lower'),
      construct.ULInt16('path_string_size'),
      construct.ULInt16('file_size_upper'),
      construct.ULInt16('file_size_lower'))

  # Values are octal numbers stored as ASCII strings.
  _CPIO_PORTABLE_ASCII_FILE_ENTRY_STRUCT = construct.Struct(
      'cpio_portable_ascii_file_entry',
      construct.Bytes('signature', 6),
      construct.Bytes('device_number', 6),
      construct.Bytes('inode_number', 6),
      construct.Bytes('mode', 6),
      construct.Bytes('user_identifier', 6),
      construct.Bytes('group_identifier', 6),
      construct.Bytes('number_of_links', 6),
      construct.Bytes('special_device_number', 6),
      construct.Bytes('modification_time', 11),
      construct.Bytes('path_string_size', 6),
      construct.Bytes('file_size', 11))

  # Values are hexadecimal numbers stored as ASCII strings.
  # Fixed: the struct name previously duplicated
  # 'cpio_portable_ascii_file_entry'.
  _CPIO_NEW_ASCII_FILE_ENTRY_STRUCT = construct.Struct(
      'cpio_new_ascii_file_entry',
      construct.Bytes('signature', 6),
      construct.Bytes('inode_number', 8),
      construct.Bytes('mode', 8),
      construct.Bytes('user_identifier', 8),
      construct.Bytes('group_identifier', 8),
      construct.Bytes('number_of_links', 8),
      construct.Bytes('modification_time', 8),
      construct.Bytes('file_size', 8),
      construct.Bytes('device_major_number', 8),
      construct.Bytes('device_minor_number', 8),
      construct.Bytes('special_device_major_number', 8),
      construct.Bytes('special_device_minor_number', 8),
      construct.Bytes('path_string_size', 8),
      construct.Bytes('checksum', 8))

  def __init__(self):
    """Initializes the CPIO archive file object."""
    super(CPIOArchiveFile, self).__init__()
    self._file_entries = None
    self._file_object = None
    self._file_object_opened_in_object = False
    self._file_size = 0

    self.file_format = None

  def _ReadFileEntry(self, file_object, file_offset):
    """Reads a file entry.

    Args:
      file_object (FileIO): file-like object.
      file_offset (int): current file offset.

    Returns:
      CPIOArchiveFileEntry: a CPIO archive file entry.

    Raises:
      IOError: if the file entry cannot be read.
    """
    file_object.seek(file_offset, os.SEEK_SET)

    if self.file_format == 'bin-big-endian':
      file_entry_struct = self._CPIO_BINARY_BIG_ENDIAN_FILE_ENTRY_STRUCT
    elif self.file_format == 'bin-little-endian':
      file_entry_struct = self._CPIO_BINARY_LITTLE_ENDIAN_FILE_ENTRY_STRUCT
    elif self.file_format == 'odc':
      file_entry_struct = self._CPIO_PORTABLE_ASCII_FILE_ENTRY_STRUCT
    elif self.file_format in ('crc', 'newc'):
      file_entry_struct = self._CPIO_NEW_ASCII_FILE_ENTRY_STRUCT
    else:
      # Fixed: previously an unknown format left file_entry_struct unbound.
      raise IOError('Unsupported CPIO format.')

    file_entry_struct_size = file_entry_struct.sizeof()

    try:
      file_entry_struct = file_entry_struct.parse_stream(file_object)
    except construct.FieldError as exception:
      raise IOError(
          ('Unable to parse file entry data section with error: '
           '{0:s}').format(exception))

    file_offset += file_entry_struct_size

    if self.file_format in ('bin-big-endian', 'bin-little-endian'):
      inode_number = file_entry_struct.inode_number
      mode = file_entry_struct.mode
      user_identifier = file_entry_struct.user_identifier
      group_identifier = file_entry_struct.group_identifier
      # 32-bit values are split into two 16-bit halves.
      modification_time = (
          (file_entry_struct.modification_time_upper << 16) |
          file_entry_struct.modification_time_lower)
      path_string_size = file_entry_struct.path_string_size
      file_size = (
          (file_entry_struct.file_size_upper << 16) |
          file_entry_struct.file_size_lower)

    elif self.file_format == 'odc':
      inode_number = int(file_entry_struct.inode_number, 8)
      mode = int(file_entry_struct.mode, 8)
      user_identifier = int(file_entry_struct.user_identifier, 8)
      group_identifier = int(file_entry_struct.group_identifier, 8)
      modification_time = int(file_entry_struct.modification_time, 8)
      path_string_size = int(file_entry_struct.path_string_size, 8)
      file_size = int(file_entry_struct.file_size, 8)

    elif self.file_format in ('crc', 'newc'):
      inode_number = int(file_entry_struct.inode_number, 16)
      mode = int(file_entry_struct.mode, 16)
      user_identifier = int(file_entry_struct.user_identifier, 16)
      group_identifier = int(file_entry_struct.group_identifier, 16)
      modification_time = int(file_entry_struct.modification_time, 16)
      path_string_size = int(file_entry_struct.path_string_size, 16)
      file_size = int(file_entry_struct.file_size, 16)

    path_string_data = file_object.read(path_string_size)
    file_offset += path_string_size

    # TODO: should this be ASCII?
    path_string = path_string_data.decode('ascii')
    path_string, _, _ = path_string.partition('\x00')

    # The header and path string are padded to a 2-byte (binary formats)
    # or 4-byte (new ASCII formats) boundary.
    if self.file_format in ('bin-big-endian', 'bin-little-endian'):
      padding_size = file_offset % 2
      if padding_size > 0:
        padding_size = 2 - padding_size

    elif self.file_format == 'odc':
      padding_size = 0

    elif self.file_format in ('crc', 'newc'):
      padding_size = file_offset % 4
      if padding_size > 0:
        padding_size = 4 - padding_size

    file_offset += padding_size

    file_entry = CPIOArchiveFileEntry()
    file_entry.data_offset = file_offset
    file_entry.data_size = file_size
    file_entry.group_identifier = group_identifier
    file_entry.inode_number = inode_number
    file_entry.modification_time = modification_time
    file_entry.path = path_string
    file_entry.mode = mode
    file_entry.size = (
        file_entry_struct_size + path_string_size + padding_size + file_size)
    file_entry.user_identifier = user_identifier

    file_offset += file_size

    # The file data is also padded to the format's alignment boundary.
    if self.file_format in ('bin-big-endian', 'bin-little-endian'):
      padding_size = file_offset % 2
      if padding_size > 0:
        padding_size = 2 - padding_size

    elif self.file_format == 'odc':
      padding_size = 0

    elif self.file_format in ('crc', 'newc'):
      padding_size = file_offset % 4
      if padding_size > 0:
        padding_size = 4 - padding_size

    if padding_size > 0:
      file_entry.size += padding_size

    return file_entry

  def _ReadFileEntries(self, file_object):
    """Reads the file entries from the cpio archive.

    Args:
      file_object (FileIO): file-like object.
    """
    self._file_entries = {}

    file_offset = 0
    while file_offset < self._file_size:
      file_entry = self._ReadFileEntry(file_object, file_offset)
      file_offset += file_entry.size
      # The trailer entry marks the end of the archive.
      if file_entry.path == 'TRAILER!!!':
        break

      if file_entry.path in self._file_entries:
        # TODO: alert on file entries with duplicate paths?
        continue

      self._file_entries[file_entry.path] = file_entry

  def Close(self):
    """Closes the CPIO archive file."""
    self._file_entries = None
    self._file_object = None
    # Fixed: previously set to None, which is inconsistent with __init__
    # and breaks the size comparison on a subsequent Open().
    self._file_size = 0

  def FileEntryExistsByPath(self, path):
    """Determines if file entry for a specific path exists.

    Args:
      path (str): path of the file entry.

    Returns:
      bool: True if the file entry exists.
    """
    if self._file_entries is None:
      return False

    return path in self._file_entries

  def GetFileEntries(self, path_prefix=''):
    """Retrieves the file entries.

    Args:
      path_prefix (str): path prefix.

    Yields:
      CPIOArchiveFileEntry: a CPIO archive file entry.
    """
    if self._file_entries:
      for path, file_entry in iter(self._file_entries.items()):
        if path.startswith(path_prefix):
          yield file_entry

  def GetFileEntryByPath(self, path):
    """Retrieves a file entry for a specific path.

    Args:
      path (str): path of the file entry.

    Returns:
      CPIOArchiveFileEntry: a CPIO archive file entry or None if not
          available.
    """
    if self._file_entries:
      return self._file_entries.get(path, None)

  def Open(self, file_object):
    """Opens the CPIO archive file.

    Args:
      file_object (FileIO): a file-like object.

    Raises:
      IOError: if the file format signature is not supported.
    """
    file_object.seek(0, os.SEEK_SET)
    signature_data = file_object.read(6)

    # The binary variants have a 2-byte signature, the ASCII variants a
    # 6-byte one.
    self.file_format = None
    if len(signature_data) > 2:
      if signature_data[:2] == self._CPIO_SIGNATURE_BINARY_BIG_ENDIAN:
        self.file_format = 'bin-big-endian'
      elif signature_data[:2] == self._CPIO_SIGNATURE_BINARY_LITTLE_ENDIAN:
        self.file_format = 'bin-little-endian'
      elif signature_data == self._CPIO_SIGNATURE_PORTABLE_ASCII:
        self.file_format = 'odc'
      elif signature_data == self._CPIO_SIGNATURE_NEW_ASCII:
        self.file_format = 'newc'
      elif signature_data == self._CPIO_SIGNATURE_NEW_ASCII_WITH_CHECKSUM:
        self.file_format = 'crc'

    if self.file_format is None:
      raise IOError('Unsupported CPIO format.')

    self._file_object = file_object
    self._file_size = file_object.get_size()

    self._ReadFileEntries(self._file_object)

  def ReadDataAtOffset(self, file_offset, size):
    """Reads a byte string from the file-like object at a specific offset.

    Args:
      file_offset (int): file offset.
      size (int): number of bytes to read.

    Returns:
      bytes: data read.

    Raises:
      IOError: if the read failed.
    """
    self._file_object.seek(file_offset, os.SEEK_SET)
    return self._file_object.read(size)
class AslParser(interface.BaseParser):
  """Parser for Apple System Log (ASL) log files."""

  NAME = 'asl_log'
  DESCRIPTION = u'Parser for ASL log files.'

  ASL_MAGIC = 'ASL DB\x00\x00\x00\x00\x00\x00'

  # If not right assigned, the value is "-1".
  ASL_NO_RIGHTS = 'ffffffff'

  # Priority level (criticality).
  ASL_MESSAGE_PRIORITY = {
      0: 'EMERGENCY',
      1: 'ALERT',
      2: 'CRITICAL',
      3: 'ERROR',
      4: 'WARNING',
      5: 'NOTICE',
      6: 'INFO',
      7: 'DEBUG'}

  # ASL file header.
  # magic: magic number that identifies ASL files.
  # version: version of the file.
  # offset: first record in the file.
  # timestamp: epoch time when the first entry was written.
  # last_offset: last record in the file.
  ASL_HEADER_STRUCT = construct.Struct(
      'asl_header_struct',
      construct.String('magic', 12),
      construct.UBInt32('version'),
      construct.UBInt64('offset'),
      construct.UBInt64('timestamp'),
      construct.UBInt32('cache_size'),
      construct.UBInt64('last_offset'),
      construct.Padding(36))

  # The record structure is:
  # [HEAP][STRUCTURE][4xExtraField][2xExtraField]*[PreviousEntry]
  # tam_entry: number of bytes from this file position until the end of
  #     the record, not counting itself.
  # next_offset: next record; 0x00 means this is the last record.
  # asl_message_id: numeric identification of the event.
  # timestamp/nanosec: when the entry was created.
  # level: priority level.
  # pid/uid/gid: process, user and group that asked to save the record.
  # read_uid/read_gid: if not -1 (all FF), only root and this user/group
  #     can read the entry.
  ASL_RECORD_STRUCT = construct.Struct(
      'asl_record_struct',
      construct.Padding(2),
      construct.UBInt32('tam_entry'),
      construct.UBInt64('next_offset'),
      construct.UBInt64('asl_message_id'),
      construct.UBInt64('timestamp'),
      construct.UBInt32('nanosec'),
      construct.UBInt16('level'),
      construct.UBInt16('flags'),
      construct.UBInt32('pid'),
      construct.UBInt32('uid'),
      construct.UBInt32('gid'),
      construct.UBInt32('read_uid'),
      construct.UBInt32('read_gid'),
      construct.UBInt64('ref_pid'))

  ASL_RECORD_STRUCT_SIZE = ASL_RECORD_STRUCT.sizeof()

  # 8-byte fields can be:
  # - String: [Nibble = 1000 (8)][Nibble = Length][7 Bytes = String].
  # - Integer: byte position in the file that points to an
  #   ASL_RECORD_DYN_VALUE struct; 0 means no data (skip).
  # If the field is a String, this adapter decodes each integer byte
  # into the corresponding ASCII character.
  ASL_OCTET_STRING = construct.ExprAdapter(
      construct.Octet('string'),
      encoder=lambda obj, ctx: ord(obj),
      decoder=lambda obj, ctx: chr(obj))

  # Field string structure: if the first bit is 1 (first nibble = 8) it is
  # a String and the next nibble has the number of characters; the last
  # 7 bytes hold the characters.
  ASL_STRING = construct.BitStruct(
      'string',
      construct.Flag('type'),
      construct.Bits('filler', 3),
      construct.If(lambda ctx: ctx.type, construct.Nibble('string_length')),
      construct.If(lambda ctx: ctx.type, construct.Array(7, ASL_OCTET_STRING)))

  # 8-byte pointer to a byte position in the file.
  ASL_POINTER = construct.UBInt64('pointer')

  # Dynamic data structure pointed to by a pointer that contains a String:
  # [2 bytes padding][4 bytes length of String][String].
  ASL_RECORD_DYN_VALUE = construct.Struct(
      'asl_record_dyn_value',
      construct.Padding(2),
      construct.PascalString(
          'value', length_field=construct.UBInt32('length')))

  def Parse(self, parser_context, file_entry):
    """Extracts entries from an ASL file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).

    Raises:
      errors.UnableToParseFile: if the file is not an ASL file.
    """
    file_object = file_entry.GetFileObject()
    file_object.seek(0, os.SEEK_SET)

    try:
      header = self.ASL_HEADER_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      file_object.close()
      raise errors.UnableToParseFile(
          u'Unable to parse ASL Header with error: {0:s}.'.format(exception))

    if header.magic != self.ASL_MAGIC:
      file_object.close()
      raise errors.UnableToParseFile(u'Not an ASL Header, unable to parse.')

    # Get the first and the last entry.
    offset = header.offset
    old_offset = header.offset
    last_offset_header = header.last_offset

    # If the ASL file has entries.
    if offset:
      event_object, offset = self.ReadAslEvent(file_object, offset)
      while event_object:
        parser_context.ProduceEvent(
            event_object, parser_name=self.NAME, file_entry=file_entry)

        # TODO: an anomaly object must be emitted once that is implemented.
        # Sanity check: the last read element must be the same as indicated
        # by the header.
        if offset == 0 and old_offset != last_offset_header:
          logging.warning(u'Parsing ended before the header ends.')
        old_offset = offset
        event_object, offset = self.ReadAslEvent(file_object, offset)

    file_object.close()

  def ReadAslEvent(self, file_object, offset):
    """Returns an AslEvent from a single ASL entry.

    Args:
      file_object: a file-like object that points to an ASL file.
      offset: offset where the static part of the entry starts.

    Returns:
      A tuple of (event object, offset of the next entry) or (None, None)
      if the entry cannot be parsed.
    """
    # Fixed: check the offset before reading the heap; the original issued
    # a negative-length read when offset was 0.
    if not offset:
      return None, None

    # The heap of the entry is saved to try to avoid seeks (performance).
    # dynamic_start is the real start position of the entry.
    dynamic_start = file_object.tell()
    dynamic_part = file_object.read(offset - file_object.tell())

    try:
      record_header = self.ASL_RECORD_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      logging.warning(
          u'Unable to parse ASL event with error: {0:s}'.format(exception))
      return None, None

    # tam_fields is the real length of the dynamic fields:
    # tam_entry = ([Record_Struct] - 6) + [Dynamic_Fields] +
    #     [Pointer_Entry_Before]
    # We do not need [Pointer_Entry_Before] (8 bytes), hence:
    # [Dynamic_Fields] = tam_entry - [Record_Struct] - 2
    tam_fields = record_header.tam_entry - self.ASL_RECORD_STRUCT_SIZE - 2

    # The dynamic part contains at least four 8-byte fields (Host, Sender,
    # Facility, Message) plus a pair of 8-byte fields (name, value) for each
    # extra ASL field. Each 8-byte field is either:
    # - Null ('0000000000000000'): nothing to do.
    # - String: first bit = 1 (first nibble = 8), second nibble holds the
    #   string length, the next 7 bytes the characters padded with 0x00.
    #   Example: [8468 6964 6400 0000]
    #            [8] String, [4] length, value: [68 69 64 64] = hidd.
    # - Pointer: static position in the file of an ASL_RECORD_DYN_VALUE.
    #   Example: [0000 0000 0000 0077] points to file position 0x077.
    values = []
    while tam_fields > 0:
      try:
        raw_field = file_object.read(8)
      except (IOError, construct.FieldError) as exception:
        # Fixed: the format spec was {0:d}, which raises when formatting an
        # exception.
        logging.warning(
            u'Unable to parse ASL event with error: {0:s}'.format(exception))
        return None, None

      try:
        # Try to read as a String.
        field = self.ASL_STRING.parse(raw_field)
        values.append(''.join(field.string[0:field.string_length]))
        # Go to parse the next extra field.
        tam_fields -= 8
        continue
      except ValueError:
        pass

      # If it is not a string, it must be a pointer.
      try:
        field = self.ASL_POINTER.parse(raw_field)
      except ValueError as exception:
        logging.warning(
            u'Unable to parse ASL event with error: {0:s}'.format(exception))
        return None, None

      if field != 0:
        # The next IF/ELSE avoids seeks when possible: if the pointer points
        # below where the actual entry starts, it points to a previous
        # entry; otherwise the data is in the already-read heap.
        pos = field - dynamic_start
        if pos >= 0:
          # The data is in the actual entry.
          try:
            values.append((self.ASL_RECORD_DYN_VALUE.parse(
                dynamic_part[pos:])).value.partition('\x00')[0])
          except (IOError, construct.FieldError) as exception:
            logging.warning(
                u'Unable to parse ASL event with error: {0:s}'.format(
                    exception))
            return None, None
        else:
          # Only if the pointer points to the heap of another entry do we
          # use the seek method.
          main_position = file_object.tell()
          if main_position > field:
            # The pointer is in a previous entry.
            file_object.seek(field - main_position, os.SEEK_CUR)
            try:
              values.append(
                  (self.ASL_RECORD_DYN_VALUE.parse_stream(
                      file_object)).value.partition('\x00')[0])
            except (IOError, construct.FieldError):
              logging.warning((
                  u'The pointer at {0:d} (0x{0:x}) points to invalid '
                  u'information.').format(
                      main_position - self.ASL_POINTER.sizeof()))
            # Come back to the position in the entry.
            _ = file_object.read(main_position - file_object.tell())
          else:
            _ = file_object.read(field - main_position)
            values.append((self.ASL_RECORD_DYN_VALUE.parse_stream(
                file_object)).value.partition('\x00')[0])
            # Come back to the position in the entry.
            file_object.seek(
                main_position - file_object.tell(), os.SEEK_CUR)

      # Next extra field: 8 bytes more.
      tam_fields -= 8

    # Read the last 8 bytes of the record that point to the previous entry.
    _ = file_object.read(8)

    # The first four dynamic values are always the host, sender, facility
    # and message; guard against malformed records with fewer fields.
    if len(values) < 4:
      logging.warning(
          u'Unable to parse ASL event: missing dynamic fields.')
      return None, None

    # Translate the read data into an appropriate format.
    microsecond = record_header.nanosec // 1000
    timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
        record_header.timestamp, microsecond)
    record_position = offset
    message_id = record_header.asl_message_id
    level = u'{0} ({1})'.format(
        self.ASL_MESSAGE_PRIORITY[record_header.level], record_header.level)

    # If the value is -1 (FFFFFFFF), the entry can be read by everyone.
    if record_header.read_uid != int(self.ASL_NO_RIGHTS, 16):
      read_uid = record_header.read_uid
    else:
      read_uid = 'ALL'
    if record_header.read_gid != int(self.ASL_NO_RIGHTS, 16):
      read_gid = record_header.read_gid
    else:
      read_gid = 'ALL'

    computer_name = values[0]
    sender = values[1]
    facility = values[2]
    message = values[3]

    # Extra fields work as pairs: the first is the name of the field and
    # the second its value.
    extra_information = ''
    if len(values) > 4:
      values = values[4:]
      for index in xrange(0, len(values) // 2):
        extra_information += (u'[{0}: {1}]'.format(
            values[index * 2], values[(index * 2) + 1]))

    # Return the event and the offset for the next entry.
    return AslEvent(
        timestamp, record_position, message_id, level, record_header,
        read_uid, read_gid, computer_name, sender, facility, message,
        extra_information), record_header.next_offset
# MSc Project in Royal Holloway, University of London. __author__ = 'Joaquin Moreno Garijo ([email protected])' import construct import datetime import os import sys KEYCHAIN_MAGIC_HEADER = 'kych' KEYCHAIN_MAJOR_VERSION = 1 KEYCHAIN_MINOR_VERSION = 0 # DB HEADER KEYCHAIN_DB_HEADER = construct.Struct('db_header', construct.String('magic', 4), construct.UBInt16('major_version'), construct.UBInt16('minor_version'), construct.UBInt32('header_size'), construct.UBInt32('schema_offset'), construct.Padding(4)) # DB SCHEMA KEYCHAIN_DB_SCHEMA = construct.Struct('db_schema', construct.UBInt32('size'), construct.UBInt32('number_of_tables')) # For each umber_of_tables, the schema has a TABLE_OFFSET with the # offset starting in the DB_SCHEMA. TABLE_OFFSET = construct.UBInt32('table_offset') # TABLE TABLE_RECORD_TYPE = { 0: u'Schema information',
class FirefoxCacheParser(BaseFirefoxCacheParser):
    """Parses Firefox cache version 1 files (Firefox 31 or earlier).

    Version 1 cache data lives in block files (_CACHE_001_, _CACHE_002_,
    _CACHE_003_) made up of fixed-size blocks; each record starts with a
    36-byte big-endian header followed by the request URL and the HTTP
    response headers.
    """

    NAME = u'firefox_cache'
    DESCRIPTION = (
        u'Parser for Firefox Cache version 1 files (Firefox 31 or earlier).')

    # Cache format version handled by this parser.
    _CACHE_VERSION = 1

    # Initial size of Firefox 4 and later cache files.
    _INITIAL_CACHE_FILE_SIZE = 4 * 1024 * 1024

    # Smallest possible block size in Firefox cache files.
    # NOTE(review): name misspells "MINIMUM"; kept as-is since other code may
    # reference it.
    _MINUMUM_BLOCK_SIZE = 256

    # Fixed-size big-endian header that precedes every cache record.
    _CACHE_RECORD_HEADER_STRUCT = construct.Struct(
        u'record_header',
        construct.UBInt16(u'major'),
        construct.UBInt16(u'minor'),
        construct.UBInt32(u'location'),
        construct.UBInt32(u'fetch_count'),
        construct.UBInt32(u'last_fetched'),
        construct.UBInt32(u'last_modified'),
        construct.UBInt32(u'expire_time'),
        construct.UBInt32(u'data_size'),
        construct.UBInt32(u'request_size'),
        construct.UBInt32(u'info_size'))

    _CACHE_RECORD_HEADER_SIZE = _CACHE_RECORD_HEADER_STRUCT.sizeof()

    # TODO: change into regexp.
    # Matches metadata cache filenames: five hex characters + 'm' + two
    # digits, e.g. '01ABCm02'.
    _CACHE_FILENAME = (
        pyparsing.Word(pyparsing.hexnums, exact=5) +
        pyparsing.Word(u'm', exact=1) +
        pyparsing.Word(pyparsing.nums, exact=2))

    # Per-file configuration: detected block size and the offset of the first
    # valid cache record.
    FIREFOX_CACHE_CONFIG = collections.namedtuple(
        u'firefox_cache_config', u'block_size first_record_offset')

    def _GetFirefoxConfig(self, file_object, display_name):
        """Determine cache file block size.

        Scans forward until a record parses and validates, then derives the
        block size from that record's total size.

        Args:
          file_object: A file-like object.
          display_name: the display name.

        Returns:
          FIREFOX_CACHE_CONFIG: named tuple with the detected block size and
              the offset of the first valid record.

        Raises:
          UnableToParseFile: if no valid cache record could be found.
        """
        # There ought to be a valid record within the first 4 MiB. We use this
        # limit to prevent reading large invalid files.
        to_read = min(file_object.get_size(), self._INITIAL_CACHE_FILE_SIZE)

        while file_object.get_offset() < to_read:
            offset = file_object.get_offset()
            try:
                # We have not yet determined the block size, so we use the
                # smallest possible size.
                fetched, _, _ = self._NextRecord(
                    file_object, display_name, self._MINUMUM_BLOCK_SIZE)

                # The total record size determines which cache file flavor
                # (and therefore block size) this is.
                record_size = (
                    self._CACHE_RECORD_HEADER_SIZE + fetched.request_size +
                    fetched.info_size)

                if record_size >= 4096:
                    # _CACHE_003_
                    block_size = 4096
                elif record_size >= 1024:
                    # _CACHE_002_
                    block_size = 1024
                else:
                    # _CACHE_001_
                    block_size = 256

                return self.FIREFOX_CACHE_CONFIG(block_size, offset)

            except IOError:
                # Invalid record: _NextRecord already advanced the stream;
                # keep scanning for the first valid one.
                logging.debug(u'[{0:s}] {1:s}:{2:d}: Invalid record.'.format(
                    self.NAME, display_name, offset))

        raise errors.UnableToParseFile(
            u'Could not find a valid cache record. Not a Firefox cache file.')

    def _NextRecord(self, file_object, display_name, block_size):
        """Provide the next cache record.

        Args:
          file_object: A file-like object.
          display_name: the display name.
          block_size: the block size.

        Returns:
          A tuple containing the fetched, modified and expire event objects
          (instances of EventObject) or None.

        Raises:
          IOError: if the stream cannot be parsed or the record header is not
              valid. In the invalid-header case the stream is first advanced
              to the next candidate block.
        """
        offset = file_object.get_offset()

        try:
            cache_record_header = self._CACHE_RECORD_HEADER_STRUCT.parse_stream(
                file_object)
        except (IOError, construct.FieldError):
            raise IOError(u'Unable to parse stream.')

        if not self._ValidateCacheRecordHeader(cache_record_header):
            # Move reader to next candidate block.
            file_offset = block_size - self._CACHE_RECORD_HEADER_SIZE
            file_object.seek(file_offset, os.SEEK_CUR)
            raise IOError(u'Not a valid Firefox cache record.')

        # The last byte in a request is null.
        url = file_object.read(cache_record_header.request_size)[:-1]

        # HTTP response header, even elements are keys, odd elements values.
        header_data = file_object.read(cache_record_header.info_size)

        request_method, response_code = self._ParseHTTPHeaders(
            header_data, offset, display_name)

        # A request can span multiple blocks, so we use modulo.
        file_offset = file_object.get_offset() - offset
        _, remainder = divmod(file_offset, block_size)

        # Move reader to next candidate block. Include the null-byte skipped
        # above.
        file_object.seek(block_size - remainder, os.SEEK_CUR)

        cache_record_values = {
            u'data_size': cache_record_header.data_size,
            u'fetch_count': cache_record_header.fetch_count,
            u'info_size': cache_record_header.info_size,
            u'location': cache_record_header.location,
            u'major': cache_record_header.major,
            u'minor': cache_record_header.minor,
            u'request_method': request_method,
            u'request_size': cache_record_header.request_size,
            u'response_code': response_code,
            u'version': self._CACHE_VERSION,
            u'url': url}

        # A last-visited event is always produced; written and expiration
        # events only when the corresponding timestamps are non-zero.
        fetched = FirefoxCacheEvent(
            cache_record_header.last_fetched,
            eventdata.EventTimestamp.LAST_VISITED_TIME, cache_record_values)

        if cache_record_header.last_modified:
            modified = FirefoxCacheEvent(
                cache_record_header.last_modified,
                eventdata.EventTimestamp.WRITTEN_TIME, cache_record_values)
        else:
            modified = None

        if cache_record_header.expire_time:
            expire = FirefoxCacheEvent(
                cache_record_header.expire_time,
                eventdata.EventTimestamp.EXPIRATION_TIME, cache_record_values)
        else:
            expire = None

        return fetched, modified, expire

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a Firefox cache file-like object.

        Args:
          parser_mediator: A parser mediator object (instance of
              ParserMediator).
          file_object: A file-like object.

        Raises:
          UnableToParseFile: when the file cannot be parsed.
        """
        filename = parser_mediator.GetFilename()
        display_name = parser_mediator.GetDisplayName()

        try:
            # Match cache filename. Five hex characters + 'm' + two digit
            # number, e.g. '01ABCm02'. 'm' is for metadata. Cache files with
            # 'd' instead contain data only.
            self._CACHE_FILENAME.parseString(filename)
        except pyparsing.ParseException:
            # Fall back to the block-file naming scheme (_CACHE_001_ etc.).
            if not filename.startswith(u'_CACHE_00'):
                raise errors.UnableToParseFile(u'Not a Firefox cache1 file.')

        firefox_config = self._GetFirefoxConfig(file_object, display_name)

        file_object.seek(firefox_config.first_record_offset)

        while file_object.get_offset() < file_object.get_size():
            try:
                fetched, modified, expire = self._NextRecord(
                    file_object, display_name, firefox_config.block_size)

                parser_mediator.ProduceEvent(fetched)

                if modified:
                    parser_mediator.ProduceEvent(modified)

                if expire:
                    parser_mediator.ProduceEvent(expire)
            except IOError:
                # _NextRecord already advanced past the bad block; log the
                # offset of the invalid record and keep scanning.
                file_offset = file_object.get_offset(
                ) - self._MINUMUM_BLOCK_SIZE
                logging.debug(
                    (u'[{0:s}] Invalid cache record in file: {1:s} at offset: '
                     u'{2:d}.').format(self.NAME, display_name, file_offset))
Bytes = lambda name: c.ExprAdapter(c.OptionalGreedyRange(c.StaticField(name, 1)), encoder=lambda obj, ctx : list(obj), decoder=lambda obj, ctx : ''.join(obj) ) _commands = { 'choke' : [c.Magic('\x00')], 'unchoke' : [c.Magic('\x01')], 'interested' : [c.Magic('\x02')], 'uninterested' : [c.Magic('\x03')], 'have' : [c.Magic('\x04'), c.UBInt32('index')], 'bitfield' : [c.Magic('\x05'), Bytes('bits')], 'request' : [c.Magic('\x06'), c.UBInt32('index'), c.UBInt32('begin'), c.UBInt32('length')], 'piece' : [c.Magic('\x07'), c.UBInt32('index'), c.UBInt32('begin'), Bytes('data')], 'cancel' : [c.Magic('\x08'), c.UBInt32('index'), c.UBInt32('begin'), c.UBInt32('length')], 'port' : [c.Magic('\x09'), c.UBInt16('port')], 'extended' : [c.Magic('\x14'), c.UBInt8('cmd'), Bytes('msg')], } for k, v in _commands.items(): _commands[k] = c.Struct(k, *v) def build_handshake(info_hash, host_id, extensions): bits = bitarray.bitarray([0]*64, endian='little') for i in extensions: bits[i] = True obj = c.Container(info_hash=info_hash, peer_id=host_id, reserved=bits.tobytes())
class CupsIppParser(interface.BaseParser):
    """Parser for CUPS IPP files."""

    NAME = 'cups_ipp'
    DESCRIPTION = u'Parser for CUPS IPP files.'

    # INFO:
    # For each file, we have only one document with three different timestamps:
    # Created, process and finished.
    # Format:
    # [HEADER: MAGIC + KNOWN_TYPE][GROUP A]...[GROUP Z][GROUP_END: 0x03]
    # GROUP: [GROUP ID][PAIR A]...[PAIR Z] where [PAIR: NAME + VALUE]
    #   GROUP ID: [1byte ID]
    #   PAIR: [TagID][\x00][Name][Value])
    #     TagID: 1 byte integer with the type of "Value".
    #     Name: [Length][Text][\00]
    #       Name can be empty when the name has more than one value.
    #       Example: family name "lopez mata" with more than one surname.
    #       Type_Text + [0x06, family, 0x00] + [0x05, lopez, 0x00] +
    #       Type_Text + [0x00, 0x00] + [0x04, mata, 0x00]
    #     Value: can be integer, boolean, or text provided by TagID.
    #       If boolean, Value: [\x01][0x00(False)] or [\x01(True)]
    #       If integer, Value: [\x04][Integer]
    #       If text, Value: [Length text][Text][\00]

    # Magic number that identify the CUPS IPP supported version.
    IPP_MAJOR_VERSION = 2
    IPP_MINOR_VERSION = 0

    # Supported Operation ID.
    IPP_OP_ID = 5

    # CUPS IPP File header.
    CUPS_IPP_HEADER = construct.Struct(
        'cups_ipp_header_struct',
        construct.UBInt8('major_version'),
        construct.UBInt8('minor_version'),
        construct.UBInt16('operation_id'),
        construct.UBInt32('request_id'))

    # Group ID that indicates the end of the IPP Control file.
    GROUP_END = 3

    # Identification Groups.
    GROUP_LIST = [1, 2, 4, 5, 6, 7]

    # Type ID.
    TYPE_GENERAL_INTEGER = 32
    TYPE_INTEGER = 33
    TYPE_ENUMERATION = 35
    TYPE_BOOL = 34

    # Type of values that can be extracted.
    INTEGER_8 = construct.UBInt8('integer')
    INTEGER_32 = construct.UBInt32('integer')
    TEXT = construct.PascalString(
        'text', length_field=construct.UBInt8('length'))
    BOOLEAN = construct.Struct(
        'boolean_value', construct.Padding(1), INTEGER_8)
    INTEGER = construct.Struct(
        'integer_value', construct.Padding(1), INTEGER_32)

    # Name of the pair: Pascal string followed by a NUL terminator byte.
    PAIR_NAME = construct.Struct('pair_name', TEXT, construct.Padding(1))

    # Specific CUPS IPP to generic name.
    NAME_PAIR_TRANSLATION = {
        'printer-uri': u'uri',
        'job-uuid': u'job_id',
        'DestinationPrinterID': u'printer_id',
        'job-originating-user-name': u'user',
        'job-name': u'job_name',
        'document-format': u'doc_type',
        'job-originating-host-name': u'computer_name',
        'com.apple.print.JobInfo.PMApplicationName': u'application',
        'com.apple.print.JobInfo.PMJobOwner': u'owner'}

    def Parse(self, parser_context, file_entry):
        """Extracts events from a CUPS IPP file.

        Validates the file header, reads every name/value pair into a
        dictionary and produces creation, start and end events from the
        time-at-* attributes.

        Args:
          parser_context: A parser context object (instance of ParserContext).
          file_entry: A file entry object (instance of dfvfs.FileEntry).

        Raises:
          UnableToParseFile: if the header cannot be parsed or the version is
              not supported.
        """
        file_object = file_entry.GetFileObject()
        file_object.seek(0, os.SEEK_SET)

        try:
            header = self.CUPS_IPP_HEADER.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            file_object.close()
            raise errors.UnableToParseFile(
                u'Unable to parse CUPS IPP Header with error: {0:s}'.format(
                    exception))

        if (header.major_version != self.IPP_MAJOR_VERSION or
                header.minor_version != self.IPP_MINOR_VERSION):
            file_object.close()
            raise errors.UnableToParseFile(
                u'[{0:s}] Unsupported version number.'.format(self.NAME))

        if header.operation_id != self.IPP_OP_ID:
            # Warn if the operation ID differs from the standard one. We
            # should be able to parse the file nonetheless.
            logging.debug(
                u'[{0:s}] Unsupported operation identifier in file: {1:s}.'.
                format(self.NAME, parser_context.GetDisplayName(file_entry)))

        # Read the pairs extracting the name and the value.
        data_dict = {}
        name, value = self.ReadPair(parser_context, file_entry, file_object)
        while name or value:
            # Translate the known "name" CUPS IPP to a generic name value.
            pretty_name = self.NAME_PAIR_TRANSLATION.get(name, name)
            # A name may repeat; collect all of its values in a list.
            data_dict.setdefault(pretty_name, []).append(value)
            name, value = self.ReadPair(
                parser_context, file_entry, file_object)

        # Yield the events.
        if u'time-at-creation' in data_dict:
            event_object = CupsIppEvent(
                data_dict['time-at-creation'][0],
                eventdata.EventTimestamp.CREATION_TIME, data_dict)
            parser_context.ProduceEvent(
                event_object, parser_name=self.NAME, file_entry=file_entry)

        if u'time-at-processing' in data_dict:
            event_object = CupsIppEvent(
                data_dict['time-at-processing'][0],
                eventdata.EventTimestamp.START_TIME, data_dict)
            parser_context.ProduceEvent(
                event_object, parser_name=self.NAME, file_entry=file_entry)

        if u'time-at-completed' in data_dict:
            event_object = CupsIppEvent(
                data_dict['time-at-completed'][0],
                eventdata.EventTimestamp.END_TIME, data_dict)
            parser_context.ProduceEvent(
                event_object, parser_name=self.NAME, file_entry=file_entry)

        file_object.close()

    def ReadPair(self, parser_context, file_entry, file_object):
        """Reads an attribute name and value pair from a CUPS IPP event.

        Args:
          parser_context: A parser context object (instance of ParserContext).
          file_entry: A file entry object (instance of dfvfs.FileEntry).
          file_object: a file-like object that points to a file.

        Returns:
          A tuple of name and value. If name and value cannot be read, or the
          end-of-file group marker is reached, both are set to None.
        """
        # Pair = Type ID + Name + Value.
        try:
            # Can be:
            #   Group ID + IDtag = Group ID (1byte) + Tag ID (1byte) + '0x00'.
            #   IDtag = Tag ID (1byte) + '0x00'.
            type_id = self.INTEGER_8.parse_stream(file_object)
            if type_id == self.GROUP_END:
                return None, None
            elif type_id in self.GROUP_LIST:
                # If it is a group ID we must read the next byte that contains
                # the first TagID.
                type_id = self.INTEGER_8.parse_stream(file_object)
            # 0x00 separator character.
            _ = self.INTEGER_8.parse_stream(file_object)
        except (IOError, construct.FieldError):
            logging.warning(
                u'[{0:s}] Unsupported identifier in file: {1:s}.'.format(
                    self.NAME, parser_context.GetDisplayName(file_entry)))
            return None, None

        # Name = Length name + name + 0x00
        try:
            name = self.PAIR_NAME.parse_stream(file_object).text
        except (IOError, construct.FieldError):
            logging.warning(u'[{0:s}] Unsupported name in file: {1:s}.'.format(
                self.NAME, parser_context.GetDisplayName(file_entry)))
            return None, None

        # Value: can be integer, boolean or text select by Type ID.
        try:
            if type_id in [
                    self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER,
                    self.TYPE_ENUMERATION]:
                value = self.INTEGER.parse_stream(file_object).integer
            elif type_id == self.TYPE_BOOL:
                value = bool(self.BOOLEAN.parse_stream(file_object).integer)
            else:
                value = self.TEXT.parse_stream(file_object)
        except (IOError, construct.FieldError):
            logging.warning(
                u'[{0:s}] Unsupported value in file: {1:s}.'.format(
                    self.NAME, parser_context.GetDisplayName(file_entry)))
            return None, None

        return name, value
class JavaIDXParser(interface.FileObjectParser):
    """Parse Java WebStart Cache IDX files for download events.

    There are five structures defined. 6.02 files had one generic section
    that retained all data. From 6.03, the file went to a multi-section
    format where later sections were optional and had variable-lengths.
    6.03, 6.04, and 6.05 files all have their main data section (#2) begin
    at offset 128. The short structure is because 6.05 files deviate after
    the 8th byte. So, grab the first 8 bytes to ensure it's valid, get the
    file version, then continue on with the correct structures.
    """

    _INITIAL_FILE_OFFSET = None

    NAME = u'java_idx'
    DESCRIPTION = u'Parser for Java WebStart Cache IDX files.'

    # First 6 bytes of every IDX file: busy/incomplete flags and the version.
    IDX_SHORT_STRUCT = construct.Struct(
        u'magic',
        construct.UBInt8(u'busy'),
        construct.UBInt8(u'incomplete'),
        construct.UBInt32(u'idx_version'))

    # Single all-in-one section used by 6.02 files.
    IDX_602_STRUCT = construct.Struct(
        u'IDX_602_Full',
        construct.UBInt16(u'null_space'),
        construct.UBInt8(u'shortcut'),
        construct.UBInt32(u'content_length'),
        construct.UBInt64(u'last_modified_date'),
        construct.UBInt64(u'expiration_date'),
        construct.PascalString(
            u'version_string', length_field=construct.UBInt16(u'length')),
        construct.PascalString(
            u'url', length_field=construct.UBInt16(u'length')),
        construct.PascalString(
            u'namespace', length_field=construct.UBInt16(u'length')),
        construct.UBInt32(u'FieldCount'))

    # Section 1 shared by 6.03/6.04/6.05 files.
    IDX_605_SECTION_ONE_STRUCT = construct.Struct(
        u'IDX_605_Section1',
        construct.UBInt8(u'shortcut'),
        construct.UBInt32(u'content_length'),
        construct.UBInt64(u'last_modified_date'),
        construct.UBInt64(u'expiration_date'),
        construct.UBInt64(u'validation_date'),
        construct.UBInt8(u'signed'),
        construct.UBInt32(u'sec2len'),
        construct.UBInt32(u'sec3len'),
        construct.UBInt32(u'sec4len'))

    # Section 2 (at static offset 128) shared by 6.03/6.04/6.05 files.
    IDX_605_SECTION_TWO_STRUCT = construct.Struct(
        u'IDX_605_Section2',
        construct.PascalString(
            u'version', length_field=construct.UBInt16(u'length')),
        construct.PascalString(
            u'url', length_field=construct.UBInt16(u'length')),
        construct.PascalString(
            u'namespec', length_field=construct.UBInt16(u'length')),
        construct.PascalString(
            u'ip_address', length_field=construct.UBInt16(u'length')),
        construct.UBInt32(u'FieldCount'))

    # Java uses Pascal-style strings, but with a 2-byte length field.
    JAVA_READUTF_STRING = construct.Struct(
        u'Java.ReadUTF',
        construct.PascalString(
            u'string', length_field=construct.UBInt16(u'length')))

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a Java WebStart Cache IDX file-like object.

        Produces a "File Hosted Date" event from the last modified date and,
        when present, expiration and download-date events.

        Args:
          parser_mediator: A parser mediator object (instance of
              ParserMediator).
          file_object: A file-like object.

        Raises:
          UnableToParseFile: when the file cannot be parsed.
        """
        file_object.seek(0, os.SEEK_SET)
        try:
            magic = self.IDX_SHORT_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                u'Unable to parse Java IDX file with error: {0:s}.'.format(
                    exception))

        # Fields magic.busy and magic.incomplete are normally 0x00. They
        # are set to 0x01 if the file is currently being downloaded. Logic
        # checks for > 1 to avoid a race condition and still reject any
        # file with other data.
        # Field magic.idx_version is the file version, of which only
        # certain versions are supported.
        if magic.busy > 1 or magic.incomplete > 1:
            raise errors.UnableToParseFile(u'Not a valid Java IDX file')

        if not magic.idx_version in [602, 603, 604, 605]:
            raise errors.UnableToParseFile(u'Not a valid Java IDX file')

        # Obtain the relevant values from the file. The last modified date
        # denotes when the file was last modified on the HOST. For example,
        # when the file was uploaded to a web server.
        if magic.idx_version == 602:
            section_one = self.IDX_602_STRUCT.parse_stream(file_object)
            last_modified_date = section_one.last_modified_date
            url = section_one.url
            ip_address = u'Unknown'
            http_header_count = section_one.FieldCount
        elif magic.idx_version in [603, 604, 605]:
            # IDX 6.03 and 6.04 have two unused bytes before the structure.
            if magic.idx_version in [603, 604]:
                file_object.read(2)

            # IDX 6.03, 6.04, and 6.05 files use the same structures for the
            # remaining data.
            section_one = self.IDX_605_SECTION_ONE_STRUCT.parse_stream(
                file_object)
            last_modified_date = section_one.last_modified_date
            if file_object.get_size() > 128:
                file_object.seek(128, os.SEEK_SET)
                # Static offset for section 2.
                section_two = self.IDX_605_SECTION_TWO_STRUCT.parse_stream(
                    file_object)
                url = section_two.url
                ip_address = section_two.ip_address
                http_header_count = section_two.FieldCount
            else:
                url = u'Unknown'
                ip_address = u'Unknown'
                http_header_count = 0

        # File offset is now just prior to HTTP headers. Make sure there
        # are headers, and then parse them to retrieve the download date.
        download_date = None
        for field in range(0, http_header_count):
            # NOTE(review): the numeric loop variable is immediately shadowed
            # by the parsed header-field structure; the index itself is
            # unused.
            field = self.JAVA_READUTF_STRING.parse_stream(file_object)
            value = self.JAVA_READUTF_STRING.parse_stream(file_object)
            if field.string == u'date':
                # Time string "should" be in UTC or have an associated time
                # zone information in the string itself. If that is not the
                # case then there is no reliable method for plaso to determine
                # the proper timezone, so the assumption is that it is UTC.
                try:
                    download_date = timelib.Timestamp.FromTimeString(
                        value.string, gmt_as_timezone=False)
                except errors.TimestampError:
                    download_date = None
                    parser_mediator.ProduceExtractionError(
                        u'Unable to parse time value: {0:s}'.format(
                            value.string))

        if not url or not ip_address:
            raise errors.UnableToParseFile(
                u'Unexpected Error: URL or IP address not found in file.')

        event_data = JavaIDXEventData()
        event_data.idx_version = magic.idx_version
        event_data.ip_address = ip_address
        event_data.url = url

        date_time = dfdatetime_java_time.JavaTime(
            timestamp=last_modified_date)
        # TODO: Move the timestamp description into eventdata.
        event = time_events.DateTimeValuesEvent(
            date_time, u'File Hosted Date')
        parser_mediator.ProduceEventWithEventData(event, event_data)

        if section_one:
            expiration_date = section_one.get(u'expiration_date', None)
            if expiration_date:
                date_time = dfdatetime_java_time.JavaTime(
                    timestamp=expiration_date)
                event = time_events.DateTimeValuesEvent(
                    date_time, definitions.TIME_DESCRIPTION_EXPIRATION)
                parser_mediator.ProduceEventWithEventData(event, event_data)

        if download_date:
            event = time_events.TimestampEvent(
                download_date, definitions.TIME_DESCRIPTION_FILE_DOWNLOADED)
            parser_mediator.ProduceEventWithEventData(event, event_data)
class FirefoxCacheParser(BaseFirefoxCacheParser):
    """Parses Firefox cache version 1 files (Firefox 31 or earlier).

    Version 1 cache data lives in block files made up of fixed-size blocks;
    each record starts with a 36-byte big-endian header followed by the
    request URL and the HTTP response headers.
    """

    NAME = 'firefox_cache'
    DESCRIPTION = (
        'Parser for Firefox Cache version 1 files (Firefox 31 or earlier).')

    # Cache format version handled by this parser.
    _CACHE_VERSION = 1

    # Initial size of Firefox 4 and later cache files.
    _INITIAL_CACHE_FILE_SIZE = 4 * 1024 * 1024

    # Smallest possible block size in Firefox cache files.
    # NOTE(review): name misspells "MINIMUM"; kept as-is since other code may
    # reference it.
    _MINUMUM_BLOCK_SIZE = 256

    # Fixed-size big-endian header that precedes every cache record.
    _CACHE_RECORD_HEADER_STRUCT = construct.Struct(
        'record_header',
        construct.UBInt16('major'),
        construct.UBInt16('minor'),
        construct.UBInt32('location'),
        construct.UBInt32('fetch_count'),
        construct.UBInt32('last_fetched'),
        construct.UBInt32('last_modified'),
        construct.UBInt32('expire_time'),
        construct.UBInt32('data_size'),
        construct.UBInt32('request_size'),
        construct.UBInt32('info_size'))

    _CACHE_RECORD_HEADER_SIZE = _CACHE_RECORD_HEADER_STRUCT.sizeof()

    # TODO: change into regexp.
    # Matches metadata cache filenames: five hex characters + 'm' + two
    # digits, e.g. '01ABCm02'.
    _CACHE_FILENAME = (
        pyparsing.Word(pyparsing.hexnums, exact=5) +
        pyparsing.Word('m', exact=1) +
        pyparsing.Word(pyparsing.nums, exact=2))

    # Per-file configuration: detected block size and the offset of the first
    # valid cache record.
    FIREFOX_CACHE_CONFIG = collections.namedtuple(
        'firefox_cache_config', 'block_size first_record_offset')

    def _GetFirefoxConfig(self, file_object, display_name):
        """Determine cache file block size.

        Scans forward until a record parses and validates, then derives the
        block size from that record's total size.

        Args:
          file_object (dfvfs.FileIO): a file-like object.
          display_name (str): display name.

        Returns:
          FIREFOX_CACHE_CONFIG: named tuple with the detected block size and
              the offset of the first valid record.

        Raises:
          UnableToParseFile: if no valid cache record could be found.
        """
        # There ought to be a valid record within the first 4 MiB. We use this
        # limit to prevent reading large invalid files.
        to_read = min(file_object.get_size(), self._INITIAL_CACHE_FILE_SIZE)

        while file_object.get_offset() < to_read:
            offset = file_object.get_offset()
            try:
                # We have not yet determined the block size, so we use the
                # smallest possible size.
                cache_record_header, _ = self._ReadCacheEntry(
                    file_object, display_name, self._MINUMUM_BLOCK_SIZE)

                # The total record size determines which cache file flavor
                # (and therefore block size) this is.
                record_size = (
                    self._CACHE_RECORD_HEADER_SIZE +
                    cache_record_header.request_size +
                    cache_record_header.info_size)

                if record_size >= 4096:
                    # _CACHE_003_
                    block_size = 4096
                elif record_size >= 1024:
                    # _CACHE_002_
                    block_size = 1024
                else:
                    # _CACHE_001_
                    block_size = 256

                return self.FIREFOX_CACHE_CONFIG(block_size, offset)

            except IOError:
                # Invalid record: _ReadCacheEntry already advanced the
                # stream; keep scanning for the first valid one.
                logging.debug('[{0:s}] {1:s}:{2:d}: Invalid record.'.format(
                    self.NAME, display_name, offset))

        raise errors.UnableToParseFile(
            'Could not find a valid cache record. Not a Firefox cache file.')

    def _ParseCacheEntry(
            self, parser_mediator, file_object, display_name, block_size):
        """Parses a cache entry.

        Produces a last-visited event and, when the corresponding timestamps
        are non-zero, written and expiration events.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          file_object (dfvfs.FileIO): a file-like object.
          display_name (str): display name.
          block_size (int): block size.
        """
        cache_record_header, event_data = self._ReadCacheEntry(
            file_object, display_name, block_size)

        date_time = dfdatetime_posix_time.PosixTime(
            timestamp=cache_record_header.last_fetched)
        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_LAST_VISITED)
        parser_mediator.ProduceEventWithEventData(event, event_data)

        if cache_record_header.last_modified:
            date_time = dfdatetime_posix_time.PosixTime(
                timestamp=cache_record_header.last_modified)
            event = time_events.DateTimeValuesEvent(
                date_time, definitions.TIME_DESCRIPTION_WRITTEN)
            parser_mediator.ProduceEventWithEventData(event, event_data)

        if cache_record_header.expire_time:
            date_time = dfdatetime_posix_time.PosixTime(
                timestamp=cache_record_header.expire_time)
            event = time_events.DateTimeValuesEvent(
                date_time, definitions.TIME_DESCRIPTION_EXPIRATION)
            parser_mediator.ProduceEventWithEventData(event, event_data)

    def _ReadCacheEntry(self, file_object, display_name, block_size):
        """Reads a cache entry.

        Args:
          file_object (dfvfs.FileIO): a file-like object.
          display_name (str): display name.
          block_size (int): block size.

        Returns:
          tuple: contains:
            construct.Struct: cache record header structure.
            FirefoxCacheEventData: event data.

        Raises:
          IOError: if the stream cannot be parsed or the record header is not
              valid. In the invalid-header case the stream is first advanced
              to the next candidate block.
        """
        offset = file_object.get_offset()

        try:
            cache_record_header = self._CACHE_RECORD_HEADER_STRUCT.parse_stream(
                file_object)
        except (IOError, construct.FieldError):
            raise IOError('Unable to parse stream.')

        if not self._ValidateCacheRecordHeader(cache_record_header):
            # Move reader to next candidate block.
            file_offset = block_size - self._CACHE_RECORD_HEADER_SIZE
            file_object.seek(file_offset, os.SEEK_CUR)
            raise IOError('Not a valid Firefox cache record.')

        # The URL string is NUL-terminated.
        url = file_object.read(cache_record_header.request_size)[:-1]

        # HTTP response header, even elements are keys, odd elements values.
        header_data = file_object.read(cache_record_header.info_size)

        request_method, response_code = self._ParseHTTPHeaders(
            header_data, offset, display_name)

        # A request can span multiple blocks, so we use modulo.
        file_offset = file_object.get_offset() - offset
        _, remainder = divmod(file_offset, block_size)

        # Move reader to next candidate block. Include the null-byte skipped
        # above.
        file_object.seek(block_size - remainder, os.SEEK_CUR)

        event_data = FirefoxCacheEventData()
        event_data.data_size = cache_record_header.data_size
        event_data.fetch_count = cache_record_header.fetch_count
        event_data.info_size = cache_record_header.info_size
        event_data.location = cache_record_header.location
        event_data.major = cache_record_header.major
        event_data.minor = cache_record_header.minor
        event_data.request_method = request_method
        event_data.request_size = cache_record_header.request_size
        event_data.response_code = response_code
        event_data.url = url
        event_data.version = self._CACHE_VERSION

        return cache_record_header, event_data

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a Firefox cache file-like object.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          file_object (dfvfs.FileIO): a file-like object.

        Raises:
          UnableToParseFile: when the file cannot be parsed.
        """
        filename = parser_mediator.GetFilename()
        display_name = parser_mediator.GetDisplayName()

        try:
            # Match cache filename. Five hex characters + 'm' + two digit
            # number, e.g. '01ABCm02'. 'm' is for metadata. Cache files with
            # 'd' instead contain data only.
            self._CACHE_FILENAME.parseString(filename)
        except pyparsing.ParseException:
            # Fall back to the block-file naming scheme (_CACHE_001_ etc.).
            if not filename.startswith('_CACHE_00'):
                raise errors.UnableToParseFile('Not a Firefox cache1 file.')

        firefox_config = self._GetFirefoxConfig(file_object, display_name)

        file_object.seek(firefox_config.first_record_offset)

        while file_object.get_offset() < file_object.get_size():
            try:
                self._ParseCacheEntry(
                    parser_mediator, file_object, display_name,
                    firefox_config.block_size)
            except IOError:
                # _ReadCacheEntry already advanced past the bad block; log
                # the offset of the invalid record and keep scanning.
                file_offset = file_object.get_offset(
                ) - self._MINUMUM_BLOCK_SIZE
                logging.debug(
                    ('[{0:s}] Invalid cache record in file: {1:s} at offset: '
                     '{2:d}.').format(self.NAME, display_name, file_offset))
) announce_req = c.Struct('request', c.UBInt64('connection_id'), c.UBInt32('action'), c.UBInt32('transaction_id'), c.Bytes('info_hash', 20), c.Bytes('peer_id', 20), c.UBInt64('downloaded'), c.UBInt64('left'), c.UBInt64('uploaded'), c.UBInt32('event'), c.UBInt32('ip_addr'), c.UBInt32('key'), c.SBInt32('num_want'), c.UBInt16('port'), ) announce_resp = c.Struct('response', c.UBInt32('action'), c.UBInt32('transaction_id'), c.UBInt32('interval'), c.UBInt32('leechers'), c.UBInt32('seeders'), c.GreedyRange( c.Struct('peer', c.Array(4, c.UBInt8('addr')), c.UBInt16('port') ) ) )
class BSMParser(interface.FileObjectParser): """Parser for BSM files.""" NAME = 'bsm_log' DESCRIPTION = 'Parser for BSM log files.' # BSM supported version (0x0b = 11). AUDIT_HEADER_VERSION = 11 # Magic Trail Header. BSM_TOKEN_TRAILER_MAGIC = b'b105' # IP Version constants. AU_IPv4 = 4 AU_IPv6 = 16 IPV4_STRUCT = construct.UBInt32('ipv4') IPV6_STRUCT = construct.Struct( 'ipv6', construct.UBInt64('high'), construct.UBInt64('low')) # Tested structures. # INFO: I have ommited the ID in the structures declaration. # I used the BSM_TYPE first to read the ID, and then, the structure. # Tokens always start with an ID value that identifies their token # type and subsequent structure. _BSM_TOKEN = construct.UBInt8('token_id') # Data type structures. BSM_TOKEN_DATA_CHAR = construct.String('value', 1) BSM_TOKEN_DATA_SHORT = construct.UBInt16('value') BSM_TOKEN_DATA_INTEGER = construct.UBInt32('value') # Common structure used by other structures. # audit_uid: integer, uid that generates the entry. # effective_uid: integer, the permission user used. # effective_gid: integer, the permission group used. # real_uid: integer, user id of the user that execute the process. # real_gid: integer, group id of the group that execute the process. # pid: integer, identification number of the process. # session_id: unknown, need research. BSM_TOKEN_SUBJECT_SHORT = construct.Struct( 'subject_data', construct.UBInt32('audit_uid'), construct.UBInt32('effective_uid'), construct.UBInt32('effective_gid'), construct.UBInt32('real_uid'), construct.UBInt32('real_gid'), construct.UBInt32('pid'), construct.UBInt32('session_id')) # Common structure used by other structures. # Identify the kind of inet (IPv4 or IPv6) # TODO: instead of 16, AU_IPv6 must be used. BSM_IP_TYPE_SHORT = construct.Struct( 'bsm_ip_type_short', construct.UBInt32('net_type'), construct.Switch( 'ip_addr', _BSMTokenGetNetType, {16: IPV6_STRUCT}, default=IPV4_STRUCT)) # Initial fields structure used by header structures. 
# length: integer, the length of the entry, equal to trailer (doc: length). # version: integer, version of BSM (AUDIT_HEADER_VERSION). # event_type: integer, the type of event (/etc/security/audit_event). # modifier: integer, unknown, need research (It is always 0). BSM_HEADER = construct.Struct( 'bsm_header', construct.UBInt32('length'), construct.UBInt8('version'), construct.UBInt16('event_type'), construct.UBInt16('modifier')) # First token of one entry. # timestamp: unsigned integer, number of seconds since # January 1, 1970 00:00:00 UTC. # microseconds: unsigned integer, number of micro seconds. BSM_HEADER32 = construct.Struct( 'bsm_header32', BSM_HEADER, construct.UBInt32('timestamp'), construct.UBInt32('microseconds')) BSM_HEADER64 = construct.Struct( 'bsm_header64', BSM_HEADER, construct.UBInt64('timestamp'), construct.UBInt64('microseconds')) BSM_HEADER32_EX = construct.Struct( 'bsm_header32_ex', BSM_HEADER, BSM_IP_TYPE_SHORT, construct.UBInt32('timestamp'), construct.UBInt32('microseconds')) # Token TEXT, provides extra information. BSM_TOKEN_TEXT = construct.Struct( 'bsm_token_text', construct.UBInt16('length'), construct.Array(_BSMTokenGetLength, construct.UBInt8('text'))) # Path of the executable. BSM_TOKEN_PATH = BSM_TOKEN_TEXT # Identified the end of the record (follow by TRAILER). # status: integer that identifies the status of the exit (BSM_ERRORS). # return: returned value from the operation. BSM_TOKEN_RETURN32 = construct.Struct( 'bsm_token_return32', construct.UBInt8('status'), construct.UBInt32('return_value')) BSM_TOKEN_RETURN64 = construct.Struct( 'bsm_token_return64', construct.UBInt8('status'), construct.UBInt64('return_value')) # Identified the number of bytes that was written. # magic: 2 bytes that identifies the TRAILER (BSM_TOKEN_TRAILER_MAGIC). # length: integer that has the number of bytes from the entry size. 
BSM_TOKEN_TRAILER = construct.Struct( 'bsm_token_trailer', construct.UBInt16('magic'), construct.UBInt32('record_length')) # A 32-bits argument. # num_arg: the number of the argument. # name_arg: the argument's name. # text: the string value of the argument. BSM_TOKEN_ARGUMENT32 = construct.Struct( 'bsm_token_argument32', construct.UBInt8('num_arg'), construct.UBInt32('name_arg'), construct.UBInt16('length'), construct.Array(_BSMTokenGetLength, construct.UBInt8('text'))) # A 64-bits argument. # num_arg: integer, the number of the argument. # name_arg: text, the argument's name. # text: the string value of the argument. BSM_TOKEN_ARGUMENT64 = construct.Struct( 'bsm_token_argument64', construct.UBInt8('num_arg'), construct.UBInt64('name_arg'), construct.UBInt16('length'), construct.Array(_BSMTokenGetLength, construct.UBInt8('text'))) # Identify an user. # terminal_id: unknown, research needed. # terminal_addr: unknown, research needed. BSM_TOKEN_SUBJECT32 = construct.Struct( 'bsm_token_subject32', BSM_TOKEN_SUBJECT_SHORT, construct.UBInt32('terminal_port'), IPV4_STRUCT) # Identify an user using a extended Token. # terminal_port: unknown, need research. # net_type: unknown, need research. BSM_TOKEN_SUBJECT32_EX = construct.Struct( 'bsm_token_subject32_ex', BSM_TOKEN_SUBJECT_SHORT, construct.UBInt32('terminal_port'), BSM_IP_TYPE_SHORT) # au_to_opaque // AUT_OPAQUE BSM_TOKEN_OPAQUE = BSM_TOKEN_TEXT # au_to_seq // AUT_SEQ BSM_TOKEN_SEQUENCE = BSM_TOKEN_DATA_INTEGER # Program execution with options. # For each argument we are going to have a string+ "\x00". # Example: [00 00 00 02][41 42 43 00 42 42 00] # 2 Arguments, Arg1: [414243] Arg2: [4242]. 
BSM_TOKEN_EXEC_ARGUMENTS = construct.UBInt32('number_arguments') BSM_TOKEN_EXEC_ARGUMENT = construct.Struct( 'bsm_token_exec_argument', construct.RepeatUntil( _BSMTokenIsEndOfString, construct.StaticField("text", 1))) # au_to_in_addr // AUT_IN_ADDR: BSM_TOKEN_ADDR = IPV4_STRUCT # au_to_in_addr_ext // AUT_IN_ADDR_EX: BSM_TOKEN_ADDR_EXT = construct.Struct( 'bsm_token_addr_ext', construct.UBInt32('net_type'), IPV6_STRUCT) # au_to_ip // AUT_IP: # TODO: parse this header in the correct way. BSM_TOKEN_IP = construct.String('binary_ipv4_add', 20) # au_to_ipc // AUT_IPC: BSM_TOKEN_IPC = construct.Struct( 'bsm_token_ipc', construct.UBInt8('object_type'), construct.UBInt32('object_id')) # au_to_ipc_perm // au_to_ipc_perm BSM_TOKEN_IPC_PERM = construct.Struct( 'bsm_token_ipc_perm', construct.UBInt32('user_id'), construct.UBInt32('group_id'), construct.UBInt32('creator_user_id'), construct.UBInt32('creator_group_id'), construct.UBInt32('access_mode'), construct.UBInt32('slot_seq'), construct.UBInt32('key')) # au_to_iport // AUT_IPORT: BSM_TOKEN_PORT = construct.UBInt16('port_number') # au_to_file // AUT_OTHER_FILE32: BSM_TOKEN_FILE = construct.Struct( 'bsm_token_file', construct.UBInt32('timestamp'), construct.UBInt32('microseconds'), construct.UBInt16('length'), construct.Array(_BSMTokenGetLength, construct.UBInt8('text'))) # au_to_subject64 // AUT_SUBJECT64: BSM_TOKEN_SUBJECT64 = construct.Struct( 'bsm_token_subject64', BSM_TOKEN_SUBJECT_SHORT, construct.UBInt64('terminal_port'), IPV4_STRUCT) # au_to_subject64_ex // AU_IPv4: BSM_TOKEN_SUBJECT64_EX = construct.Struct( 'bsm_token_subject64_ex', BSM_TOKEN_SUBJECT_SHORT, construct.UBInt32('terminal_port'), construct.UBInt32('terminal_type'), BSM_IP_TYPE_SHORT) # au_to_process32 // AUT_PROCESS32: BSM_TOKEN_PROCESS32 = construct.Struct( 'bsm_token_process32', BSM_TOKEN_SUBJECT_SHORT, construct.UBInt32('terminal_port'), IPV4_STRUCT) # au_to_process64 // AUT_PROCESS32: BSM_TOKEN_PROCESS64 = construct.Struct( 'bsm_token_process64', 
BSM_TOKEN_SUBJECT_SHORT, construct.UBInt64('terminal_port'), IPV4_STRUCT) # au_to_process32_ex // AUT_PROCESS32_EX: BSM_TOKEN_PROCESS32_EX = construct.Struct( 'bsm_token_process32_ex', BSM_TOKEN_SUBJECT_SHORT, construct.UBInt32('terminal_port'), BSM_IP_TYPE_SHORT) # au_to_process64_ex // AUT_PROCESS64_EX: BSM_TOKEN_PROCESS64_EX = construct.Struct( 'bsm_token_process64_ex', BSM_TOKEN_SUBJECT_SHORT, construct.UBInt64('terminal_port'), BSM_IP_TYPE_SHORT) # au_to_sock_inet32 // AUT_SOCKINET32: BSM_TOKEN_AUT_SOCKINET32 = construct.Struct( 'bsm_token_aut_sockinet32', construct.UBInt16('net_type'), construct.UBInt16('port_number'), IPV4_STRUCT) # Info: checked against the source code of XNU, but not against # real BSM file. BSM_TOKEN_AUT_SOCKINET128 = construct.Struct( 'bsm_token_aut_sockinet128', construct.UBInt16('net_type'), construct.UBInt16('port_number'), IPV6_STRUCT) INET6_ADDR_TYPE = construct.Struct( 'addr_type', construct.UBInt16('ip_type'), construct.UBInt16('source_port'), construct.UBInt64('saddr_high'), construct.UBInt64('saddr_low'), construct.UBInt16('destination_port'), construct.UBInt64('daddr_high'), construct.UBInt64('daddr_low')) INET4_ADDR_TYPE = construct.Struct( 'addr_type', construct.UBInt16('ip_type'), construct.UBInt16('source_port'), construct.UBInt32('source_address'), construct.UBInt16('destination_port'), construct.UBInt32('destination_address')) # au_to_socket_ex // AUT_SOCKET_EX # TODO: Change the 26 for unixbsm.BSM_PROTOCOLS.INET6. 
BSM_TOKEN_AUT_SOCKINET32_EX = construct.Struct( 'bsm_token_aut_sockinet32_ex', construct.UBInt16('socket_domain'), construct.UBInt16('socket_type'), construct.Switch( 'structure_addr_port', _BSMTokenGetSocketDomain, {26: INET6_ADDR_TYPE}, default=INET4_ADDR_TYPE)) # au_to_sock_unix // AUT_SOCKUNIX BSM_TOKEN_SOCKET_UNIX = construct.Struct( 'bsm_token_au_to_sock_unix', construct.UBInt16('family'), construct.RepeatUntil( _BSMTokenIsEndOfString, construct.StaticField("path", 1))) # au_to_data // au_to_data # how to print: bsmtoken.BSM_TOKEN_DATA_PRINT. # type: bsmtoken.BSM_TOKEN_DATA_TYPE. # unit_count: number of type values. # BSM_TOKEN_DATA has a end field = type * unit_count BSM_TOKEN_DATA = construct.Struct( 'bsm_token_data', construct.UBInt8('how_to_print'), construct.UBInt8('data_type'), construct.UBInt8('unit_count')) # au_to_attr32 // AUT_ATTR32 BSM_TOKEN_ATTR32 = construct.Struct( 'bsm_token_attr32', construct.UBInt32('file_mode'), construct.UBInt32('uid'), construct.UBInt32('gid'), construct.UBInt32('file_system_id'), construct.UBInt64('file_system_node_id'), construct.UBInt32('device')) # au_to_attr64 // AUT_ATTR64 BSM_TOKEN_ATTR64 = construct.Struct( 'bsm_token_attr64', construct.UBInt32('file_mode'), construct.UBInt32('uid'), construct.UBInt32('gid'), construct.UBInt32('file_system_id'), construct.UBInt64('file_system_node_id'), construct.UBInt64('device')) # au_to_exit // AUT_EXIT BSM_TOKEN_EXIT = construct.Struct( 'bsm_token_exit', construct.UBInt32('status'), construct.UBInt32('return_value')) # au_to_newgroups // AUT_NEWGROUPS # INFO: we must read BSM_TOKEN_DATA_INTEGER for each group. BSM_TOKEN_GROUPS = construct.UBInt16('group_number') # au_to_exec_env == au_to_exec_args BSM_TOKEN_EXEC_ENV = BSM_TOKEN_EXEC_ARGUMENTS # au_to_zonename //AUT_ZONENAME BSM_TOKEN_ZONENAME = BSM_TOKEN_TEXT # Token ID. # List of valid Token_ID. # Token_ID -> (NAME_STRUCTURE, STRUCTURE) # Only the checked structures are been added to the valid structures lists. 
_BSM_TOKEN_TYPES = { 17: ('BSM_TOKEN_FILE', BSM_TOKEN_FILE), 19: ('BSM_TOKEN_TRAILER', BSM_TOKEN_TRAILER), 20: ('BSM_HEADER32', BSM_HEADER32), 21: ('BSM_HEADER64', BSM_HEADER64), 33: ('BSM_TOKEN_DATA', BSM_TOKEN_DATA), 34: ('BSM_TOKEN_IPC', BSM_TOKEN_IPC), 35: ('BSM_TOKEN_PATH', BSM_TOKEN_PATH), 36: ('BSM_TOKEN_SUBJECT32', BSM_TOKEN_SUBJECT32), 38: ('BSM_TOKEN_PROCESS32', BSM_TOKEN_PROCESS32), 39: ('BSM_TOKEN_RETURN32', BSM_TOKEN_RETURN32), 40: ('BSM_TOKEN_TEXT', BSM_TOKEN_TEXT), 41: ('BSM_TOKEN_OPAQUE', BSM_TOKEN_OPAQUE), 42: ('BSM_TOKEN_ADDR', BSM_TOKEN_ADDR), 43: ('BSM_TOKEN_IP', BSM_TOKEN_IP), 44: ('BSM_TOKEN_PORT', BSM_TOKEN_PORT), 45: ('BSM_TOKEN_ARGUMENT32', BSM_TOKEN_ARGUMENT32), 47: ('BSM_TOKEN_SEQUENCE', BSM_TOKEN_SEQUENCE), 96: ('BSM_TOKEN_ZONENAME', BSM_TOKEN_ZONENAME), 113: ('BSM_TOKEN_ARGUMENT64', BSM_TOKEN_ARGUMENT64), 114: ('BSM_TOKEN_RETURN64', BSM_TOKEN_RETURN64), 116: ('BSM_HEADER32_EX', BSM_HEADER32_EX), 119: ('BSM_TOKEN_PROCESS64', BSM_TOKEN_PROCESS64), 122: ('BSM_TOKEN_SUBJECT32_EX', BSM_TOKEN_SUBJECT32_EX), 127: ('BSM_TOKEN_AUT_SOCKINET32_EX', BSM_TOKEN_AUT_SOCKINET32_EX), 128: ('BSM_TOKEN_AUT_SOCKINET32', BSM_TOKEN_AUT_SOCKINET32)} # Untested structures. # When not tested structure is found, we try to parse using also # these structures. 
BSM_TYPE_LIST_NOT_TESTED = { 49: ('BSM_TOKEN_ATTR', BSM_TOKEN_ATTR32), 50: ('BSM_TOKEN_IPC_PERM', BSM_TOKEN_IPC_PERM), 52: ('BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS), 59: ('BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS), 60: ('BSM_TOKEN_EXEC_ARGUMENTS', BSM_TOKEN_EXEC_ARGUMENTS), 61: ('BSM_TOKEN_EXEC_ENV', BSM_TOKEN_EXEC_ENV), 62: ('BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32), 82: ('BSM_TOKEN_EXIT', BSM_TOKEN_EXIT), 115: ('BSM_TOKEN_ATTR64', BSM_TOKEN_ATTR64), 117: ('BSM_TOKEN_SUBJECT64', BSM_TOKEN_SUBJECT64), 123: ('BSM_TOKEN_PROCESS32_EX', BSM_TOKEN_PROCESS32_EX), 124: ('BSM_TOKEN_PROCESS64_EX', BSM_TOKEN_PROCESS64_EX), 125: ('BSM_TOKEN_SUBJECT64_EX', BSM_TOKEN_SUBJECT64_EX), 126: ('BSM_TOKEN_ADDR_EXT', BSM_TOKEN_ADDR_EXT), 129: ('BSM_TOKEN_AUT_SOCKINET128', BSM_TOKEN_AUT_SOCKINET128), 130: ('BSM_TOKEN_SOCKET_UNIX', BSM_TOKEN_SOCKET_UNIX)} MESSAGE_CAN_NOT_SAVE = ( 'Plaso: some tokens from this entry can not be saved. Entry at 0x{0:X} ' 'with unknown token id "0x{1:X}".') # BSM token types: # https://github.com/openbsm/openbsm/blob/master/sys/bsm/audit_record.h _BSM_TOKEN_TYPE_ARGUMENT32 = 45 _BSM_TOKEN_TYPE_ARGUMENT64 = 113 _BSM_TOKEN_TYPE_ATTR = 49 _BSM_TOKEN_TYPE_ATTR32 = 62 _BSM_TOKEN_TYPE_ATTR64 = 115 _BSM_TOKEN_TYPE_EXEC_ARGUMENTS = 60 _BSM_TOKEN_TYPE_EXEC_ENV = 61 _BSM_TOKEN_TYPE_EXIT = 82 _BSM_TOKEN_TYPE_HEADER32 = 20 _BSM_TOKEN_TYPE_HEADER32_EX = 116 _BSM_TOKEN_TYPE_HEADER64 = 21 _BSM_TOKEN_TYPE_PATH = 35 _BSM_TOKEN_TYPE_PROCESS32 = 38 _BSM_TOKEN_TYPE_PROCESS32_EX = 123 _BSM_TOKEN_TYPE_PROCESS64 = 119 _BSM_TOKEN_TYPE_PROCESS64_EX = 124 _BSM_TOKEN_TYPE_RETURN32 = 39 _BSM_TOKEN_TYPE_RETURN64 = 114 _BSM_TOKEN_TYPE_SUBJECT32 = 36 _BSM_TOKEN_TYPE_SUBJECT32_EX = 122 _BSM_TOKEN_TYPE_SUBJECT64 = 117 _BSM_TOKEN_TYPE_SUBJECT64_EX = 125 _BSM_TOKEN_TYPE_TEXT = 40 _BSM_TOKEN_TYPE_ZONENAME = 96 _BSM_ARGUMENT_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_ARGUMENT32, _BSM_TOKEN_TYPE_ARGUMENT64) _BSM_ATTR_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_ATTR, _BSM_TOKEN_TYPE_ATTR32, _BSM_TOKEN_TYPE_ATTR64) 
_BSM_EXEV_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_EXEC_ARGUMENTS, _BSM_TOKEN_TYPE_EXEC_ENV) _BSM_HEADER_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_HEADER32, _BSM_TOKEN_TYPE_HEADER32_EX, _BSM_TOKEN_TYPE_HEADER64) _BSM_PROCESS_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_PROCESS32, _BSM_TOKEN_TYPE_PROCESS64) _BSM_PROCESS_EX_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_PROCESS32_EX, _BSM_TOKEN_TYPE_PROCESS64_EX) _BSM_RETURN_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_EXIT, _BSM_TOKEN_TYPE_RETURN32, _BSM_TOKEN_TYPE_RETURN64) _BSM_SUBJECT_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_SUBJECT32, _BSM_TOKEN_TYPE_SUBJECT64) _BSM_SUBJECT_EX_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_SUBJECT32_EX, _BSM_TOKEN_TYPE_SUBJECT64_EX) _BSM_UTF8_BYTE_ARRAY_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_PATH, _BSM_TOKEN_TYPE_TEXT, _BSM_TOKEN_TYPE_ZONENAME) def __init__(self): """Initializes a parser object.""" super(BSMParser, self).__init__() # Create the dictionary with all token IDs: tested and untested. self._bsm_type_list_all = self._BSM_TOKEN_TYPES.copy() self._bsm_type_list_all.update(self.BSM_TYPE_LIST_NOT_TESTED) def _CopyByteArrayToBase16String(self, byte_array): """Copies a byte array into a base-16 encoded Unicode string. Args: byte_array (bytes): A byte array. Returns: str: a base-16 encoded Unicode string. """ return ''.join(['{0:02x}'.format(byte) for byte in byte_array]) def _CopyUtf8ByteArrayToString(self, byte_array): """Copies a UTF-8 encoded byte array into a Unicode string. Args: byte_array (bytes): A byte array containing an UTF-8 encoded string. Returns: str: A Unicode string. """ byte_stream = b''.join(map(chr, byte_array)) try: string = byte_stream.decode('utf-8') except UnicodeDecodeError: logging.warning('Unable to decode UTF-8 formatted byte array.') string = byte_stream.decode('utf-8', errors='ignore') string, _, _ = string.partition(b'\x00') return string def _IPv4Format(self, address): """Formats an IPv4 address as a human readable string. Args: address (int): IPv4 address. 
Returns: str: human readable string of IPv4 address in 4 octet representation: "1.2.3.4". """ ipv4_string = self.IPV4_STRUCT.build(address) return socket.inet_ntoa(ipv4_string) def _IPv6Format(self, high, low): """Formats an IPv6 address as a human readable string. Args: high (int): upper 64-bit part of the IPv6 address. low (int): lower 64-bit part of the IPv6 address. Returns: str: human readable string of IPv6 address. """ ipv6_string = self.IPV6_STRUCT.build( construct.Container(high=high, low=low)) # socket.inet_ntop not supported in Windows. if hasattr(socket, 'inet_ntop'): return socket.inet_ntop(socket.AF_INET6, ipv6_string) # TODO: this approach returns double "::", illegal IPv6 addr. str_address = binascii.hexlify(ipv6_string) address = [] blank = False for pos in range(0, len(str_address), 4): if str_address[pos:pos + 4] == '0000': if not blank: address.append('') blank = True else: blank = False address.append(str_address[pos:pos + 4].lstrip('0')) return ':'.join(address) def _ParseBSMEvent(self, parser_mediator, file_object): """Parses a BSM entry (BSMEvent) from the file-like object. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. file_object (dfvfs.FileIO): a file-like object. Returns: bool: True if the BSM entry was parsed. """ record_start_offset = file_object.tell() try: token_type = self._BSM_TOKEN.parse_stream(file_object) except (IOError, construct.FieldError) as exception: parser_mediator.ProduceExtractionError(( 'unable to parse BSM token type at offset: 0x{0:08x} with error: ' '{1:s}.').format(record_start_offset, exception)) return False if token_type not in self._BSM_HEADER_TOKEN_TYPES: parser_mediator.ProduceExtractionError( 'unsupported token type: {0:d} at offset: 0x{1:08x}.'.format( token_type, record_start_offset)) # TODO: if it is a Mac OS X, search for the trailer magic value # as a end of the entry can be a possibility to continue. 
return False _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None)) try: token = record_structure.parse_stream(file_object) except (IOError, construct.FieldError) as exception: parser_mediator.ProduceExtractionError(( 'unable to parse BSM record at offset: 0x{0:08x} with error: ' '{1:s}.').format(record_start_offset, exception)) return False event_type = bsmtoken.BSM_AUDIT_EVENT.get( token.bsm_header.event_type, 'UNKNOWN') event_type = '{0:s} ({1:d})'.format( event_type, token.bsm_header.event_type) timestamp = (token.timestamp * 1000000) + token.microseconds date_time = dfdatetime_posix_time.PosixTimeInMicroseconds( timestamp=timestamp) record_length = token.bsm_header.length record_end_offset = record_start_offset + record_length # A dict of tokens that has the entry. extra_tokens = {} # Read until we reach the end of the record. while file_object.tell() < record_end_offset: # Check if it is a known token. try: token_type = self._BSM_TOKEN.parse_stream(file_object) except (IOError, construct.FieldError): logging.warning( 'Unable to parse the Token ID at position: {0:d}'.format( file_object.tell())) return False _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None)) if not record_structure: pending = record_end_offset - file_object.tell() new_extra_tokens = self.TryWithUntestedStructures( file_object, token_type, pending) extra_tokens.update(new_extra_tokens) else: token = record_structure.parse_stream(file_object) new_extra_tokens = self.FormatToken(token_type, token, file_object) extra_tokens.update(new_extra_tokens) if file_object.tell() > record_end_offset: logging.warning( 'Token ID {0:d} not expected at position 0x{1:08x}.' 
'Jumping for the next entry.'.format( token_type, file_object.tell())) try: file_object.seek( record_end_offset - file_object.tell(), os.SEEK_CUR) except (IOError, construct.FieldError) as exception: logging.warning( 'Unable to jump to next entry with error: {0:s}'.format(exception)) return False # BSM can be in more than one OS: BSD, Solaris and Mac OS X. if parser_mediator.platform != 'MacOSX': event_data = BSMEventData() else: event_data = MacBSMEventData() # In Mac OS X the last two tokens are the return status and the trailer. return_value = extra_tokens.get('BSM_TOKEN_RETURN32') if not return_value: return_value = extra_tokens.get('BSM_TOKEN_RETURN64') if not return_value: return_value = 'UNKNOWN' event_data.return_value = return_value event_data.event_type = event_type event_data.extra_tokens = extra_tokens event_data.offset = record_start_offset event_data.record_length = record_length # TODO: check why trailer was passed to event in original while # event was expecting record length. # if extra_tokens: # trailer = extra_tokens.get('BSM_TOKEN_TRAILER', 'unknown') event = time_events.DateTimeValuesEvent( date_time, definitions.TIME_DESCRIPTION_CREATION) parser_mediator.ProduceEventWithEventData(event, event_data) return True def _RawToUTF8(self, byte_stream): """Copies a UTF-8 byte stream into a Unicode string. Args: byte_stream (bytes): byte stream containing an UTF-8 encoded string. Returns: str: A Unicode string. """ try: string = byte_stream.decode('utf-8') except UnicodeDecodeError: logging.warning( 'Decode UTF8 failed, the message string may be cut short.') string = byte_stream.decode('utf-8', errors='ignore') return string.partition(b'\x00')[0] def ParseFileObject(self, parser_mediator, file_object, **kwargs): """Parses a BSM file-like object. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. file_object (dfvfs.FileIO): a file-like object. 
Raises: UnableToParseFile: when the file cannot be parsed. """ try: is_bsm = self.VerifyFile(parser_mediator, file_object) except (IOError, construct.FieldError) as exception: raise errors.UnableToParseFile( 'Unable to parse BSM file with error: {0:s}'.format(exception)) if not is_bsm: raise errors.UnableToParseFile('Not a BSM File, unable to parse.') file_object.seek(0, os.SEEK_SET) while self._ParseBSMEvent(parser_mediator, file_object): pass def VerifyFile(self, parser_mediator, file_object): """Check if the file is a BSM file. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. file_object (dfvfs.FileIO): a file-like object. Returns: bool: True if this is a valid BSM file, False otherwise. """ # First part of the entry is always a Header. try: token_type = self._BSM_TOKEN.parse_stream(file_object) except (IOError, construct.FieldError): return False if token_type not in self._BSM_HEADER_TOKEN_TYPES: return False _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None)) try: header = record_structure.parse_stream(file_object) except (IOError, construct.FieldError): return False if header.bsm_header.version != self.AUDIT_HEADER_VERSION: return False try: token_identifier = self._BSM_TOKEN.parse_stream(file_object) except (IOError, construct.FieldError): return False # If is Mac OS X BSM file, next entry is a text token indicating # if it is a normal start or it is a recovery track. 
if parser_mediator.platform == 'MacOSX': token_type, record_structure = self._BSM_TOKEN_TYPES.get( token_identifier, ('', None)) if not record_structure: return False if token_type != 'BSM_TOKEN_TEXT': logging.warning('It is not a valid first entry for Mac OS X BSM.') return False try: token = record_structure.parse_stream(file_object) except (IOError, construct.FieldError): return text = self._CopyUtf8ByteArrayToString(token.text) if (text != 'launchctl::Audit startup' and text != 'launchctl::Audit recovery'): logging.warning('It is not a valid first entry for Mac OS X BSM.') return False return True def TryWithUntestedStructures(self, file_object, token_id, pending): """Try to parse the pending part of the entry using untested structures. Args: file_object: BSM file. token_id: integer with the id that comes from the unknown token. pending: pending length of the entry. Returns: A list of extra tokens data that can be parsed using non-tested structures. A message indicating that a structure cannot be parsed is added for unparsed structures. """ # Data from the unknown structure. start_position = file_object.tell() start_token_id = token_id extra_tokens = {} # Read all the "pending" bytes. try: if token_id in self._bsm_type_list_all: token = self._bsm_type_list_all[token_id][1].parse_stream(file_object) new_extra_tokens = self.FormatToken(token_id, token, file_object) extra_tokens.update(new_extra_tokens) while file_object.tell() < (start_position + pending): # Check if it is a known token. 
try: token_id = self._BSM_TOKEN.parse_stream(file_object) except (IOError, construct.FieldError): logging.warning( 'Unable to parse the Token ID at position: {0:d}'.format( file_object.tell())) return if token_id not in self._bsm_type_list_all: break token = self._bsm_type_list_all[token_id][1].parse_stream(file_object) new_extra_tokens = self.FormatToken(token_id, token, file_object) extra_tokens.update(new_extra_tokens) except (IOError, construct.FieldError): token_id = 255 next_entry = (start_position + pending) if file_object.tell() != next_entry: # Unknown Structure. logging.warning('Unknown Token at "0x{0:X}", ID: {1} (0x{2:X})'.format( start_position - 1, token_id, token_id)) # TODO: another way to save this information must be found. extra_tokens.update( {'message': self.MESSAGE_CAN_NOT_SAVE.format( start_position - 1, start_token_id)}) # Move to next entry. file_object.seek(next_entry - file_object.tell(), os.SEEK_CUR) # It returns null list because it doesn't know witch structure was # the incorrect structure that makes that it can arrive to the spected # end of the entry. return {} return extra_tokens def FormatToken(self, token_id, token, file_object): """Parse the Token depending of the type of the structure. Args: token_id (int): identification of the token_type. token (structure): token struct to parse. file_object: BSM file. Returns: (dict): parsed Token values. Keys for returned dictionary are token name like BSM_TOKEN_SUBJECT32. Values of this dictionary are key-value pairs like terminal_ip:127.0.0.1. 
""" if token_id not in self._bsm_type_list_all: return {} bsm_type, _ = self._bsm_type_list_all.get(token_id, ['', '']) if token_id in self._BSM_UTF8_BYTE_ARRAY_TOKEN_TYPES: try: string = self._CopyUtf8ByteArrayToString(token.text) except TypeError: string = 'Unknown' return {bsm_type: string} elif token_id in self._BSM_RETURN_TOKEN_TYPES: return {bsm_type: { 'error': bsmtoken.BSM_ERRORS.get(token.status, 'Unknown'), 'token_status': token.status, 'call_status': token.return_value }} elif token_id in self._BSM_SUBJECT_TOKEN_TYPES: return {bsm_type: { 'aid': token.subject_data.audit_uid, 'euid': token.subject_data.effective_uid, 'egid': token.subject_data.effective_gid, 'uid': token.subject_data.real_uid, 'gid': token.subject_data.real_gid, 'pid': token.subject_data.pid, 'session_id': token.subject_data.session_id, 'terminal_port': token.terminal_port, 'terminal_ip': self._IPv4Format(token.ipv4) }} elif token_id in self._BSM_SUBJECT_EX_TOKEN_TYPES: if token.bsm_ip_type_short.net_type == self.AU_IPv6: ip = self._IPv6Format( token.bsm_ip_type_short.ip_addr.high, token.bsm_ip_type_short.ip_addr.low) elif token.bsm_ip_type_short.net_type == self.AU_IPv4: ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr) else: ip = 'unknown' return {bsm_type: { 'aid': token.subject_data.audit_uid, 'euid': token.subject_data.effective_uid, 'egid': token.subject_data.effective_gid, 'uid': token.subject_data.real_uid, 'gid': token.subject_data.real_gid, 'pid': token.subject_data.pid, 'session_id': token.subject_data.session_id, 'terminal_port': token.terminal_port, 'terminal_ip': ip }} elif token_id in self._BSM_ARGUMENT_TOKEN_TYPES: string = self._CopyUtf8ByteArrayToString(token.text) return {bsm_type: { 'string': string, 'num_arg': token.num_arg, 'is': token.name_arg}} elif token_id in self._BSM_EXEV_TOKEN_TYPES: arguments = [] for _ in range(0, token): sub_token = self.BSM_TOKEN_EXEC_ARGUMENT.parse_stream(file_object) string = self._CopyUtf8ByteArrayToString(sub_token.text) 
arguments.append(string) return {bsm_type: ' '.join(arguments)} elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET32': return {bsm_type: { 'protocols': bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'), 'net_type': token.net_type, 'port': token.port_number, 'address': self._IPv4Format(token.ipv4) }} elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET128': return {bsm_type: { 'protocols': bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'), 'net_type': token.net_type, 'port': token.port_number, 'address': self._IPv6Format(token.ipv6.high, token.ipv6.low) }} elif bsm_type == 'BSM_TOKEN_ADDR': return {bsm_type: self._IPv4Format(token)} elif bsm_type == 'BSM_TOKEN_IP': return {'IPv4_Header': '0x{0:s}]'.format(token.encode('hex'))} elif bsm_type == 'BSM_TOKEN_ADDR_EXT': return {bsm_type: { 'protocols': bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'), 'net_type': token.net_type, 'address': self._IPv6Format(token.ipv6.high, token.ipv6.low) }} elif bsm_type == 'BSM_TOKEN_PORT': return {bsm_type: token} elif bsm_type == 'BSM_TOKEN_TRAILER': return {bsm_type: token.record_length} elif bsm_type == 'BSM_TOKEN_FILE': # TODO: if this timestamp is usefull, it must be extracted as a separate # event object. 
timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond( token.timestamp, token.microseconds) date_time = timelib.Timestamp.CopyToDatetime(timestamp, pytz.UTC) date_time_string = date_time.strftime('%Y-%m-%d %H:%M:%S') string = self._CopyUtf8ByteArrayToString(token.text) return {bsm_type: {'string': string, 'timestamp': date_time_string}} elif bsm_type == 'BSM_TOKEN_IPC': return {bsm_type: { 'object_type': token.object_type, 'object_id': token.object_id }} elif token_id in self._BSM_PROCESS_TOKEN_TYPES: return {bsm_type: { 'aid': token.subject_data.audit_uid, 'euid': token.subject_data.effective_uid, 'egid': token.subject_data.effective_gid, 'uid': token.subject_data.real_uid, 'gid': token.subject_data.real_gid, 'pid': token.subject_data.pid, 'session_id': token.subject_data.session_id, 'terminal_port': token.terminal_port, 'terminal_ip': self._IPv4Format(token.ipv4) }} elif token_id in self._BSM_PROCESS_EX_TOKEN_TYPES: if token.bsm_ip_type_short.net_type == self.AU_IPv6: ip = self._IPv6Format( token.bsm_ip_type_short.ip_addr.high, token.bsm_ip_type_short.ip_addr.low) elif token.bsm_ip_type_short.net_type == self.AU_IPv4: ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr) else: ip = 'unknown' return {bsm_type: { 'aid': token.subject_data.audit_uid, 'euid': token.subject_data.effective_uid, 'egid': token.subject_data.effective_gid, 'uid': token.subject_data.real_uid, 'gid': token.subject_data.real_gid, 'pid': token.subject_data.pid, 'session_id': token.subject_data.session_id, 'terminal_port': token.terminal_port, 'terminal_ip': ip }} elif bsm_type == 'BSM_TOKEN_DATA': data = [] data_type = bsmtoken.BSM_TOKEN_DATA_TYPE.get(token.data_type, '') if data_type == 'AUR_CHAR': for _ in range(token.unit_count): data.append(self.BSM_TOKEN_DATA_CHAR.parse_stream(file_object)) elif data_type == 'AUR_SHORT': for _ in range(token.unit_count): data.append(self.BSM_TOKEN_DATA_SHORT.parse_stream(file_object)) elif data_type == 'AUR_INT32': for _ in range(token.unit_count): 
data.append(self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object)) else: data.append('Unknown type data') # TODO: the data when it is string ends with ".", HW a space is return # after uses the UTF-8 conversion. return {bsm_type: { 'format': bsmtoken.BSM_TOKEN_DATA_PRINT[token.how_to_print], 'data': '{0}'.format(self._RawToUTF8(''.join(map(str, data)))) }} elif token_id in self._BSM_ATTR_TOKEN_TYPES: return {bsm_type: { 'mode': token.file_mode, 'uid': token.uid, 'gid': token.gid, 'system_id': token.file_system_id, 'node_id': token.file_system_node_id, 'device': token.device}} elif bsm_type == 'BSM_TOKEN_GROUPS': arguments = [] for _ in range(token): arguments.append( self._RawToUTF8( self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object))) return {bsm_type: ','.join(arguments)} elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET32_EX': if bsmtoken.BSM_PROTOCOLS.get(token.socket_domain, '') == 'INET6': saddr = self._IPv6Format( token.structure_addr_port.saddr_high, token.structure_addr_port.saddr_low) daddr = self._IPv6Format( token.structure_addr_port.daddr_high, token.structure_addr_port.daddr_low) else: saddr = self._IPv4Format(token.structure_addr_port.source_address) daddr = self._IPv4Format(token.structure_addr_port.destination_address) return {bsm_type:{ 'from': saddr, 'from_port': token.structure_addr_port.source_port, 'to': daddr, 'to_port': token.structure_addr_port.destination_port}} elif bsm_type == 'BSM_TOKEN_IPC_PERM': return {bsm_type: { 'user_id': token.user_id, 'group_id': token.group_id, 'creator_user_id': token.creator_user_id, 'creator_group_id': token.creator_group_id, 'access': token.access_mode}} elif bsm_type == 'BSM_TOKEN_SOCKET_UNIX': string = self._CopyUtf8ByteArrayToString(token.path) return {bsm_type: {'family': token.family, 'path': string}} elif bsm_type == 'BSM_TOKEN_OPAQUE': string = self._CopyByteArrayToBase16String(token.text) return {bsm_type: string} elif bsm_type == 'BSM_TOKEN_SEQUENCE': return {bsm_type: token}
class BsmParser(interface.FileObjectParser):
  """Parser for BSM (Basic Security Module) audit log files.

  The file is a sequence of records; each record starts with a header
  token (BSM_HEADER32/64/32_EX), is followed by a variable list of typed
  tokens and ends with a trailer token.
  """

  _INITIAL_FILE_OFFSET = None

  NAME = u'bsm_log'
  DESCRIPTION = u'Parser for BSM log files.'

  # BSM supported version (0x0b = 11).
  AUDIT_HEADER_VERSION = 11

  # Magic Trail Header.
  BSM_TOKEN_TRAILER_MAGIC = b'b105'

  # IP version constants (values of the net_type field).
  AU_IPv4 = 4
  AU_IPv6 = 16

  IPV4_STRUCT = construct.UBInt32(u'ipv4')

  IPV6_STRUCT = construct.Struct(
      u'ipv6', construct.UBInt64(u'high'), construct.UBInt64(u'low'))

  # Tested structures.
  # INFO: the token ID is omitted in the structure declarations.
  #       BSM_TYPE is used first to read the ID, and then, the structure.
  # Tokens always start with an ID value that identifies their token
  # type and subsequent structure.
  BSM_TYPE = construct.UBInt8(u'token_id')

  # Data type structures.
  BSM_TOKEN_DATA_CHAR = construct.String(u'value', 1)
  BSM_TOKEN_DATA_SHORT = construct.UBInt16(u'value')
  BSM_TOKEN_DATA_INTEGER = construct.UBInt32(u'value')

  # Common structure used by other structures.
  # audit_uid: integer, uid that generates the entry.
  # effective_uid: integer, the permission user used.
  # effective_gid: integer, the permission group used.
  # real_uid: integer, user id of the user that execute the process.
  # real_gid: integer, group id of the group that execute the process.
  # pid: integer, identification number of the process.
  # session_id: unknown, need research.
  BSM_TOKEN_SUBJECT_SHORT = construct.Struct(
      u'subject_data',
      construct.UBInt32(u'audit_uid'),
      construct.UBInt32(u'effective_uid'),
      construct.UBInt32(u'effective_gid'),
      construct.UBInt32(u'real_uid'),
      construct.UBInt32(u'real_gid'),
      construct.UBInt32(u'pid'),
      construct.UBInt32(u'session_id'))

  # Common structure used by other structures.
  # Identify the kind of inet (IPv4 or IPv6).
  # TODO: instead of 16, AU_IPv6 must be used.
  BSM_IP_TYPE_SHORT = construct.Struct(
      u'bsm_ip_type_short',
      construct.UBInt32(u'net_type'),
      construct.Switch(
          u'ip_addr',
          _BsmTokenGetNetType,
          {16: IPV6_STRUCT},
          default=IPV4_STRUCT))

  # Initial fields structure used by header structures.
  # length: integer, the length of the entry, equal to trailer (doc: length).
  # version: integer, version of BSM (AUDIT_HEADER_VERSION).
  # event_type: integer, the type of event (/etc/security/audit_event).
  # modifier: integer, unknown, need research (It is always 0).
  BSM_HEADER = construct.Struct(
      u'bsm_header',
      construct.UBInt32(u'length'),
      construct.UBInt8(u'version'),
      construct.UBInt16(u'event_type'),
      construct.UBInt16(u'modifier'))

  # First token of one entry.
  # timestamp: unsigned integer, number of seconds since
  #            January 1, 1970 00:00:00 UTC.
  # microsecond: unsigned integer, number of micro seconds.
  BSM_HEADER32 = construct.Struct(
      u'bsm_header32',
      BSM_HEADER,
      construct.UBInt32(u'timestamp'),
      construct.UBInt32(u'microsecond'))

  BSM_HEADER64 = construct.Struct(
      u'bsm_header64',
      BSM_HEADER,
      construct.UBInt64(u'timestamp'),
      construct.UBInt64(u'microsecond'))

  BSM_HEADER32_EX = construct.Struct(
      u'bsm_header32_ex',
      BSM_HEADER,
      BSM_IP_TYPE_SHORT,
      construct.UBInt32(u'timestamp'),
      construct.UBInt32(u'microsecond'))

  # Token TEXT, provides extra information.
  BSM_TOKEN_TEXT = construct.Struct(
      u'bsm_token_text',
      construct.UBInt16(u'length'),
      construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

  # Path of the executable.
  BSM_TOKEN_PATH = BSM_TOKEN_TEXT

  # Identified the end of the record (follow by TRAILER).
  # status: integer that identifies the status of the exit (BSM_ERRORS).
  # return: returned value from the operation.
  BSM_TOKEN_RETURN32 = construct.Struct(
      u'bsm_token_return32',
      construct.UBInt8(u'status'),
      construct.UBInt32(u'return_value'))

  BSM_TOKEN_RETURN64 = construct.Struct(
      u'bsm_token_return64',
      construct.UBInt8(u'status'),
      construct.UBInt64(u'return_value'))

  # Identified the number of bytes that was written.
  # magic: 2 bytes that identifies the TRAILER (BSM_TOKEN_TRAILER_MAGIC).
  # length: integer that has the number of bytes from the entry size.
  BSM_TOKEN_TRAILER = construct.Struct(
      u'bsm_token_trailer',
      construct.UBInt16(u'magic'),
      construct.UBInt32(u'record_length'))

  # A 32-bits argument.
  # num_arg: the number of the argument.
  # name_arg: the argument's name.
  # text: the string value of the argument.
  BSM_TOKEN_ARGUMENT32 = construct.Struct(
      u'bsm_token_argument32',
      construct.UBInt8(u'num_arg'),
      construct.UBInt32(u'name_arg'),
      construct.UBInt16(u'length'),
      construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

  # A 64-bits argument.
  # num_arg: integer, the number of the argument.
  # name_arg: text, the argument's name.
  # text: the string value of the argument.
  BSM_TOKEN_ARGUMENT64 = construct.Struct(
      u'bsm_token_argument64',
      construct.UBInt8(u'num_arg'),
      construct.UBInt64(u'name_arg'),
      construct.UBInt16(u'length'),
      construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

  # Identify an user.
  # terminal_id: unknown, research needed.
  # terminal_addr: unknown, research needed.
  BSM_TOKEN_SUBJECT32 = construct.Struct(
      u'bsm_token_subject32',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32(u'terminal_port'),
      IPV4_STRUCT)

  # Identify an user using a extended Token.
  # terminal_port: unknown, need research.
  # net_type: unknown, need research.
  BSM_TOKEN_SUBJECT32_EX = construct.Struct(
      u'bsm_token_subject32_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32(u'terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_opaque // AUT_OPAQUE
  BSM_TOKEN_OPAQUE = BSM_TOKEN_TEXT

  # au_to_seq // AUT_SEQ
  BSM_TOKEN_SEQUENCE = BSM_TOKEN_DATA_INTEGER

  # Program execution with options.
  # For each argument we are going to have a string+ "\x00".
  # Example: [00 00 00 02][41 42 43 00 42 42 00]
  #          2 Arguments, Arg1: [414243] Arg2: [4242].
  BSM_TOKEN_EXEC_ARGUMENTS = construct.UBInt32(u'number_arguments')

  BSM_TOKEN_EXEC_ARGUMENT = construct.Struct(
      u'bsm_token_exec_argument',
      construct.RepeatUntil(
          _BsmTokenIsEndOfString, construct.StaticField("text", 1)))

  # au_to_in_addr // AUT_IN_ADDR:
  BSM_TOKEN_ADDR = IPV4_STRUCT

  # au_to_in_addr_ext // AUT_IN_ADDR_EX:
  BSM_TOKEN_ADDR_EXT = construct.Struct(
      u'bsm_token_addr_ext',
      construct.UBInt32(u'net_type'),
      IPV6_STRUCT)

  # au_to_ip // AUT_IP:
  # TODO: parse this header in the correct way.
  BSM_TOKEN_IP = construct.String(u'binary_ipv4_add', 20)

  # au_to_ipc // AUT_IPC:
  BSM_TOKEN_IPC = construct.Struct(
      u'bsm_token_ipc',
      construct.UBInt8(u'object_type'),
      construct.UBInt32(u'object_id'))

  # au_to_ipc_perm // au_to_ipc_perm
  BSM_TOKEN_IPC_PERM = construct.Struct(
      u'bsm_token_ipc_perm',
      construct.UBInt32(u'user_id'),
      construct.UBInt32(u'group_id'),
      construct.UBInt32(u'creator_user_id'),
      construct.UBInt32(u'creator_group_id'),
      construct.UBInt32(u'access_mode'),
      construct.UBInt32(u'slot_seq'),
      construct.UBInt32(u'key'))

  # au_to_iport // AUT_IPORT:
  BSM_TOKEN_PORT = construct.UBInt16(u'port_number')

  # au_to_file // AUT_OTHER_FILE32:
  BSM_TOKEN_FILE = construct.Struct(
      u'bsm_token_file',
      construct.UBInt32(u'timestamp'),
      construct.UBInt32(u'microsecond'),
      construct.UBInt16(u'length'),
      construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

  # au_to_subject64 // AUT_SUBJECT64:
  BSM_TOKEN_SUBJECT64 = construct.Struct(
      u'bsm_token_subject64',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64(u'terminal_port'),
      IPV4_STRUCT)

  # au_to_subject64_ex // AU_IPv4:
  BSM_TOKEN_SUBJECT64_EX = construct.Struct(
      u'bsm_token_subject64_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32(u'terminal_port'),
      construct.UBInt32(u'terminal_type'),
      BSM_IP_TYPE_SHORT)

  # au_to_process32 // AUT_PROCESS32:
  BSM_TOKEN_PROCESS32 = construct.Struct(
      u'bsm_token_process32',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32(u'terminal_port'),
      IPV4_STRUCT)

  # au_to_process64 // AUT_PROCESS32:
  BSM_TOKEN_PROCESS64 = construct.Struct(
      u'bsm_token_process64',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64(u'terminal_port'),
      IPV4_STRUCT)

  # au_to_process32_ex // AUT_PROCESS32_EX:
  BSM_TOKEN_PROCESS32_EX = construct.Struct(
      u'bsm_token_process32_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32(u'terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_process64_ex // AUT_PROCESS64_EX:
  BSM_TOKEN_PROCESS64_EX = construct.Struct(
      u'bsm_token_process64_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64(u'terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_sock_inet32 // AUT_SOCKINET32:
  BSM_TOKEN_AUT_SOCKINET32 = construct.Struct(
      u'bsm_token_aut_sockinet32',
      construct.UBInt16(u'net_type'),
      construct.UBInt16(u'port_number'),
      IPV4_STRUCT)

  # Info: checked against the source code of XNU, but not against
  #       real BSM file.
  BSM_TOKEN_AUT_SOCKINET128 = construct.Struct(
      u'bsm_token_aut_sockinet128',
      construct.UBInt16(u'net_type'),
      construct.UBInt16(u'port_number'),
      IPV6_STRUCT)

  INET6_ADDR_TYPE = construct.Struct(
      u'addr_type',
      construct.UBInt16(u'ip_type'),
      construct.UBInt16(u'source_port'),
      construct.UBInt64(u'saddr_high'),
      construct.UBInt64(u'saddr_low'),
      construct.UBInt16(u'destination_port'),
      construct.UBInt64(u'daddr_high'),
      construct.UBInt64(u'daddr_low'))

  INET4_ADDR_TYPE = construct.Struct(
      u'addr_type',
      construct.UBInt16(u'ip_type'),
      construct.UBInt16(u'source_port'),
      construct.UBInt32(u'source_address'),
      construct.UBInt16(u'destination_port'),
      construct.UBInt32(u'destination_address'))

  # au_to_socket_ex // AUT_SOCKET_EX
  # TODO: Change the 26 for unixbsm.BSM_PROTOCOLS.INET6.
  BSM_TOKEN_AUT_SOCKINET32_EX = construct.Struct(
      u'bsm_token_aut_sockinet32_ex',
      construct.UBInt16(u'socket_domain'),
      construct.UBInt16(u'socket_type'),
      construct.Switch(
          u'structure_addr_port',
          _BsmTokenGetSocketDomain,
          {26: INET6_ADDR_TYPE},
          default=INET4_ADDR_TYPE))

  # au_to_sock_unix // AUT_SOCKUNIX
  BSM_TOKEN_SOCKET_UNIX = construct.Struct(
      u'bsm_token_au_to_sock_unix',
      construct.UBInt16(u'family'),
      construct.RepeatUntil(
          _BsmTokenIsEndOfString, construct.StaticField("path", 1)))

  # au_to_data // au_to_data
  # how to print: bsmtoken.BSM_TOKEN_DATA_PRINT.
  # type: bsmtoken.BSM_TOKEN_DATA_TYPE.
  # unit_count: number of type values.
  # BSM_TOKEN_DATA has a end field = type * unit_count
  BSM_TOKEN_DATA = construct.Struct(
      u'bsm_token_data',
      construct.UBInt8(u'how_to_print'),
      construct.UBInt8(u'data_type'),
      construct.UBInt8(u'unit_count'))

  # au_to_attr32 // AUT_ATTR32
  BSM_TOKEN_ATTR32 = construct.Struct(
      u'bsm_token_attr32',
      construct.UBInt32(u'file_mode'),
      construct.UBInt32(u'uid'),
      construct.UBInt32(u'gid'),
      construct.UBInt32(u'file_system_id'),
      construct.UBInt64(u'file_system_node_id'),
      construct.UBInt32(u'device'))

  # au_to_attr64 // AUT_ATTR64
  BSM_TOKEN_ATTR64 = construct.Struct(
      u'bsm_token_attr64',
      construct.UBInt32(u'file_mode'),
      construct.UBInt32(u'uid'),
      construct.UBInt32(u'gid'),
      construct.UBInt32(u'file_system_id'),
      construct.UBInt64(u'file_system_node_id'),
      construct.UBInt64(u'device'))

  # au_to_exit // AUT_EXIT
  BSM_TOKEN_EXIT = construct.Struct(
      u'bsm_token_exit',
      construct.UBInt32(u'status'),
      construct.UBInt32(u'return_value'))

  # au_to_newgroups // AUT_NEWGROUPS
  # INFO: we must read BSM_TOKEN_DATA_INTEGER for each group.
  BSM_TOKEN_GROUPS = construct.UBInt16(u'group_number')

  # au_to_exec_env == au_to_exec_args
  BSM_TOKEN_EXEC_ENV = BSM_TOKEN_EXEC_ARGUMENTS

  # au_to_zonename //AUT_ZONENAME
  BSM_TOKEN_ZONENAME = BSM_TOKEN_TEXT

  # Token ID.
  # List of valid Token_ID.
  # Token_ID -> [NAME_STRUCTURE, STRUCTURE]
  # Only the checked structures have been added to the valid structures list.
  BSM_TYPE_LIST = {
      17: [u'BSM_TOKEN_FILE', BSM_TOKEN_FILE],
      19: [u'BSM_TOKEN_TRAILER', BSM_TOKEN_TRAILER],
      20: [u'BSM_HEADER32', BSM_HEADER32],
      21: [u'BSM_HEADER64', BSM_HEADER64],
      33: [u'BSM_TOKEN_DATA', BSM_TOKEN_DATA],
      34: [u'BSM_TOKEN_IPC', BSM_TOKEN_IPC],
      35: [u'BSM_TOKEN_PATH', BSM_TOKEN_PATH],
      36: [u'BSM_TOKEN_SUBJECT32', BSM_TOKEN_SUBJECT32],
      38: [u'BSM_TOKEN_PROCESS32', BSM_TOKEN_PROCESS32],
      39: [u'BSM_TOKEN_RETURN32', BSM_TOKEN_RETURN32],
      40: [u'BSM_TOKEN_TEXT', BSM_TOKEN_TEXT],
      41: [u'BSM_TOKEN_OPAQUE', BSM_TOKEN_OPAQUE],
      42: [u'BSM_TOKEN_ADDR', BSM_TOKEN_ADDR],
      43: [u'BSM_TOKEN_IP', BSM_TOKEN_IP],
      44: [u'BSM_TOKEN_PORT', BSM_TOKEN_PORT],
      45: [u'BSM_TOKEN_ARGUMENT32', BSM_TOKEN_ARGUMENT32],
      47: [u'BSM_TOKEN_SEQUENCE', BSM_TOKEN_SEQUENCE],
      96: [u'BSM_TOKEN_ZONENAME', BSM_TOKEN_ZONENAME],
      113: [u'BSM_TOKEN_ARGUMENT64', BSM_TOKEN_ARGUMENT64],
      114: [u'BSM_TOKEN_RETURN64', BSM_TOKEN_RETURN64],
      116: [u'BSM_HEADER32_EX', BSM_HEADER32_EX],
      119: [u'BSM_TOKEN_PROCESS64', BSM_TOKEN_PROCESS64],
      122: [u'BSM_TOKEN_SUBJECT32_EX', BSM_TOKEN_SUBJECT32_EX],
      127: [u'BSM_TOKEN_AUT_SOCKINET32_EX', BSM_TOKEN_AUT_SOCKINET32_EX],
      128: [u'BSM_TOKEN_AUT_SOCKINET32', BSM_TOKEN_AUT_SOCKINET32]}

  # Untested structures.
  # When a not tested structure is found, we try to parse using also
  # these structures.
  BSM_TYPE_LIST_NOT_TESTED = {
      49: [u'BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32],
      50: [u'BSM_TOKEN_IPC_PERM', BSM_TOKEN_IPC_PERM],
      52: [u'BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS],
      59: [u'BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS],
      60: [u'BSM_TOKEN_EXEC_ARGUMENTS', BSM_TOKEN_EXEC_ARGUMENTS],
      61: [u'BSM_TOKEN_EXEC_ENV', BSM_TOKEN_EXEC_ENV],
      62: [u'BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32],
      82: [u'BSM_TOKEN_EXIT', BSM_TOKEN_EXIT],
      115: [u'BSM_TOKEN_ATTR64', BSM_TOKEN_ATTR64],
      117: [u'BSM_TOKEN_SUBJECT64', BSM_TOKEN_SUBJECT64],
      123: [u'BSM_TOKEN_PROCESS32_EX', BSM_TOKEN_PROCESS32_EX],
      124: [u'BSM_TOKEN_PROCESS64_EX', BSM_TOKEN_PROCESS64_EX],
      125: [u'BSM_TOKEN_SUBJECT64_EX', BSM_TOKEN_SUBJECT64_EX],
      126: [u'BSM_TOKEN_ADDR_EXT', BSM_TOKEN_ADDR_EXT],
      129: [u'BSM_TOKEN_AUT_SOCKINET128', BSM_TOKEN_AUT_SOCKINET128],
      130: [u'BSM_TOKEN_SOCKET_UNIX', BSM_TOKEN_SOCKET_UNIX]}

  def __init__(self):
    """Initializes a parser object."""
    super(BsmParser, self).__init__()
    # Create the dictionary with all token IDs: tested and untested.
    self.bsm_type_list_all = self.BSM_TYPE_LIST.copy()
    self.bsm_type_list_all.update(self.BSM_TYPE_LIST_NOT_TESTED)

  def _CopyByteArrayToBase16String(self, byte_array):
    """Copies a byte array into a base-16 encoded Unicode string.

    Args:
      byte_array: A byte array.

    Returns:
      A base-16 encoded Unicode string.
    """
    return u''.join([u'{0:02x}'.format(byte) for byte in byte_array])

  def _CopyUtf8ByteArrayToString(self, byte_array):
    """Copies a UTF-8 encoded byte array into a Unicode string.

    Args:
      byte_array: A byte array containing an UTF-8 encoded string.

    Returns:
      A Unicode string, truncated at the first NUL character.
    """
    byte_stream = b''.join(map(chr, byte_array))

    try:
      string = byte_stream.decode(u'utf-8')
    except UnicodeDecodeError:
      logging.warning(u'Unable to decode UTF-8 formatted byte array.')
      string = byte_stream.decode(u'utf-8', errors=u'ignore')

    string, _, _ = string.partition(b'\x00')
    return string

  def _IPv4Format(self, address):
    """Change an integer IPv4 address value for its 4 octets representation.

    Args:
      address: integer with the IPv4 address.

    Returns:
      IPv4 address in 4 octet representation (class A, B, C, D).
    """
    ipv4_string = self.IPV4_STRUCT.build(address)
    return socket.inet_ntoa(ipv4_string)

  def _IPv6Format(self, high, low):
    """Provide a readable IPv6 IP having the high and low part in 2 integers.

    Args:
      high: 64 bits integer number with the high part of the IPv6.
      low: 64 bits integer number with the low part of the IPv6.

    Returns:
      String with a well represented IPv6.
    """
    ipv6_string = self.IPV6_STRUCT.build(
        construct.Container(high=high, low=low))
    # socket.inet_ntop not supported in Windows.
    if hasattr(socket, u'inet_ntop'):
      return socket.inet_ntop(socket.AF_INET6, ipv6_string)

    # TODO: this approach returns double "::", illegal IPv6 addr.
    str_address = binascii.hexlify(ipv6_string)
    address = []
    blank = False
    for pos in range(0, len(str_address), 4):
      if str_address[pos:pos + 4] == u'0000':
        if not blank:
          address.append(u'')
          blank = True
      else:
        blank = False
        address.append(str_address[pos:pos + 4].lstrip(u'0'))
    return u':'.join(address)

  def _RawToUTF8(self, byte_stream):
    """Copies a UTF-8 byte stream into a Unicode string.

    Args:
      byte_stream: A byte stream containing an UTF-8 encoded string.

    Returns:
      A Unicode string, truncated at the first NUL character.
    """
    try:
      string = byte_stream.decode(u'utf-8')
    except UnicodeDecodeError:
      logging.warning(
          u'Decode UTF8 failed, the message string may be cut short.')
      string = byte_stream.decode(u'utf-8', errors=u'ignore')
    return string.partition(b'\x00')[0]

  def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses a BSM file-like object.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    file_object.seek(0, os.SEEK_SET)

    try:
      is_bsm = self.VerifyFile(parser_mediator, file_object)
    except (IOError, construct.FieldError) as exception:
      raise errors.UnableToParseFile(
          u'Unable to parse BSM file with error: {0:s}'.format(exception))

    if not is_bsm:
      raise errors.UnableToParseFile(u'Not a BSM File, unable to parse.')

    # Produce one event per BSM record until the end of the file.
    event_object = self.ReadBSMEvent(parser_mediator, file_object)
    while event_object:
      parser_mediator.ProduceEvent(event_object)
      event_object = self.ReadBSMEvent(parser_mediator, file_object)

  def ReadBSMEvent(self, parser_mediator, file_object):
    """Returns a BsmEvent from a single BSM entry.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Returns:
      An event object, or None when the end of the file or an unparsable
      entry is reached.
    """
    # A list of tokens that has the entry.
    extra_tokens = []
    offset = file_object.tell()

    # Token header, first token for each entry.
    try:
      token_id = self.BSM_TYPE.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return

    bsm_type, structure = self.BSM_TYPE_LIST.get(token_id, [u'', u''])
    if bsm_type == u'BSM_HEADER32':
      token = structure.parse_stream(file_object)
    elif bsm_type == u'BSM_HEADER64':
      token = structure.parse_stream(file_object)
    elif bsm_type == u'BSM_HEADER32_EX':
      token = structure.parse_stream(file_object)
    else:
      logging.warning(
          u'Token ID Header {0} not expected at position 0x{1:X}.'
          u'The parsing of the file cannot be continued'.format(
              token_id, file_object.tell()))
      # TODO: if it is a Mac OS X, search for the trailer magic value
      #       as a end of the entry can be a possibility to continue.
      return

    length = token.bsm_header.length
    event_type = u'{0} ({1})'.format(
        bsmtoken.BSM_AUDIT_EVENT.get(token.bsm_header.event_type, u'UNKNOWN'),
        token.bsm_header.event_type)
    timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
        token.timestamp, token.microsecond)

    # Read until we reach the end of the record.
    while file_object.tell() < (offset + length):
      # Check if it is a known token.
      try:
        token_id = self.BSM_TYPE.parse_stream(file_object)
      except (IOError, construct.FieldError):
        logging.warning(
            u'Unable to parse the Token ID at position: {0:d}'.format(
                file_object.tell()))
        return
      if not token_id in self.BSM_TYPE_LIST:
        pending = (offset + length) - file_object.tell()
        extra_tokens.extend(
            self.TryWithUntestedStructures(file_object, token_id, pending))
      else:
        token = self.BSM_TYPE_LIST[token_id][1].parse_stream(file_object)
        extra_tokens.append(self.FormatToken(token_id, token, file_object))

    if file_object.tell() > (offset + length):
      # A token was longer than the record claims; realign the stream with
      # the start of the next record.
      logging.warning(
          u'Token ID {0} not expected at position 0x{1:X}.'
          u'Jumping for the next entry.'.format(
              token_id, file_object.tell()))
      try:
        file_object.seek(
            (offset + length) - file_object.tell(), os.SEEK_CUR)
      except (IOError, construct.FieldError) as exception:
        logging.warning(
            u'Unable to jump to next entry with error: {0:s}'.format(
                exception))
        return

    # BSM can be in more than one OS: BSD, Solaris and Mac OS X.
    if parser_mediator.platform == u'MacOSX':
      # In Mac OS X the last two tokens are the return status and the trailer.
      if len(extra_tokens) >= 2:
        return_value = extra_tokens[-2:-1][0]
        if (return_value.startswith(u'[BSM_TOKEN_RETURN32') or
            return_value.startswith(u'[BSM_TOKEN_RETURN64')):
          _ = extra_tokens.pop(len(extra_tokens) - 2)
        else:
          return_value = u'Return unknown'
      else:
        return_value = u'Return unknown'

      if extra_tokens:
        trailer = extra_tokens[-1]
        if trailer.startswith(u'[BSM_TOKEN_TRAILER'):
          _ = extra_tokens.pop(len(extra_tokens) - 1)
        else:
          trailer = u'Trailer unknown'
      else:
        trailer = u'Trailer unknown'

      return MacBsmEvent(
          event_type, timestamp, u'. '.join(extra_tokens),
          return_value, trailer, offset)
    else:
      # Generic BSM format.
      if extra_tokens:
        trailer = extra_tokens[-1]
        if trailer.startswith(u'[BSM_TOKEN_TRAILER'):
          _ = extra_tokens.pop(len(extra_tokens) - 1)
        else:
          trailer = u'Trailer unknown'
      else:
        trailer = u'Trailer unknown'

      return BsmEvent(
          event_type, timestamp, u'. '.join(extra_tokens), trailer, offset)

  def VerifyFile(self, parser_mediator, file_object):
    """Check if the file is a BSM file.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: file that we want to check.

    Returns:
      True if this is a valid BSM file, otherwise False.
    """
    if file_object.tell() != 0:
      file_object.seek(0)

    # First part of the entry is always a Header.
    try:
      token_id = self.BSM_TYPE.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False
    if token_id not in self.BSM_TYPE_LIST:
      return False

    bsm_type, structure = self.BSM_TYPE_LIST.get(token_id, [u'', u''])
    try:
      if bsm_type == u'BSM_HEADER32':
        header = structure.parse_stream(file_object)
      elif bsm_type == u'BSM_HEADER64':
        header = structure.parse_stream(file_object)
      elif bsm_type == u'BSM_HEADER32_EX':
        header = structure.parse_stream(file_object)
      else:
        return False
    except (IOError, construct.FieldError):
      return False

    if header.bsm_header.version != self.AUDIT_HEADER_VERSION:
      return False

    try:
      token_id = self.BSM_TYPE.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False

    # If is Mac OS X BSM file, next entry is a text token indicating
    # if it is a normal start or it is a recovery track.
    if parser_mediator.platform == u'MacOSX':
      bsm_type_list = self.BSM_TYPE_LIST.get(token_id)
      if not bsm_type_list:
        return False

      if bsm_type_list[0] != u'BSM_TOKEN_TEXT':
        logging.warning(u'It is not a valid first entry for Mac OS X BSM.')
        return False

      try:
        token = self.BSM_TOKEN_TEXT.parse_stream(file_object)
      except (IOError, construct.FieldError):
        # Fix: previously this was a bare "return" (None); the method is
        # documented to return a boolean.
        return False

      text = self._CopyUtf8ByteArrayToString(token.text)
      if (text != u'launchctl::Audit startup' and
          text != u'launchctl::Audit recovery'):
        logging.warning(u'It is not a valid first entry for Mac OS X BSM.')
        return False

    file_object.seek(0)
    return True

  def TryWithUntestedStructures(self, file_object, token_id, pending):
    """Try to parse the pending part of the entry using untested structures.

    Args:
      file_object: BSM file.
      token_id: integer with the id that comes from the unknown token.
      pending: pending length of the entry.

    Returns:
      A list of extra tokens data that can be parsed using non-tested
      structures. A message indicating that a structure cannot be parsed
      is added for unparsed structures.
    """
    # Data from the unknown structure.
    start_position = file_object.tell()
    start_token_id = token_id
    extra_tokens = []

    # Read all the "pending" bytes.
    try:
      if token_id in self.bsm_type_list_all:
        token = self.bsm_type_list_all[token_id][1].parse_stream(file_object)
        extra_tokens.append(self.FormatToken(token_id, token, file_object))
        while file_object.tell() < (start_position + pending):
          # Check if it is a known token.
          try:
            token_id = self.BSM_TYPE.parse_stream(file_object)
          except (IOError, construct.FieldError):
            logging.warning(
                u'Unable to parse the Token ID at position: {0:d}'.format(
                    file_object.tell()))
            return
          if token_id not in self.bsm_type_list_all:
            break
          token = self.bsm_type_list_all[token_id][1].parse_stream(
              file_object)
          extra_tokens.append(self.FormatToken(token_id, token, file_object))
    except (IOError, construct.FieldError):
      token_id = 255

    next_entry = (start_position + pending)
    if file_object.tell() != next_entry:
      # Unknown Structure.
      logging.warning(u'Unknown Token at "0x{0:X}", ID: {1} (0x{2:X})'.format(
          start_position - 1, token_id, token_id))
      # TODO: another way to save this information must be found.
      extra_tokens.append(
          u'Plaso: some tokens from this entry can '
          u'not be saved. Entry at 0x{0:X} with unknown '
          u'token id "0x{1:X}".'.format(
              start_position - 1, start_token_id))
      # Move to next entry.
      file_object.seek(next_entry - file_object.tell(), os.SEEK_CUR)
      # It returns null list because it doesn't know which structure was
      # the incorrect structure that makes that it can arrive to the expected
      # end of the entry.
      return []
    return extra_tokens

  # TODO: instead of compare the text to know what structure was parsed
  #       is better to compare directly the numeric number (token_id),
  #       less readable, but better performance.
  def FormatToken(self, token_id, token, file_object):
    """Parse the Token depending of the type of the structure.

    Args:
      token_id: Identification integer of the token_type.
      token: Token struct to parse.
      file_object: BSM file.

    Returns:
      String with the parsed Token values.
    """
    if token_id not in self.bsm_type_list_all:
      return u'Type Unknown: {0:d} (0x{0:X})'.format(token_id)

    bsm_type, _ = self.bsm_type_list_all.get(token_id, [u'', u''])

    if bsm_type in [
        u'BSM_TOKEN_TEXT', u'BSM_TOKEN_PATH', u'BSM_TOKEN_ZONENAME']:
      try:
        string = self._CopyUtf8ByteArrayToString(token.text)
      except TypeError:
        string = u'Unknown'
      return u'[{0}: {1:s}]'.format(bsm_type, string)

    elif bsm_type in [
        u'BSM_TOKEN_RETURN32', u'BSM_TOKEN_RETURN64', u'BSM_TOKEN_EXIT']:
      return u'[{0}: {1} ({2}), System call status: {3}]'.format(
          bsm_type, bsmtoken.BSM_ERRORS.get(token.status, u'Unknown'),
          token.status, token.return_value)

    elif bsm_type in [u'BSM_TOKEN_SUBJECT32', u'BSM_TOKEN_SUBJECT64']:
      return (
          u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
          u'pid({6}), session_id({7}), terminal_port({8}), '
          u'terminal_ip({9})]').format(
              bsm_type, token.subject_data.audit_uid,
              token.subject_data.effective_uid,
              token.subject_data.effective_gid,
              token.subject_data.real_uid, token.subject_data.real_gid,
              token.subject_data.pid, token.subject_data.session_id,
              token.terminal_port, self._IPv4Format(token.ipv4))

    elif bsm_type in [u'BSM_TOKEN_SUBJECT32_EX', u'BSM_TOKEN_SUBJECT64_EX']:
      if token.bsm_ip_type_short.net_type == self.AU_IPv6:
        ip = self._IPv6Format(
            token.bsm_ip_type_short.ip_addr.high,
            token.bsm_ip_type_short.ip_addr.low)
      elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
        ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
      else:
        ip = u'unknown'
      return (
          u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
          u'pid({6}), session_id({7}), terminal_port({8}), '
          u'terminal_ip({9})]').format(
              bsm_type, token.subject_data.audit_uid,
              token.subject_data.effective_uid,
              token.subject_data.effective_gid,
              token.subject_data.real_uid, token.subject_data.real_gid,
              token.subject_data.pid, token.subject_data.session_id,
              token.terminal_port, ip)

    elif bsm_type in [u'BSM_TOKEN_ARGUMENT32', u'BSM_TOKEN_ARGUMENT64']:
      string = self._CopyUtf8ByteArrayToString(token.text)
      return u'[{0}: {1:s}({2}) is 0x{3:X}]'.format(
          bsm_type, string, token.num_arg, token.name_arg)

    elif bsm_type in [u'BSM_TOKEN_EXEC_ARGUMENTS', u'BSM_TOKEN_EXEC_ENV']:
      # The token is the argument count; each argument is a NUL-terminated
      # string read directly from the stream.
      arguments = []
      for _ in range(0, token):
        sub_token = self.BSM_TOKEN_EXEC_ARGUMENT.parse_stream(file_object)
        string = self._CopyUtf8ByteArrayToString(sub_token.text)
        arguments.append(string)
      return u'[{0}: {1:s}]'.format(bsm_type, u' '.join(arguments))

    elif bsm_type == u'BSM_TOKEN_AUT_SOCKINET32':
      return (u'[{0}: {1} ({2}) open in port {3}. Address {4}]'.format(
          bsm_type, bsmtoken.BSM_PROTOCOLS.get(token.net_type, u'UNKNOWN'),
          token.net_type, token.port_number, self._IPv4Format(token.ipv4)))

    elif bsm_type == u'BSM_TOKEN_AUT_SOCKINET128':
      return u'[{0}: {1} ({2}) open in port {3}. Address {4}]'.format(
          bsm_type, bsmtoken.BSM_PROTOCOLS.get(token.net_type, u'UNKNOWN'),
          token.net_type, token.port_number,
          self._IPv6Format(token.ipv6.high, token.ipv6.low))

    elif bsm_type == u'BSM_TOKEN_ADDR':
      return u'[{0}: {1}]'.format(bsm_type, self._IPv4Format(token))

    elif bsm_type == u'BSM_TOKEN_IP':
      return u'[IPv4_Header: 0x{0:s}]'.format(token.encode(u'hex'))

    elif bsm_type == u'BSM_TOKEN_ADDR_EXT':
      return u'[{0}: {1} ({2}). Address {3}]'.format(
          bsm_type,
          bsmtoken.BSM_PROTOCOLS.get(token.net_type, u'UNKNOWN'),
          token.net_type,
          self._IPv6Format(token.ipv6.high, token.ipv6.low))

    elif bsm_type == u'BSM_TOKEN_PORT':
      return u'[{0}: {1}]'.format(bsm_type, token)

    elif bsm_type == u'BSM_TOKEN_TRAILER':
      return u'[{0}: {1}]'.format(bsm_type, token.record_length)

    elif bsm_type == u'BSM_TOKEN_FILE':
      # TODO: if this timestamp is usefull, it must be extracted as a separate
      #       event object.
      timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
          token.timestamp, token.microsecond)
      date_time = timelib.Timestamp.CopyToDatetime(timestamp, pytz.UTC)
      date_time_string = date_time.strftime(u'%Y-%m-%d %H:%M:%S')

      string = self._CopyUtf8ByteArrayToString(token.text)
      return u'[{0}: {1:s}, timestamp: {2:s}]'.format(
          bsm_type, string, date_time_string)

    elif bsm_type == u'BSM_TOKEN_IPC':
      return u'[{0}: object type {1}, object id {2}]'.format(
          bsm_type, token.object_type, token.object_id)

    elif bsm_type in [u'BSM_TOKEN_PROCESS32', u'BSM_TOKEN_PROCESS64']:
      return (
          u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
          u'pid({6}), session_id({7}), terminal_port({8}), '
          u'terminal_ip({9})]').format(
              bsm_type, token.subject_data.audit_uid,
              token.subject_data.effective_uid,
              token.subject_data.effective_gid,
              token.subject_data.real_uid, token.subject_data.real_gid,
              token.subject_data.pid, token.subject_data.session_id,
              token.terminal_port, self._IPv4Format(token.ipv4))

    elif bsm_type in [u'BSM_TOKEN_PROCESS32_EX', u'BSM_TOKEN_PROCESS64_EX']:
      if token.bsm_ip_type_short.net_type == self.AU_IPv6:
        ip = self._IPv6Format(
            token.bsm_ip_type_short.ip_addr.high,
            token.bsm_ip_type_short.ip_addr.low)
      elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
        ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
      else:
        ip = u'unknown'
      return (
          u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
          u'pid({6}), session_id({7}), terminal_port({8}), '
          u'terminal_ip({9})]').format(
              bsm_type, token.subject_data.audit_uid,
              token.subject_data.effective_uid,
              token.subject_data.effective_gid,
              token.subject_data.real_uid, token.subject_data.real_gid,
              token.subject_data.pid, token.subject_data.session_id,
              token.terminal_port, ip)

    elif bsm_type == u'BSM_TOKEN_DATA':
      data = []
      data_type = bsmtoken.BSM_TOKEN_DATA_TYPE.get(token.data_type, u'')
      if data_type == u'AUR_CHAR':
        for _ in range(token.unit_count):
          data.append(self.BSM_TOKEN_DATA_CHAR.parse_stream(file_object))
      elif data_type == u'AUR_SHORT':
        for _ in range(token.unit_count):
          # Fix: was self.BSM_TOKEN_DAT_SHORT which does not exist and
          # raised AttributeError for every AUR_SHORT data token.
          data.append(self.BSM_TOKEN_DATA_SHORT.parse_stream(file_object))
      elif data_type == u'AUR_INT32':
        for _ in range(token.unit_count):
          data.append(self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object))
      else:
        data.append(u'Unknown type data')
      # TODO: the data when it is string ends with ".", HW a space is return
      #       after uses the UTF-8 conversion.
      # Fix: stringify each value before joining — AUR_SHORT/AUR_INT32
      # values are integers and u''.join() on them raised TypeError.
      # This matches the sibling (dict-returning) implementation.
      return u'[{0}: Format data: {1}, Data: {2}]'.format(
          bsm_type, bsmtoken.BSM_TOKEN_DATA_PRINT[token.how_to_print],
          self._RawToUTF8(u''.join(map(str, data))))

    elif bsm_type in [u'BSM_TOKEN_ATTR32', u'BSM_TOKEN_ATTR64']:
      return (
          u'[{0}: Mode: {1}, UID: {2}, GID: {3}, '
          u'File system ID: {4}, Node ID: {5}, Device: {6}]').format(
              bsm_type, token.file_mode, token.uid, token.gid,
              token.file_system_id, token.file_system_node_id,
              token.device)

    elif bsm_type == u'BSM_TOKEN_GROUPS':
      # NOTE(review): parse_stream() returns an integer here but _RawToUTF8
      # expects a byte stream with a decode() method — looks like a latent
      # defect shared with the sibling implementation; TODO confirm before
      # changing behavior.
      arguments = []
      for _ in range(token):
        arguments.append(
            self._RawToUTF8(
                self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object)))
      return u'[{0}: {1:s}]'.format(bsm_type, u','.join(arguments))

    elif bsm_type == u'BSM_TOKEN_AUT_SOCKINET32_EX':
      if bsmtoken.BSM_PROTOCOLS.get(token.socket_domain, u'') == u'INET6':
        saddr = self._IPv6Format(
            token.structure_addr_port.saddr_high,
            token.structure_addr_port.saddr_low)
        daddr = self._IPv6Format(
            token.structure_addr_port.daddr_high,
            token.structure_addr_port.daddr_low)
      else:
        saddr = self._IPv4Format(token.structure_addr_port.source_address)
        daddr = self._IPv4Format(token.structure_addr_port.destination_address)

      return u'[{0}: from {1} port {2} to {3} port {4}]'.format(
          bsm_type, saddr, token.structure_addr_port.source_port,
          daddr, token.structure_addr_port.destination_port)

    elif bsm_type == u'BSM_TOKEN_IPC_PERM':
      return (
          u'[{0}: user id {1}, group id {2}, create user id {3}, '
          u'create group id {4}, access {5}]').format(
              bsm_type, token.user_id, token.group_id,
              token.creator_user_id, token.creator_group_id,
              token.access_mode)

    elif bsm_type == u'BSM_TOKEN_SOCKET_UNIX':
      string = self._CopyUtf8ByteArrayToString(token.path)
      return u'[{0}: Family {1}, Path {2:s}]'.format(
          bsm_type, token.family, string)

    elif bsm_type == u'BSM_TOKEN_OPAQUE':
      string = self._CopyByteArrayToBase16String(token.text)
      return u'[{0}: {1:s}]'.format(bsm_type, string)

    elif bsm_type == u'BSM_TOKEN_SEQUENCE':
      return u'[{0}: {1}]'.format(bsm_type, token)
class KeychainParser(interface.FileObjectParser):
  """Parser for MacOS Keychain files."""

  NAME = 'mac_keychain'
  DESCRIPTION = 'Parser for MacOS Keychain files.'

  KEYCHAIN_SIGNATURE = b'kych'
  KEYCHAIN_MAJOR_VERSION = 1
  KEYCHAIN_MINOR_VERSION = 0

  RECORD_TYPE_APPLICATION = 0x80000000
  RECORD_TYPE_INTERNET = 0x80000001

  # DB HEADER.
  KEYCHAIN_DB_HEADER = construct.Struct(
      'db_header',
      construct.Bytes('signature', 4),
      construct.UBInt16('major_version'),
      construct.UBInt16('minor_version'),
      construct.UBInt32('header_size'),
      construct.UBInt32('schema_offset'),
      construct.Padding(4))

  # DB SCHEMA.
  KEYCHAIN_DB_SCHEMA = construct.Struct(
      'db_schema',
      construct.UBInt32('size'),
      construct.UBInt32('number_of_tables'))

  # For each number_of_tables, the schema has a TABLE_OFFSET with the
  # offset starting in the DB_SCHEMA.
  TABLE_OFFSET = construct.UBInt32('table_offset')

  TABLE_HEADER = construct.Struct(
      'table_header',
      construct.UBInt32('table_size'),
      construct.UBInt32('record_type'),
      construct.UBInt32('number_of_records'),
      construct.UBInt32('first_record'),
      construct.UBInt32('index_offset'),
      construct.Padding(4),
      construct.UBInt32('recordnumbercount'))

  # Common record header; each field after entry_length is a relative
  # offset (from the record start) to the actual value in the file.
  RECORD_HEADER = construct.Struct(
      'record_header',
      construct.UBInt32('entry_length'),
      construct.Padding(12),
      construct.UBInt32('ssgp_length'),
      construct.Padding(4),
      construct.UBInt32('creation_time'),
      construct.UBInt32('last_modification_time'),
      construct.UBInt32('text_description'),
      construct.Padding(4),
      construct.UBInt32('comments'),
      construct.Padding(8),
      construct.UBInt32('entry_name'),
      construct.Padding(20),
      construct.UBInt32('account_name'),
      construct.Padding(4))

  RECORD_HEADER_APP = construct.Struct(
      'record_entry_app',
      RECORD_HEADER,
      construct.Padding(4))

  RECORD_HEADER_INET = construct.Struct(
      'record_entry_inet',
      RECORD_HEADER,
      construct.UBInt32('where'),
      construct.UBInt32('protocol'),
      construct.UBInt32('type'),
      construct.Padding(4),
      construct.UBInt32('url'))

  TEXT = construct.PascalString(
      'text', length_field=construct.UBInt32('length'))

  # Timestamps are stored as ASCII digit strings: YYYYMMDDhhmmss + padding.
  TIME = construct.Struct(
      'timestamp',
      construct.String('year', 4),
      construct.String('month', 2),
      construct.String('day', 2),
      construct.String('hour', 2),
      construct.String('minute', 2),
      construct.String('second', 2),
      construct.Padding(2))

  TYPE_TEXT = construct.String('type', 4)

  # TODO: add more protocols.
  _PROTOCOL_TRANSLATION_DICT = {
      'htps': 'https',
      'smtp': 'smtp',
      'imap': 'imap',
      'http': 'http'}

  def _ReadEntryApplication(self, parser_mediator, file_object):
    """Extracts the information from an application password entry.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.
    """
    record_offset = file_object.tell()
    try:
      record_struct = self.RECORD_HEADER_APP.parse_stream(file_object)
    except (IOError, construct.FieldError):
      parser_mediator.ProduceExtractionError(
          'unable to parse record structure at offset: 0x{0:08x}'.format(
              record_offset))
      return

    (ssgp_hash, creation_time, last_modification_time, text_description,
     comments, entry_name, account_name) = self._ReadEntryHeader(
         parser_mediator, file_object, record_struct.record_header,
         record_offset)

    # Move to the end of the record so the next entry can be read.
    next_record_offset = (
        record_offset + record_struct.record_header.entry_length)
    file_object.seek(next_record_offset, os.SEEK_SET)

    event_data = KeychainApplicationRecordEventData()
    event_data.account_name = account_name
    event_data.comments = comments
    event_data.entry_name = entry_name
    event_data.ssgp_hash = ssgp_hash
    event_data.text_description = text_description

    if creation_time:
      event = time_events.DateTimeValuesEvent(
          creation_time, definitions.TIME_DESCRIPTION_CREATION)
      parser_mediator.ProduceEventWithEventData(event, event_data)

    if last_modification_time:
      event = time_events.DateTimeValuesEvent(
          last_modification_time, definitions.TIME_DESCRIPTION_MODIFICATION)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def _ReadEntryHeader(
      self, parser_mediator, file_object, record, record_offset):
    """Read the common record attributes.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.
      record (construct.Struct): record header structure.
      record_offset (int): offset of the start of the record.

    Returns:
      A tuple containing:
        ssgp_hash: Hash of the encrypted data (passwd, cert, note).
        creation_time (dfdatetime.TimeElements): entry creation time or None.
        last_modification_time (dfdatetime.TimeElements): entry last
            modification time or None.
        text_description: A brief description of the entry.
        comments: comments of the entry.
        entry_name: Name of the entry.
        account_name: Name of the account.
    """
    # TODO: reduce number of seeks and/or offset calculations needed
    # for parsing.

    # Info: The hash header always start with the string ssgp follow by
    # the hash. Furthermore The fields are always a multiple of four.
    # Then if it is not multiple the value is padded by 0x00.
    ssgp_hash = binascii.hexlify(file_object.read(record.ssgp_length)[4:])

    creation_time = None

    # NOTE: the stored offsets appear to be 1-based, hence the -1 correction.
    structure_offset = record_offset + record.creation_time - 1
    file_object.seek(structure_offset, os.SEEK_SET)

    try:
      time_structure = self.TIME.parse_stream(file_object)
    except construct.FieldError as exception:
      time_structure = None
      parser_mediator.ProduceExtractionError(
          'unable to parse creation time with error: {0!s}'.format(exception))

    if time_structure:
      time_elements_tuple = (
          time_structure.year, time_structure.month, time_structure.day,
          time_structure.hour, time_structure.minute, time_structure.second)

      creation_time = dfdatetime_time_elements.TimeElements()
      try:
        creation_time.CopyFromStringTuple(
            time_elements_tuple=time_elements_tuple)
      except ValueError:
        creation_time = None
        parser_mediator.ProduceExtractionError(
            'invalid creation time value: {0!s}'.format(time_elements_tuple))

    last_modification_time = None

    structure_offset = record_offset + record.last_modification_time - 1
    file_object.seek(structure_offset, os.SEEK_SET)

    try:
      time_structure = self.TIME.parse_stream(file_object)
    except construct.FieldError as exception:
      time_structure = None
      parser_mediator.ProduceExtractionError(
          'unable to parse last modification time with error: {0!s}'.format(
              exception))

    if time_structure:
      time_elements_tuple = (
          time_structure.year, time_structure.month, time_structure.day,
          time_structure.hour, time_structure.minute, time_structure.second)

      last_modification_time = dfdatetime_time_elements.TimeElements()
      try:
        last_modification_time.CopyFromStringTuple(
            time_elements_tuple=time_elements_tuple)
      except ValueError:
        last_modification_time = None
        parser_mediator.ProduceExtractionError(
            'invalid last modification time value: {0!s}'.format(
                time_elements_tuple))

    text_description = 'N/A'
    if record.text_description:
      structure_offset = record_offset + record.text_description - 1
      file_object.seek(structure_offset, os.SEEK_SET)

      try:
        text_description = self.TEXT.parse_stream(file_object)
      except construct.FieldError as exception:
        parser_mediator.ProduceExtractionError(
            'unable to parse text description with error: {0!s}'.format(
                exception))

    comments = 'N/A'
    if record.comments:
      structure_offset = record_offset + record.comments - 1
      file_object.seek(structure_offset, os.SEEK_SET)

      try:
        comments = self.TEXT.parse_stream(file_object)
      except construct.FieldError as exception:
        parser_mediator.ProduceExtractionError(
            'unable to parse comments with error: {0!s}'.format(exception))

    structure_offset = record_offset + record.entry_name - 1
    file_object.seek(structure_offset, os.SEEK_SET)

    try:
      entry_name = self.TEXT.parse_stream(file_object)
    except construct.FieldError as exception:
      entry_name = 'N/A'
      parser_mediator.ProduceExtractionError(
          'unable to parse entry name with error: {0!s}'.format(exception))

    structure_offset = record_offset + record.account_name - 1
    file_object.seek(structure_offset, os.SEEK_SET)

    try:
      account_name = self.TEXT.parse_stream(file_object)
    except construct.FieldError as exception:
      account_name = 'N/A'
      parser_mediator.ProduceExtractionError(
          'unable to parse account name with error: {0!s}'.format(exception))

    return (
        ssgp_hash, creation_time, last_modification_time,
        text_description, comments, entry_name, account_name)

  def _ReadEntryInternet(self, parser_mediator, file_object):
    """Extracts the information from an Internet password entry.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.
    """
    record_offset = file_object.tell()
    try:
      record_header_struct = self.RECORD_HEADER_INET.parse_stream(file_object)
    except (IOError, construct.FieldError):
      parser_mediator.ProduceExtractionError((
          'unable to parse record header structure at offset: '
          '0x{0:08x}').format(record_offset))
      return

    (ssgp_hash, creation_time, last_modification_time, text_description,
     comments, entry_name, account_name) = self._ReadEntryHeader(
         parser_mediator, file_object, record_header_struct.record_header,
         record_offset)

    if not record_header_struct.where:
      where = 'N/A'
      protocol = 'N/A'
      type_protocol = 'N/A'

    else:
      offset = record_offset + record_header_struct.where - 1
      file_object.seek(offset, os.SEEK_SET)
      where = self.TEXT.parse_stream(file_object)

      offset = record_offset + record_header_struct.protocol - 1
      file_object.seek(offset, os.SEEK_SET)
      protocol = self.TYPE_TEXT.parse_stream(file_object)

      offset = record_offset + record_header_struct.type - 1
      file_object.seek(offset, os.SEEK_SET)
      type_protocol = self.TEXT.parse_stream(file_object)
      type_protocol = self._PROTOCOL_TRANSLATION_DICT.get(
          type_protocol, type_protocol)

      if record_header_struct.url:
        offset = record_offset + record_header_struct.url - 1
        file_object.seek(offset, os.SEEK_SET)
        url = self.TEXT.parse_stream(file_object)
        where = '{0:s}{1:s}'.format(where, url)

    # Move to the end of the record so the next entry can be read.
    next_record_offset = (
        record_offset + record_header_struct.record_header.entry_length)
    file_object.seek(next_record_offset, os.SEEK_SET)

    event_data = KeychainInternetRecordEventData()
    event_data.account_name = account_name
    event_data.comments = comments
    event_data.entry_name = entry_name
    event_data.protocol = protocol
    event_data.ssgp_hash = ssgp_hash
    event_data.text_description = text_description
    event_data.type_protocol = type_protocol
    event_data.where = where

    if creation_time:
      event = time_events.DateTimeValuesEvent(
          creation_time, definitions.TIME_DESCRIPTION_CREATION)
      parser_mediator.ProduceEventWithEventData(event, event_data)

    if last_modification_time:
      event = time_events.DateTimeValuesEvent(
          last_modification_time, definitions.TIME_DESCRIPTION_MODIFICATION)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def _ReadTableOffsets(self, parser_mediator, file_object):
    """Reads the table offsets.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Returns:
      list[int]: table offsets.
    """
    # INFO: The HEADER KEYCHAIN:
    # [DBHEADER] + [DBSCHEMA] + [OFFSET TABLE A] + ... + [OFFSET TABLE Z]
    # Where the table offset is relative to the first byte of the DB Schema,
    # then we must add to this offset the size of the [DBHEADER].

    # Read the database schema and extract the offset for all the tables.
    # They are ordered by file position from the top to the bottom of the file.
    table_offsets = []

    try:
      db_schema_struct = self.KEYCHAIN_DB_SCHEMA.parse_stream(file_object)
    except (IOError, construct.FieldError):
      parser_mediator.ProduceExtractionError(
          'unable to parse database schema structure')
      return []

    for index in range(db_schema_struct.number_of_tables):
      try:
        table_offset = self.TABLE_OFFSET.parse_stream(file_object)
      except (IOError, construct.FieldError):
        parser_mediator.ProduceExtractionError(
            'unable to parse table offsets: {0:d}'.format(index))
        # Return the offsets read so far; a bare return would yield None
        # and break the caller, which iterates over the result.
        return table_offsets

      table_offsets.append(table_offset + self.KEYCHAIN_DB_HEADER.sizeof())

    return table_offsets

  @classmethod
  def GetFormatSpecification(cls):
    """Retrieves the format specification.

    Returns:
      FormatSpecification: format specification.
    """
    format_specification = specification.FormatSpecification(cls.NAME)
    format_specification.AddNewSignature(
        cls.KEYCHAIN_SIGNATURE, offset=0)
    return format_specification

  def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses a MacOS keychain file-like object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    try:
      db_header = self.KEYCHAIN_DB_HEADER.parse_stream(file_object)
    except (IOError, construct.FieldError):
      raise errors.UnableToParseFile('Unable to parse file header.')

    if db_header.signature != self.KEYCHAIN_SIGNATURE:
      raise errors.UnableToParseFile('Not a MacOS keychain file.')

    if (db_header.major_version != self.KEYCHAIN_MAJOR_VERSION or
        db_header.minor_version != self.KEYCHAIN_MINOR_VERSION):
      # The version fields are UBInt16 integers, so they must be formatted
      # with {:d}; {:s} raises ValueError on int and crashed this path.
      parser_mediator.ProduceExtractionError(
          'unsupported format version: {0:d}.{1:d}'.format(
              db_header.major_version, db_header.minor_version))
      return

    # TODO: document format and determine if -1 offset correction is needed.
    table_offsets = self._ReadTableOffsets(parser_mediator, file_object)
    for table_offset in table_offsets:
      # Skipping X bytes, unknown data at this point.
      file_object.seek(table_offset, os.SEEK_SET)

      try:
        table = self.TABLE_HEADER.parse_stream(file_object)
      except (IOError, construct.FieldError):
        parser_mediator.ProduceExtractionError(
            'unable to parse table structure at offset: 0x{0:08x}'.format(
                table_offset))
        continue

      # Table_offset: absolute byte in the file where the table starts.
      # table.first_record: first record in the table, relative to the
      # first byte of the table.
      file_object.seek(table_offset + table.first_record, os.SEEK_SET)

      if table.record_type == self.RECORD_TYPE_INTERNET:
        for _ in range(table.number_of_records):
          self._ReadEntryInternet(parser_mediator, file_object)

      elif table.record_type == self.RECORD_TYPE_APPLICATION:
        for _ in range(table.number_of_records):
          self._ReadEntryApplication(parser_mediator, file_object)
class CupsIppParser(interface.FileObjectParser):
  """Parser for CUPS IPP control files.

  Extracts print job metadata (application, user, printer, URI) and the
  creation, processing and completion timestamps from a CUPS IPP file.
  """

  NAME = 'cups_ipp'
  DESCRIPTION = 'Parser for CUPS IPP files.'

  # INFO:
  # For each file, we have only one document with three different timestamps:
  # Created, process and finished.
  # Format:
  # [HEADER: MAGIC + KNOWN_TYPE][GROUP A]...[GROUP Z][GROUP_END: 0x03]
  # GROUP: [GROUP ID][PAIR A]...[PAIR Z] where [PAIR: NAME + VALUE]
  #   GROUP ID: [1byte ID]
  #   PAIR: [TagID][\x00][Name][Value])
  #     TagID: 1 byte integer with the type of "Value".
  #     Name: [Length][Text][\00]
  #       Name can be empty when the name has more than one value.
  #       Example: family name "lopez mata" with more than one surname.
  #       Type_Text + [0x06, family, 0x00] + [0x05, lopez, 0x00] +
  #       Type_Text + [0x00, 0x00] + [0x04, mata, 0x00]
  #     Value: can be integer, boolean, or text provided by TagID.
  #       If boolean, Value: [\x01][0x00(False)] or [\x01(True)]
  #       If integer, Value: [\x04][Integer]
  #       If text, Value: [Length text][Text][\00]

  # Magic number that identify the CUPS IPP supported version.
  IPP_MAJOR_VERSION = 2
  IPP_MINOR_VERSION = 0

  # Supported Operation ID.
  IPP_OP_ID = 5

  # CUPS IPP File header.
  CUPS_IPP_HEADER = construct.Struct(
      'cups_ipp_header_struct',
      construct.UBInt8('major_version'),
      construct.UBInt8('minor_version'),
      construct.UBInt16('operation_id'),
      construct.UBInt32('request_id'))

  # Group ID that indicates the end of the IPP Control file.
  GROUP_END = 3

  # Identification Groups.
  GROUP_LIST = [1, 2, 4, 5, 6, 7]

  # Type ID, per cups source file ipp-support.c.
  TYPE_GENERAL_INTEGER = 0x20
  TYPE_INTEGER = 0x21
  TYPE_BOOL = 0x22
  TYPE_ENUMERATION = 0x23
  TYPE_DATETIME = 0x31

  # Type of values that can be extracted.
  INTEGER_8 = construct.UBInt8('integer')
  INTEGER_32 = construct.UBInt32('integer')
  TEXT = construct.PascalString(
      'text', length_field=construct.UBInt8('length'))
  BOOLEAN = construct.Struct(
      'boolean_value', construct.Padding(1), INTEGER_8)
  INTEGER = construct.Struct(
      'integer_value', construct.Padding(1), INTEGER_32)

  # This is an RFC2579 datetime.
  DATETIME = construct.Struct(
      'datetime',
      construct.Padding(1),
      construct.UBInt16('year'),
      construct.UBInt8('month'),
      construct.UBInt8('day'),
      construct.UBInt8('hour'),
      construct.UBInt8('minutes'),
      construct.UBInt8('seconds'),
      construct.UBInt8('deciseconds'),
      construct.String('direction_from_utc', length=1, encoding='ascii'),
      construct.UBInt8('hours_from_utc'),
      construct.UBInt8('minutes_from_utc'),
  )

  # Name of the pair.
  PAIR_NAME = construct.Struct(
      'pair_name', TEXT, construct.Padding(1))

  # Specific CUPS IPP to generic name.
  _NAME_PAIR_TRANSLATION = {
      'com.apple.print.JobInfo.PMApplicationName': 'application',
      'com.apple.print.JobInfo.PMJobOwner': 'owner',
      'DestinationPrinterID': 'printer_id',
      'document-format': 'doc_type',
      'job-name': 'job_name',
      'job-originating-host-name': 'computer_name',
      'job-originating-user-name': 'user',
      'job-uuid': 'job_id',
      'printer-uri': 'uri'}

  # Attribute names carrying RFC2579 date-time values, mapped to the
  # event timestamp description they produce.
  _DATE_TIME_VALUES = {
      'date-time-at-creation': definitions.TIME_DESCRIPTION_CREATION,
      'date-time-at-processing': definitions.TIME_DESCRIPTION_START,
      'date-time-at-completed': definitions.TIME_DESCRIPTION_END}

  # Attribute names carrying POSIX (seconds since epoch) timestamps.
  _POSIX_TIME_VALUES = {
      'time-at-creation': definitions.TIME_DESCRIPTION_CREATION,
      'time-at-processing': definitions.TIME_DESCRIPTION_START,
      'time-at-completed': definitions.TIME_DESCRIPTION_END}

  # All attribute names that hold time values; these are removed from
  # data_dict and handled separately in ParseFileObject.
  _DATE_TIME_VALUE_NAMES = list(_DATE_TIME_VALUES.keys())
  _DATE_TIME_VALUE_NAMES.extend(list(_POSIX_TIME_VALUES.keys()))

  def _GetStringValue(self, data_dict, name, default_value=None):
    """Retrieves a specific string value from the data dict.

    Args:
      data_dict (dict[str, list[str]): values per name.
      name (str): name of the value to retrieve.
      default_value (Optional[object]): value returned when the name is
          not present or has no values.

    Returns:
      str: value represented as a string, with comma-containing values
          quoted, or default_value when the name is missing.
    """
    values = data_dict.get(name, None)
    if not values:
      return default_value

    # Quote values containing a comma so the joined result stays parseable.
    for index, value in enumerate(values):
      if ',' in value:
        values[index] = '"{0:s}"'.format(value)

    return ', '.join(values)

  def _ReadPair(self, parser_mediator, file_object):
    """Reads an attribute name and value pair from a CUPS IPP event.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): file-like object.

    Returns:
      tuple: contains:
        str: name or None.
        str: value or None.
    """
    # Pair = Type ID + Name + Value.
    try:
      # Can be:
      #   Group ID + IDtag = Group ID (1byte) + Tag ID (1byte) + '0x00'.
      #   IDtag = Tag ID (1byte) + '0x00'.
      type_id = self.INTEGER_8.parse_stream(file_object)
      if type_id == self.GROUP_END:
        # (None, None) signals end of file / unreadable pair to the caller.
        return None, None

      elif type_id in self.GROUP_LIST:
        # If it is a group ID we must read the next byte that contains
        # the first TagID.
        type_id = self.INTEGER_8.parse_stream(file_object)

      # 0x00 separator character.
      self.INTEGER_8.parse_stream(file_object)

    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceExtractionError(
          'unable to parse pair identifier with error: {0!s}'.format(
              exception))
      return None, None

    # Name = Length name + name + 0x00
    try:
      pair_name = self.PAIR_NAME.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceExtractionError(
          'unable to parse pair name with error: {0!s}'.format(
              exception))
      return None, None

    try:
      name = pair_name.text.decode('utf-8')
    except UnicodeDecodeError as exception:
      parser_mediator.ProduceExtractionError(
          'unable to decode pair name with error: {0!s}'.format(
              exception))
      return None, None

    # Value: can be integer, boolean or text select by Type ID.
    if type_id in (
        self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER, self.TYPE_ENUMERATION):
      value_structure = self.INTEGER
    elif type_id == self.TYPE_BOOL:
      value_structure = self.BOOLEAN
    elif type_id == self.TYPE_DATETIME:
      value_structure = self.DATETIME
    else:
      # Any unrecognized type ID is treated as text.
      value_structure = self.TEXT

    try:
      value = value_structure.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceExtractionError(
          'unable to parse value with error: {0!s}'.format(exception))
      return None, None

    # Convert the raw parsed structure into a Python value.
    if type_id in (
        self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER, self.TYPE_ENUMERATION):
      value = value.integer

    elif type_id == self.TYPE_BOOL:
      value = bool(value.integer)

    elif type_id == self.TYPE_DATETIME:
      rfc2579_date_time_tuple = (
          value.year, value.month, value.day,
          value.hour, value.minutes, value.seconds, value.deciseconds,
          value.direction_from_utc, value.hours_from_utc,
          value.minutes_from_utc)
      value = dfdatetime_rfc2579_date_time.RFC2579DateTime(
          rfc2579_date_time_tuple=rfc2579_date_time_tuple)

    else:
      try:
        value = value.decode('utf-8')
      except UnicodeDecodeError as exception:
        parser_mediator.ProduceExtractionError(
            'unable to decode value with error: {0!s}'.format(
                exception))
        return None, None

    return name, value

  def _ReadPairs(self, parser_mediator, file_object):
    """Reads the attribute name and value pairs from a CUPS IPP event.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): file-like object.

    Returns:
      dict[str, list[str]]: values per name.
    """
    data_dict = {}

    name, value = self._ReadPair(parser_mediator, file_object)
    # A (None, None) pair marks either the GROUP_END tag or a parse failure.
    while name or value:
      # Translate the known "name" CUPS IPP to a generic name value.
      pretty_name = self._NAME_PAIR_TRANSLATION.get(name, name)
      data_dict.setdefault(pretty_name, []).append(value)
      name, value = self._ReadPair(parser_mediator, file_object)

    return data_dict

  def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses a CUPS IPP file-like object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    try:
      header = self.CUPS_IPP_HEADER.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      raise errors.UnableToParseFile(
          'Unable to parse CUPS IPP Header with error: {0!s}'.format(
              exception))

    if (header.major_version != self.IPP_MAJOR_VERSION or
        header.minor_version != self.IPP_MINOR_VERSION):
      raise errors.UnableToParseFile(
          '[{0:s}] Unsupported version number.'.format(self.NAME))

    if header.operation_id != self.IPP_OP_ID:
      # Warn if the operation ID differs from the standard one. We should be
      # able to parse the file nonetheless.
      logger.debug(
          '[{0:s}] Unsupported operation identifier in file: {1:s}.'.format(
              self.NAME, parser_mediator.GetDisplayName()))

    data_dict = self._ReadPairs(parser_mediator, file_object)

    # Split the time attributes out of data_dict; they become events rather
    # than event data attributes.
    time_dict = {}

    for name in self._DATE_TIME_VALUE_NAMES:
      value = data_dict.get(name, None)
      if value is not None:
        time_dict[name] = value
        del data_dict[name]

    event_data = CupsIppEventData()
    event_data.application = self._GetStringValue(data_dict, 'application')
    event_data.computer_name = self._GetStringValue(
        data_dict, 'computer_name')
    event_data.copies = data_dict.get('copies', [0])[0]
    event_data.data_dict = data_dict
    event_data.doc_type = self._GetStringValue(data_dict, 'doc_type')
    event_data.job_id = self._GetStringValue(data_dict, 'job_id')
    event_data.job_name = self._GetStringValue(data_dict, 'job_name')
    event_data.user = self._GetStringValue(data_dict, 'user')
    event_data.owner = self._GetStringValue(data_dict, 'owner')
    event_data.printer_id = self._GetStringValue(data_dict, 'printer_id')
    event_data.uri = self._GetStringValue(data_dict, 'uri')

    for name, usage in iter(self._DATE_TIME_VALUES.items()):
      time_values = time_dict.get(name, [])
      for date_time in time_values:
        event = time_events.DateTimeValuesEvent(date_time, usage)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    for name, usage in iter(self._POSIX_TIME_VALUES.items()):
      time_values = time_dict.get(name, [])
      for time_value in time_values:
        date_time = dfdatetime_posix_time.PosixTime(timestamp=time_value)
        event = time_events.DateTimeValuesEvent(date_time, usage)
        parser_mediator.ProduceEventWithEventData(event, event_data)
class FirefoxCacheParser(parser.BaseParser):
  """Extract cached records from Firefox.

  Scans _CACHE_00[1-3]_ block files (or the alternative per-entry metadata
  files) for valid record headers and yields one event per cache record.
  """

  NAME = 'firefox_cache'

  # Number of bytes allocated to a cache record metadata.
  RECORD_HEADER_SIZE = 36

  # Initial size of Firefox >= 4 cache files.
  INITIAL_CACHE_FILE_SIZE = 1024 * 1024 * 4

  # Smallest possible block size in Firefox cache files.
  MIN_BLOCK_SIZE = 256

  # On-disk record header layout (big-endian).
  RECORD_HEADER_STRUCT = construct.Struct(
      'record_header',
      construct.UBInt16('major'),
      construct.UBInt16('minor'),
      construct.UBInt32('location'),
      construct.UBInt32('fetch_count'),
      construct.UBInt32('last_fetched'),
      construct.UBInt32('last_modified'),
      construct.UBInt32('expire_time'),
      construct.UBInt32('data_size'),
      construct.UBInt32('request_size'),
      construct.UBInt32('info_size'))

  # Alternative filename: five hex characters + 'm' + two digits,
  # e.g. "01ABCm02"; 'm' marks metadata files ('d' files hold data only).
  ALTERNATIVE_CACHE_NAME = (
      pyparsing.Word(pyparsing.hexnums, exact=5) +
      pyparsing.Word("m", exact=1) +
      pyparsing.Word(pyparsing.nums, exact=2))

  FIREFOX_CACHE_CONFIG = collections.namedtuple(
      u'firefox_cache_config', u'block_size first_record_offset')

  REQUEST_METHODS = [
      u'GET', 'HEAD', 'POST', 'PUT', 'DELETE',
      u'TRACE', 'OPTIONS', 'CONNECT', 'PATCH']

  def __GetFirefoxConfig(self, file_entry):
    """Determine cache file block size. Raises exception if not found.

    Scans the start of the file for the first valid record and infers the
    block size (256/1024/4096) from that record's total size.
    """
    if file_entry.name[0:9] != '_CACHE_00':
      try:
        # Match alternative filename. Five hex characters + 'm' + two digit
        # number, e.g. "01ABCm02". 'm' is for metadata. Cache files with 'd'
        # instead contain data only.
        self.ALTERNATIVE_CACHE_NAME.parseString(file_entry.name)
      except pyparsing.ParseException:
        raise errors.UnableToParseFile(u'Not a Firefox cache file.')

    file_object = file_entry.GetFileObject()

    # There ought to be a valid record within the first 4MB. We use this
    # limit to prevent reading large invalid files.
    to_read = min(file_object.get_size(), self.INITIAL_CACHE_FILE_SIZE)

    while file_object.get_offset() < to_read:
      offset = file_object.get_offset()

      try:
        # We have not yet determined the block size, so we use the smallest
        # possible size.
        record = self.__NextRecord(file_entry.name, file_object,
                                   self.MIN_BLOCK_SIZE)

        record_size = (self.RECORD_HEADER_SIZE + record.request_size +
                       record.info_size)

        # Map the record size back to the fixed block size of the file.
        if record_size >= 4096:
          # _CACHE_003_
          block_size = 4096
        elif record_size >= 1024:
          # _CACHE_002_
          block_size = 1024
        else:
          # _CACHE_001_
          block_size = 256

        return self.FIREFOX_CACHE_CONFIG(block_size, offset)

      except IOError:
        logging.debug(u'{0:s}:{1:d}: Invalid record.'.format(
            file_entry.name, offset))

    raise errors.UnableToParseFile(u'Could not find a valid cache record. '
                                   u'Not a Firefox cache file.')

  def __Accept(self, candidate, block_size):
    """Determine whether the candidate is a valid cache record."""
    record_size = (self.RECORD_HEADER_SIZE + candidate.request_size +
                   candidate.info_size)

    # Sanity checks: non-empty request, fetched at least once, format major
    # version 1, and a record small enough to be plausible (< 256 blocks).
    return (candidate.request_size > 0 and candidate.fetch_count > 0 and
            candidate.major == 1 and record_size // block_size < 256)

  def __NextRecord(self, filename, file_object, block_size):
    """Provide the next cache record.

    Raises:
      IOError: when the stream cannot be parsed or the candidate header is
          not a valid record (the reader is advanced to the next block).
    """
    offset = file_object.get_offset()

    try:
      candidate = self.RECORD_HEADER_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError):
      raise IOError(u'Unable to parse stream.')

    if not self.__Accept(candidate, block_size):
      # Move reader to next candidate block.
      file_object.seek(block_size - self.RECORD_HEADER_SIZE, os.SEEK_CUR)
      raise IOError(u'Not a valid Firefox cache record.')

    # The last byte in a request is null.
    url = file_object.read(candidate.request_size)[:-1]

    # HTTP response header, even elements are keys, odd elements values.
    headers = file_object.read(candidate.info_size)

    request_method, _, _ = (
        headers.partition('request-method\x00')[2].partition('\x00'))

    _, _, response_head = headers.partition('response-head\x00')

    response_code, _, _ = response_head.partition("\r\n")

    if request_method not in self.REQUEST_METHODS:
      logging.debug(
          u'{0:s}:{1:d}: Unknown HTTP method "{2:s}". Response "{3:s}"'.format(
              filename, offset, request_method, headers))

    if response_code[0:4] != 'HTTP':
      logging.debug(
          u'{0:s}:{1:d}: Could not determine HTTP response code. '
          u'Response headers: "{2:s}".'.format(filename, offset, headers))

    # A request can span multiple blocks, so we use modulo.
    _, remainder = divmod(file_object.get_offset() - offset, block_size)

    # Move reader to next candidate block. Include the null-byte skipped above.
    file_object.seek(block_size - remainder, os.SEEK_CUR)

    return FirefoxCacheEvent(candidate, request_method, url, response_code)

  def Parse(self, file_entry):
    """Extract records from a Firefox cache file.

    Yields:
      FirefoxCacheEvent: one per valid cache record.
    """
    firefox_config = self.__GetFirefoxConfig(file_entry)

    file_object = file_entry.GetFileObject()
    file_object.seek(firefox_config.first_record_offset)

    while file_object.get_offset() < file_object.get_size():
      try:
        yield self.__NextRecord(file_entry.name, file_object,
                                firefox_config.block_size)
      except IOError:
        # NOTE(review): the logged offset subtracts MIN_BLOCK_SIZE even when
        # firefox_config.block_size differs — looks like it may report the
        # wrong position for 1024/4096-byte block files; confirm intent.
        logging.debug(u'{0:s}:{1:d}: Invalid cache record.'.format(
            file_entry.name, file_object.get_offset() - self.MIN_BLOCK_SIZE))
class ASLParser(interface.FileObjectParser):
  """Parser for ASL (Apple System Log) binary database files."""

  _INITIAL_FILE_OFFSET = None

  NAME = u'asl_log'
  DESCRIPTION = u'Parser for ASL log files.'

  # File signature expected at offset 0.
  _ASL_MAGIC = b'ASL DB\x00\x00\x00\x00\x00\x00'

  # ASL file header.
  # magic: magic number that identifies ASL files.
  # version: version of the file.
  # offset: offset of the first record in the file.
  # timestamp: time when the first entry was written.
  #     Contains the number of seconds since January 1, 1970 00:00:00 UTC.
  # last_offset: offset of the last record in the file.
  _ASL_HEADER_STRUCT = construct.Struct(
      u'asl_header_struct',
      construct.String(u'magic', 12),
      construct.UBInt32(u'version'),
      construct.UBInt64(u'offset'),
      construct.UBInt64(u'timestamp'),
      construct.UBInt32(u'cache_size'),
      construct.UBInt64(u'last_offset'),
      construct.Padding(36))

  # The record structure is:
  # [HEAP][STRUCTURE][4xExtraField][2xExtraField]*[PreviousEntry]

  # Record static structure.
  # tam_entry: the number of bytes from this file position until the end
  #     of the record, not counting itself.
  # next_offset: offset of the next record. 0x00 means the last record.
  # asl_message_id: numeric identification of the event.
  # timestamp: the entry creation date and time.
  #     Contains the number of seconds since January 1, 1970 00:00:00 UTC.
  # nanosec: nanoseconds to add to the timestamp.
  # level: level of priority.
  # pid: identifier of the process that asked to save the record.
  # uid: identifier of the user that launched the process.
  # gid: identifier of the group that launched the process.
  # read_uid: identifier of a user. Only applied if not -1 (all FF).
  #     Only root and this user can read the entry.
  # read_gid: same as read_uid, but for the group.
  _ASL_RECORD_STRUCT = construct.Struct(
      u'asl_record_struct',
      construct.Padding(2),
      construct.UBInt32(u'tam_entry'),
      construct.UBInt64(u'next_offset'),
      construct.UBInt64(u'asl_message_id'),
      construct.UBInt64(u'timestamp'),
      construct.UBInt32(u'nanosec'),
      construct.UBInt16(u'level'),
      construct.UBInt16(u'flags'),
      construct.UBInt32(u'pid'),
      construct.UBInt32(u'uid'),
      construct.UBInt32(u'gid'),
      construct.UBInt32(u'read_uid'),
      construct.UBInt32(u'read_gid'),
      construct.UBInt64(u'ref_pid'))

  # Cached size in bytes of the static record structure.
  _ASL_RECORD_STRUCT_SIZE = _ASL_RECORD_STRUCT.sizeof()

  # 8-byte fields, they can be:
  # - String: [Nibble = 1000 (8)][Nibble = Length][7 Bytes = String].
  # - Integer: an integer with the byte position in the file that points
  #     to an ASL_RECORD_DYN_VALUE struct. If the value of the integer
  #     is equal to 0, it means that it has no data (skip).

  # If the field is a String, we use this structure to decode each
  # integer byte into the corresponding character (ASCII char).
  _ASL_OCTET_STRING = construct.ExprAdapter(
      construct.Octet(u'string'),
      encoder=lambda obj, ctx: ord(obj),
      decoder=lambda obj, ctx: chr(obj))

  # Field string structure. If the first bit is 1, it means that it
  # is a String (1000) = 8, then the next nibble has the number of
  # characters. The last 7 bytes are the string bytes.
  _ASL_STRING = construct.BitStruct(
      u'string',
      construct.Flag(u'type'),
      construct.Bits(u'filler', 3),
      construct.If(lambda ctx: ctx.type, construct.Nibble(u'string_length')),
      construct.If(lambda ctx: ctx.type,
                   construct.Array(7, _ASL_OCTET_STRING)))

  # 8-byte pointer to a byte position in the file.
  _ASL_POINTER = construct.UBInt64(u'pointer')

  # Dynamic data structure pointed to by a pointer that contains a String:
  # [2 bytes padding][4 bytes size of String][String].
  _ASL_RECORD_DYN_VALUE = construct.Struct(
      u'asl_record_dyn_value',
      construct.Padding(2),
      construct.UBInt32(u'size'),
      construct.Bytes(u'value', lambda ctx: ctx.size))

  def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses an ASL file-like object.

    Walks the linked list of records starting at the header's first-record
    offset and produces one event per record via ReadASLEvent.

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).
      file_object: a file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    file_object.seek(0, os.SEEK_SET)

    try:
      header = self._ASL_HEADER_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      raise errors.UnableToParseFile(
          u'Unable to parse ASL Header with error: {0:s}.'.format(exception))

    if header.magic != self._ASL_MAGIC:
      raise errors.UnableToParseFile(u'Not an ASL Header, unable to parse.')

    offset = header.offset
    if not offset:
      # An offset of 0 means the file contains no records.
      return

    header_last_offset = header.last_offset

    previous_offset = offset
    event_object, offset = self.ReadASLEvent(
        parser_mediator, file_object, offset)
    while event_object:
      # Sanity check, the last read element must be the same as
      # indicated by the header.
      if offset == 0 and previous_offset != header_last_offset:
        parser_mediator.ProduceParseError(
            u'Unable to parse header. Last element header does not match '
            u'header offset.')
      previous_offset = offset
      event_object, offset = self.ReadASLEvent(
          parser_mediator, file_object, offset)

  def ReadASLEvent(self, parser_mediator, file_object, offset):
    """Reads an ASL record at a specific offset.

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).
      file_object: a file-like object that points to an ASL file.
      offset: an integer containing the offset of the ASL record.

    Returns:
      A tuple of an event object extracted from the ASL record, and the
      offset to the next ASL record in the file. (None, None) on any
      parse or read error.
    """
    # The heap of the entry is saved to try to avoid seek (performance issue).
    # It has the real start position of the entry.
    dynamic_data_offset = file_object.tell()

    try:
      dynamic_data = file_object.read(offset - dynamic_data_offset)
    except IOError as exception:
      parser_mediator.ProduceParseError(
          u'unable to read ASL record dynamic data with error: {0:s}'.format(
              exception))
      return None, None

    if not offset:
      return None, None

    try:
      record_struct = self._ASL_RECORD_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceParseError(
          u'unable to parse ASL record with error: {0:s}'.format(exception))
      return None, None

    # Variable tam_fields is the real length of the dynamic fields.
    # We have this: [Record_Struct] + [Dynamic_Fields] + [Pointer_Entry_Before]
    # In Record_Struct we have a field called tam_entry, which has the number
    # of bytes until the end of the entry from the position of that field.
    # The tam_entry is between the 2nd and the 6th byte in the [Record_Struct].
    # tam_entry = ([Record_Struct]-6)+[Dynamic_Fields]+[Pointer_Entry_Before]
    # Also, we do not need [Pointer_Entry_Before] and so we subtract the size
    # of [Pointer_Entry_Before], which is 8 bytes (8):
    # tam_entry = ([Record_Struct]-6)+[Dynamic_Fields]+[Pointer_Entry_Before]
    # [Dynamic_Fields] = tam_entry - [Record_Struct] + 6 - 8
    # [Dynamic_Fields] = tam_entry - [Record_Struct] - 2
    tam_fields = record_struct.tam_entry - self._ASL_RECORD_STRUCT_SIZE - 2

    # Dynamic part of the entry that contains a minimum of four fields of
    # 8 bytes plus 2 x [8 bytes] fields for each extra ASL_Field.
    # The first four fields are always the Host, Sender, Facility and Message.
    # After the first four fields, the entry might have extra ASL_Fields.
    # For each extra ASL_Field, it has a pair of 8-byte fields where the first
    # 8 bytes contains the name of the extra ASL_Field and the second 8 bytes
    # contains the text of the extra field.
    # Each of these 8-byte fields can be saved using one of these three
    # different types:
    # - Null value ('0000000000000000'): nothing to do.
    # - String: it is a string if first bit = 1 or first nibble = 8 (1000).
    #     The second nibble has the length of the string.
    #     The next 7 bytes have the text characters of the string,
    #     padding the end with null characters: '0x00'.
    #     Example: [8468 6964 6400 0000]
    #              [8] String, [4] length, value: [68 69 64 64] = hidd.
    # - Pointer: static position in the file to a special struct
    #     implemented as an ASL_RECORD_DYN_VALUE.
    #     Example: [0000 0000 0000 0077]
    #     It points to the file position 0x077 that has an
    #     ASL_RECORD_DYN_VALUE structure.
    values = []
    while tam_fields > 0:
      try:
        field_data = file_object.read(8)
      except IOError as exception:
        parser_mediator.ProduceParseError(
            u'unable to read ASL field with error: {0:s}'.format(exception))
        return None, None

      # Try to read the field data as a string.
      try:
        asl_string_struct = self._ASL_STRING.parse(field_data)
        string_data = b''.join(
            asl_string_struct.string[0:asl_string_struct.string_length])
        values.append(string_data)

        # Go to parse the next extra field.
        tam_fields -= 8
        continue

      # NOTE(review): for non-string fields the If subconstructs parse to
      # None and the slice above would presumably raise TypeError rather
      # than ValueError — confirm this handler actually triggers for
      # pointer fields.
      except ValueError:
        pass

      # If the field is not a string it must be a pointer.
      try:
        pointer_value = self._ASL_POINTER.parse(field_data)
      except ValueError as exception:
        parser_mediator.ProduceParseError(
            u'unable to parse ASL field with error: {0:s}'.format(exception))
        return None, None

      if not pointer_value:
        # Next extra field: 8 bytes more.
        tam_fields -= 8
        continue

      # The next IF ELSE is only for performance issues, avoiding seek.
      # If the pointer points to a lower position than where the actual entry
      # starts, it means that it points to a previous entry.
      pos = pointer_value - dynamic_data_offset

      # Greater or equal 0 means that the data is in the actual entry.
      if pos >= 0:
        try:
          dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse(
              dynamic_data[pos:])
          dyn_value = dyn_value_struct.value.partition(b'\x00')[0]
          values.append(dyn_value)
        except (IOError, construct.FieldError) as exception:
          parser_mediator.ProduceParseError((
              u'unable to parse ASL record dynamic value with error: '
              u'{0:s}').format(exception))
          return None, None

      else:
        # Only if it is a pointer that points to the
        # heap from another entry do we use the seek method.
        main_position = file_object.tell()

        # If the pointer is in a previous entry.
        if main_position > pointer_value:
          file_object.seek(pointer_value - main_position, os.SEEK_CUR)
          try:
            dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse_stream(
                file_object)
            dyn_value = dyn_value_struct.value.partition(b'\x00')[0]
            values.append(dyn_value)
          except (IOError, construct.FieldError):
            parser_mediator.ProduceParseError((
                u'the pointer at {0:d} (0x{0:08x}) points to invalid '
                u'information.').format(
                    main_position - self._ASL_POINTER.sizeof()))

          # Come back to the position in the entry.
          _ = file_object.read(main_position - file_object.tell())

        else:
          # Pointer after the current read position: skip forward by
          # reading, parse, then seek back.
          _ = file_object.read(pointer_value - main_position)

          dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse_stream(
              file_object)
          dyn_value = dyn_value_struct.value.partition(b'\x00')[0]
          values.append(dyn_value)

          # Come back to the position in the entry.
          file_object.seek(main_position - file_object.tell(), os.SEEK_CUR)

      # Next extra field: 8 bytes more.
      tam_fields -= 8

    # Read the last 8 bytes of the record that point to the previous entry.
    _ = file_object.read(8)

    # Parsed section, we translate the read data to an appropriate format.
    micro_seconds, _ = divmod(record_struct.nanosec, 1000)

    # Parsing the dynamic values (text or pointers to positions with text).
    # The first four are always the host, sender, facility, and message.
    number_of_values = len(values)
    if number_of_values < 4:
      parser_mediator.ProduceParseError(
          u'less than four values read from an ASL event.')

    computer_name = u'N/A'
    sender = u'N/A'
    facility = u'N/A'
    message = u'N/A'

    if number_of_values >= 1:
      computer_name = values[0].decode(u'utf-8')

    if number_of_values >= 2:
      sender = values[1].decode(u'utf-8')

    if number_of_values >= 3:
      facility = values[2].decode(u'utf-8')

    if number_of_values >= 4:
      message = values[3].decode(u'utf-8')

    # If the entry has extra fields, they work as pairs:
    # the first is the name of the field and the second the value.
    extra_information = u''
    if number_of_values > 4 and number_of_values % 2 == 0:
      # Taking all the extra attributes and merging them together,
      # eg: a = [1, 2, 3, 4] will look like "1: 2, 3: 4".
      # NOTE(review): slicing the result of map() only works on Python 2,
      # where map returns a list — confirm target interpreter.
      try:
        extra_values = map(py2to3.UNICODE_TYPE, values[4:])
        extra_information = u', '.join(
            map(u': '.join, zip(extra_values[0::2], extra_values[1::2])))
      except UnicodeDecodeError as exception:
        parser_mediator.ProduceParseError(
            u'Unable to decode all ASL values in the extra information '
            u'fields.')

    event_object = ASLEvent(
        record_struct.timestamp, offset, record_struct.asl_message_id,
        record_struct.level, record_struct.pid, record_struct.uid,
        record_struct.gid, record_struct.read_uid, record_struct.read_gid,
        computer_name, sender, facility, message, extra_information,
        micro_seconds=micro_seconds)
    parser_mediator.ProduceEvent(event_object)

    return (event_object, record_struct.next_offset)
class JavaIDXParser(interface.BaseParser):
  """Parse Java IDX files for download events.

  There are five structures defined. 6.02 files had one generic section
  that retained all data. From 6.03, the file went to a multi-section
  format where later sections were optional and had variable-lengths.
  6.03, 6.04, and 6.05 files all have their main data section (#2)
  begin at offset 128. The short structure is because 6.05 files
  deviate after the 8th byte. So, grab the first 8 bytes to ensure it's
  valid, get the file version, then continue on with the correct
  structures.
  """

  NAME = 'java_idx'
  DESCRIPTION = u'Parser for Java IDX files.'

  # Short probe header shared by all versions: busy/incomplete flags and
  # the file format version (for example 602, 603, 604 or 605).
  IDX_SHORT_STRUCT = construct.Struct(
      'magic',
      construct.UBInt8('busy'),
      construct.UBInt8('incomplete'),
      construct.UBInt32('idx_version'))

  # Single generic data section used by 6.02 files.
  IDX_602_STRUCT = construct.Struct(
      'IDX_602_Full',
      construct.UBInt16('null_space'),
      construct.UBInt8('shortcut'),
      construct.UBInt32('content_length'),
      construct.UBInt64('last_modified_date'),
      construct.UBInt64('expiration_date'),
      construct.PascalString(
          'version_string', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'url', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'namespace', length_field=construct.UBInt16('length')),
      construct.UBInt32('FieldCount'))

  # First data section of 6.03/6.04/6.05 files: dates plus the lengths of
  # the optional later sections.
  IDX_605_SECTION_ONE_STRUCT = construct.Struct(
      'IDX_605_Section1',
      construct.UBInt8('shortcut'),
      construct.UBInt32('content_length'),
      construct.UBInt64('last_modified_date'),
      construct.UBInt64('expiration_date'),
      construct.UBInt64('validation_date'),
      construct.UBInt8('signed'),
      construct.UBInt32('sec2len'),
      construct.UBInt32('sec3len'),
      construct.UBInt32('sec4len'))

  # Second data section (static offset 128): URL, IP address and the
  # number of HTTP header fields that follow.
  IDX_605_SECTION_TWO_STRUCT = construct.Struct(
      'IDX_605_Section2',
      construct.PascalString(
          'version', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'url', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'namespec', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'ip_address', length_field=construct.UBInt16('length')),
      construct.UBInt32('FieldCount'))

  # Java uses Pascal-style strings, but with a 2-byte length field.
  JAVA_READUTF_STRING = construct.Struct(
      'Java.ReadUTF',
      construct.PascalString(
          'string', length_field=construct.UBInt16('length')))

  def Parse(self, parser_context, file_entry):
    """Extract data from a Java cache IDX file.

    This is the main parsing engine for the parser. It determines if
    the selected file is a proper IDX file. It then checks the file
    version to determine the correct structure to apply to extract
    data.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).

    Raises:
      UnableToParseFile: when the file is not a valid or supported IDX
          file, or the URL/IP address cannot be determined.
    """
    file_object = file_entry.GetFileObject()
    try:
      magic = self.IDX_SHORT_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      raise errors.UnableToParseFile(
          u'Unable to parse Java IDX file with error: {0:s}.'.format(
              exception))

    # Fields magic.busy and magic.incomplete are normally 0x00. They
    # are set to 0x01 if the file is currently being downloaded. Logic
    # checks for > 1 to avoid a race condition and still reject any
    # file with other data.
    # Field magic.idx_version is the file version, of which only
    # certain versions are supported.
    if magic.busy > 1 or magic.incomplete > 1:
      raise errors.UnableToParseFile(u'Not a valid Java IDX file')

    if not magic.idx_version in [602, 603, 604, 605]:
      raise errors.UnableToParseFile(u'Not a valid Java IDX file')

    # Obtain the relevant values from the file. The last modified date
    # denotes when the file was last modified on the HOST. For example,
    # when the file was uploaded to a web server.
    if magic.idx_version == 602:
      section_one = self.IDX_602_STRUCT.parse_stream(file_object)
      last_modified_date = section_one.last_modified_date
      url = section_one.url
      ip_address = 'Unknown'
      http_header_count = section_one.FieldCount
    elif magic.idx_version in [603, 604, 605]:

      # IDX 6.03 and 6.04 have two unused bytes before the structure.
      if magic.idx_version in [603, 604]:
        file_object.read(2)

      # IDX 6.03, 6.04, and 6.05 files use the same structures for the
      # remaining data.
      section_one = self.IDX_605_SECTION_ONE_STRUCT.parse_stream(file_object)
      last_modified_date = section_one.last_modified_date
      if file_object.get_size() > 128:
        file_object.seek(128)  # Static offset for section 2.
        section_two = self.IDX_605_SECTION_TWO_STRUCT.parse_stream(file_object)
        url = section_two.url
        ip_address = section_two.ip_address
        http_header_count = section_two.FieldCount
      else:
        url = 'Unknown'
        ip_address = 'Unknown'
        http_header_count = 0

    # File offset is now just prior to HTTP headers. Make sure there
    # are headers, and then parse them to retrieve the download date.
    download_date = None
    for field in range(0, http_header_count):
      # NOTE(review): the loop variable is immediately shadowed by the
      # parsed field structure; the range only counts iterations.
      field = self.JAVA_READUTF_STRING.parse_stream(file_object)
      value = self.JAVA_READUTF_STRING.parse_stream(file_object)
      if field.string == 'date':
        # Time string "should" be in UTC or have an associated time zone
        # information in the string itself. If that is not the case then
        # there is no reliable method for plaso to determine the proper
        # timezone, so the assumption is that it is UTC.
        download_date = timelib.Timestamp.FromTimeString(
            value.string, gmt_as_timezone=False)

    if not url or not ip_address:
      raise errors.UnableToParseFile(
          u'Unexpected Error: URL or IP address not found in file.')

    last_modified_timestamp = timelib.Timestamp.FromJavaTime(
        last_modified_date)
    # TODO: Move the timestamp description fields into eventdata.
    event_object = JavaIDXEvent(
        last_modified_timestamp, 'File Hosted Date', magic.idx_version, url,
        ip_address)
    parser_context.ProduceEvent(
        event_object, parser_name=self.NAME, file_entry=file_entry)

    if section_one:
      expiration_date = section_one.get('expiration_date', None)
      if expiration_date:
        expiration_timestamp = timelib.Timestamp.FromJavaTime(expiration_date)
        event_object = JavaIDXEvent(
            expiration_timestamp, 'File Expiration Date', magic.idx_version,
            url, ip_address)
        parser_context.ProduceEvent(
            event_object, parser_name=self.NAME, file_entry=file_entry)

    if download_date:
      event_object = JavaIDXEvent(
          download_date, eventdata.EventTimestamp.FILE_DOWNLOADED,
          magic.idx_version, url, ip_address)
      parser_context.ProduceEvent(
          event_object, parser_name=self.NAME, file_entry=file_entry)
#!/usr/bin/python import binascii import construct import datetime import sys from binplist import binplist HFS_to_Epoch = 2082844800 s_alias = construct.Struct('plist_alias', construct.Padding(4), construct.UBInt16('length'), construct.Padding(6), construct.UBInt32('timestamp1'), construct.Padding(18), construct.UBInt32('timestamp2'), construct.Padding(20)) s_type = construct.UBInt16('type') s_volume = construct.Struct( 'volume', construct.UBInt16('volume1_length'), construct.UBInt16('characters1'), construct.String('volume1', lambda ctx: ctx.characters1 * 2), construct.Padding(2), construct.UBInt16('volume2_length'), construct.UBInt16('characters2'), construct.String('volume2', lambda ctx: ctx.characters2 * 2)) s_mount_point = construct.PascalString( 'mount_point', length_field=construct.UBInt16('length'))