def _decode(self, obj, context): return '{:08x}-{:04x}-{:04x}-{:04x}-{:s}'.format( construct.ULInt32('foo').parse(obj[0:4]), construct.ULInt16('foo').parse(obj[4:6]), construct.ULInt16('foo').parse(obj[6:8]), construct.UBInt16('foo').parse(obj[8:10]), obj[10:16].encode('hex'))
def __init__(s):
    # NOTE(review): the original author uses 's' in place of the
    # conventional 'self'; kept unchanged.
    #
    # Builds the packet-header parsers and the command dispatch tables.

    # Header embedded for cmd_id 0 packets: big-endian command/response
    # preamble carrying ids, flags and an error code.
    s.header_cmd0 = construct.Struct('CMD0Header',
                                     construct.UBInt8('magic'),
                                     construct.UBInt8('unk_0'),
                                     construct.UBInt8('unk_1'),
                                     construct.UBInt8('unk_2'),
                                     construct.UBInt8('unk_3'),
                                     construct.UBInt8('flags'),
                                     construct.UBInt8('id_primary'),
                                     construct.UBInt8('id_secondary'),
                                     construct.UBInt16('error_code'),
                                     construct.UBInt16('payload_size_cmd0'))

    # Header embedded for cmd_id 1 packets: 48 opaque bytes.
    s.header_cmd1 = construct.Struct('CMD1Header',
                                     construct.Padding(48))

    # Header embedded for cmd_id 2 packets: little-endian time data
    # (Julian day number base plus a seconds counter).
    s.header_cmd2 = construct.Struct('CMD2Header',
                                     construct.ULInt16('JDN_base'),
                                     construct.Padding(2),
                                     construct.ULInt32('seconds'))

    # Common packet header.  The per-command sub-header is selected on
    # cmd_id and only embedded when payload_size is consistent with the
    # sub-header size (cmd0 may carry extra payload beyond its header,
    # hence '>=' there and '==' for the others); unknown cmd_ids parse
    # with no sub-header (construct.Pass).
    s.header = construct.Struct('CMDHeader',
                                construct.ULInt16('packet_type'),
                                construct.ULInt16('cmd_id'),
                                construct.ULInt16('payload_size'),
                                construct.ULInt16('seq_id'),
                                construct.Switch('cmd_hdr', lambda ctx: ctx.cmd_id, {
                                    0: construct.If(lambda ctx: ctx.payload_size >= s.header_cmd0.sizeof(),
                                                    construct.Embed(s.header_cmd0)),
                                    1: construct.If(lambda ctx: ctx.payload_size == s.header_cmd1.sizeof(),
                                                    construct.Embed(s.header_cmd1)),
                                    2: construct.If(lambda ctx: ctx.payload_size == s.header_cmd2.sizeof(),
                                                    construct.Embed(s.header_cmd2))
                                }, default=construct.Pass))

    # Top-level dispatch: cmd_id -> handler method (defined elsewhere in
    # this class).
    s.cmd_handlers = {0: s.cmd0, 1: s.cmd1, 2: s.cmd2}

    # Nested dispatch for cmd0 packets: id_primary -> id_secondary ->
    # handler method.
    s.cmd0_handlers = {5: {6: s.cmd0_5_6}, }
def _pmtu_probe_do(self):
    """
    Periodically probes PMTU.

    Runs forever (intended for a dedicated gevent greenlet): sends padded
    probe packets of several candidate sizes, waits for acknowledgements
    (counted elsewhere into num_pmtu_replies / probed_pmtu), and notifies
    the client when the detected PMTU changes.
    """
    if not self.manager.config.getboolean("broker", "pmtu_discovery"):
        return

    # Start probing quickly; the interval doubles after every round, up
    # to a 10 minute ceiling (see the end of the loop).
    probe_interval = 15
    while True:
        gevent.sleep(probe_interval)

        # Reset measured PMTU
        self.probed_pmtu = 0
        self.num_pmtu_probes = 0
        self.num_pmtu_replies = 0

        # Transmit PMTU probes of different sizes multiple times
        for _ in xrange(4):
            for size in [1334, 1400, 1450, 1476, 1492, 1500]:
                try:
                    msg = ControlMessage.build(cs.Container(
                        magic1=0x80,
                        magic2=0x73A7,
                        version=1,
                        type=CONTROL_TYPE_PMTUD,
                        data=""
                    ))
                    # We need to subtract 6 because ControlMessage gets auto-padded to 12 bytes
                    msg += '\x00' * (size - IPV4_HDR_OVERHEAD - L2TP_CONTROL_SIZE - 6)
                    self.socket.send(msg)
                    self.num_pmtu_probes += 1
                except gsocket.error:
                    # Best-effort: send failures (e.g. EMSGSIZE) simply
                    # mean this size is not probed in this iteration.
                    pass

            gevent.sleep(1)

        # Collect all acknowledgements; give stragglers extra time only
        # when some probes are still unanswered.
        if self.num_pmtu_probes != self.num_pmtu_replies:
            gevent.sleep(3)

        # probed_pmtu is the largest acknowledged probe (including IP
        # header overhead); clamp the usable tunnel MTU to at least 1280.
        detected_pmtu = max(self.probed_pmtu - L2TP_TUN_OVERHEAD, 1280)
        if not self.probed_pmtu or not self.num_pmtu_replies:
            logger.warning("Got no replies to any PMTU probes for tunnel %d." % self.id)
            # NOTE(review): 'continue' skips the interval back-off below,
            # so probing retries at the current interval.
            continue
        elif detected_pmtu > 0 and detected_pmtu != self.pmtu:
            self.pmtu = detected_pmtu
            self._update_mtu()
            # Notify the client of the detected PMTU
            self.handler.send_message(self.socket, CONTROL_TYPE_PMTU_NTFY,
                                      cs.UBInt16("mtu").build(self.pmtu))

        # Increase probe interval until it reaches 10 minutes
        probe_interval = min(600, probe_interval * 2)
def Opaque(subcon): # noqa
    """
    Wrap *subcon* in an `opaque`_ byte sequence.

    On the wire this is a 16 bit length immediately followed by that many
    bytes.  Unlike :py:class:`TLSPrefixedArray`, the wrapped construct
    parses to a single instance rather than a sequence of them.

    :param subcon: The construct to wrap.
    :type subcon: :py:class:`construct.Construct`

    .. _opaque: https://tools.ietf.org/html/rfc5246#section-4.3
    """
    size_prefix = construct.UBInt16(subcon.name + "_opaque_length")
    wrapped = PrefixedBytes(subcon.name, size_prefix)
    return construct.TunnelAdapter(wrapped, subcon)
construct.ULInt32('offset'), # offset to "FirstToC Offset" _POINTER( lambda ctx: ctx.offset, construct.Struct( 'ftoc_offset', construct.Anchor('abs_offset'), construct.ULInt32('offset'), _POINTER( lambda ctx: ctx.abs_offset + ctx.offset, _REPEAT( lambda obj, ctx: obj.offset == 0x00000000, construct.Struct( 'toc', construct.ULInt32('length'), construct.SLInt16('type'), construct.UBInt16('flag'), construct.ULInt32('level'), construct.ULInt32( 'offset'), # offset to next ToC (0 if none) construct.ULInt32('count'), _ARRAY( lambda ctx: ctx.count, construct.Struct( 'record', construct.ULInt16('type'), construct.ULInt16('flag'), construct.ULInt64( 'offset'), # offset to data record _POINTER( lambda ctx: ctx._._.abs_offset + ctx. offset,
construct.UBInt64('last_offset'), construct.Padding(36)) # Record = [Heap][Record_Struct][Values] # Heap = [Group of Dyn_Value]* # Values = [ADDR_TXT][ADDR_TXT][ADDR_TXT][ADDR_TXT](2x[ADDR_TXT])* # (Host) (Sender) (Facility) (message) # Record Struct ASL_RECORD_STRUCT = construct.Struct('asl_record_struct', construct.Padding(2), construct.UBInt32('tam_entry'), construct.UBInt64('next_offset'), construct.UBInt64('ASLMessageID'), construct.UBInt64('timestamp'), construct.UBInt32('nanosec'), construct.UBInt16('level'), construct.UBInt16('flags'), construct.UBInt32('pid'), construct.UBInt32('uid'), construct.UBInt32('gid'), construct.UBInt32('read_uid'), construct.UBInt32('read_gid'), construct.UBInt64('ref_pid')) # Pointer Values ASL_RECORD_ADDR_TXT = construct.Struct('addr_or_text', construct.String('addr_txt', 8)) # Pointer Dynamic Value ASL_RECORD_DYN_VALUE = construct.Struct( 'asl_record_text_header', construct.Padding(2),
def _run(self):
    """
    Starts listening for control messages via the tunnel socket.

    Runs forever (intended for a dedicated greenlet) until the socket is
    closed or an error control message tears the tunnel down.
    """
    while True:
        # Receive control messages from the socket
        try:
            data, address = self.socket.recvfrom(2048)
        except gsocket.error, e:
            if e.errno in (90, 97):
                # Ignore EMSGSIZE errors as they ocurr when performing PMTU discovery
                # and remote nodes send us ICMP fragmentation needed messages
                continue
            elif e.errno != 9:
                if self.manager.config.getboolean('log', 'log_ip_addresses'):
                    logger.error("Socket error %d (%s) in tunnel %d with %s:%d!" % (
                        e.errno, e.strerror, self.id, self.endpoint[0], self.endpoint[1]))
                else:
                    logger.error("Socket error %d (%s) in tunnel %d!" % (
                        e.errno, e.strerror, self.id))
                # NOTE(review): execution falls through to the address
                # check below even though recvfrom raised, so 'data' and
                # 'address' hold values from a previous iteration (or are
                # unbound on the first one) — confirm whether a 'continue'
                # was intended here.
            else:
                # errno 9 (EBADF): the socket was closed underneath us.
                logger.info("Closing control channel for tunnel %d." % self.id)
                return

        if address != self.endpoint:
            # Ignore messages from unknown sources
            continue

        # All packets count as liveness indicators
        self.keep_alive()

        msg = self.handler.handle(self.socket, data, address)
        if msg is None:
            # Message has been handled or is invalid
            continue
        elif msg.type == CONTROL_TYPE_ERROR:
            logger.warning("Error message received from client, tearing down tunnel %d." % self.id)
            gevent.spawn(self.manager.close_tunnel, self)
            return
        elif msg.type == CONTROL_TYPE_PMTUD:
            if not self.manager.config.getboolean("broker", "pmtu_discovery"):
                continue

            # Reply with ACK packet
            self.handler.send_message(self.socket, CONTROL_TYPE_PMTUD_ACK,
                                      cs.UBInt16("size").build(len(data)))
        elif msg.type == CONTROL_TYPE_PMTUD_ACK:
            # Decode ACK packet and extract size
            psize = cs.UBInt16("size").parse(msg.data) + IPV4_HDR_OVERHEAD
            if psize > self.probed_pmtu:
                self.probed_pmtu = psize
        elif msg.type & MASK_CONTROL_TYPE_RELIABLE:
            # Reliable messages that require ACK, transmit one now
            # (first two bytes of the payload carry the sequence id).
            data = msg.data[2:]
            self.handler.send_message(self.socket, CONTROL_TYPE_REL_ACK, msg.data[:2])

            if msg.type == CONTROL_TYPE_LIMIT:
                # Client requests limit configuration
                try:
                    limit = LimitMessage.parse(data)
                except cs.ConstructError:
                    logger.warning("Invalid limit control message received on tunnel %d." % self.id)
                    return

                if not self.limits.configure(limit):
                    logger.warning("Unknown type of limit (%d) requested on tunnel %d." % (
                        limit.type, self.id))
                    return
import signal
import struct
import sys
import traceback

import traffic_control

# Control message for our protocol; first few bits are special as we have to
# maintain compatibility with LTPv3 in the kernel (first bit must be 1); also
# the packet must be at least 12 bytes in length, otherwise some firewalls
# may filter it when used over port 53
# NOTE(review): 'cs' is presumably an alias for the construct library
# imported elsewhere in this file — confirm.
ControlMessage = cs.Struct(
    "control",
    # Ensure that the first bit is 1 (L2TP control packet)
    cs.Const(cs.UBInt8("magic1"), 0x80),
    # Reduce conflict matching to other protocols as we run on port 53
    cs.Const(cs.UBInt16("magic2"), 0x73A7),
    # Protocol version to allow future upgrades
    cs.UBInt8("version"),
    # Message type
    cs.UBInt8("type"),
    # Message data (with length prefix)
    cs.PascalString("data"),
    # Pad the message so it is at least 12 bytes long
    # (header is 6 bytes, so pad the data portion up to 6).
    cs.Padding(lambda ctx: max(0, 6 - len(ctx["data"]))),
)

# Unreliable messages (0x00 - 0x7F)
CONTROL_TYPE_COOKIE = 0x01
CONTROL_TYPE_PREPARE = 0x02
CONTROL_TYPE_ERROR = 0x03
CONTROL_TYPE_TUNNEL = 0x04
"""Cups Reading Control Files.""" # IMPORTANT: DIRTY PARSE... # MSc Project in Royal Holloway, University of London. __author__ = 'Joaquin Moreno Garijo ([email protected])' import datetime import construct import sys header = construct.Padding(11) attr_id = construct.UBInt8('type') attr_text = construct.CString('text') attr_time = construct.Struct('time', construct.UBInt32('timestamp'), construct.UBInt16('other')) class ControlFile(object): def __init__(self): self.crt_time = 0 self.proc_time = 0 self.comp_time = 0 self.data = [] def printValue(name, value): # print u'{}: {}'.format(name, value) if type(name) != str and type(name) != unicode: return elif name == u'printer-uri':
class CupsIppParser(interface.FileObjectParser):
    """Parser for CUPS IPP files. """

    NAME = u'cups_ipp'
    DESCRIPTION = u'Parser for CUPS IPP files.'

    # INFO:
    # For each file, we have only one document with three different timestamps:
    # Created, process and finished.
    # Format:
    # [HEADER: MAGIC + KNOWN_TYPE][GROUP A]...[GROUP Z][GROUP_END: 0x03]
    # GROUP: [GROUP ID][PAIR A]...[PAIR Z] where [PAIR: NAME + VALUE]
    #   GROUP ID: [1byte ID]
    #   PAIR: [TagID][\x00][Name][Value])
    #     TagID: 1 byte integer with the type of "Value".
    #     Name: [Length][Text][\00]
    #       Name can be empty when the name has more than one value.
    #       Example: family name "lopez mata" with more than one surname.
    #       Type_Text + [0x06, family, 0x00] + [0x05, lopez, 0x00] +
    #       Type_Text + [0x00, 0x00] + [0x04, mata, 0x00]
    #     Value: can be integer, boolean, or text provided by TagID.
    #       If boolean, Value: [\x01][0x00(False)] or [\x01(True)]
    #       If integer, Value: [\x04][Integer]
    #       If text, Value: [Length text][Text][\00]

    # Magic number that identify the CUPS IPP supported version.
    IPP_MAJOR_VERSION = 2
    IPP_MINOR_VERSION = 0

    # Supported Operation ID.
    IPP_OP_ID = 5

    # CUPS IPP File header.
    CUPS_IPP_HEADER = construct.Struct(
        u'cups_ipp_header_struct',
        construct.UBInt8(u'major_version'),
        construct.UBInt8(u'minor_version'),
        construct.UBInt16(u'operation_id'),
        construct.UBInt32(u'request_id'))

    # Group ID that indicates the end of the IPP Control file.
    GROUP_END = 3

    # Identification Groups.
    GROUP_LIST = [1, 2, 4, 5, 6, 7]

    # Type ID, per cups source file ipp-support.c.
    TYPE_GENERAL_INTEGER = 0x20
    TYPE_INTEGER = 0x21
    TYPE_BOOL = 0x22
    TYPE_ENUMERATION = 0x23
    TYPE_DATETIME = 0x31

    # Type of values that can be extracted.
    INTEGER_8 = construct.UBInt8(u'integer')
    INTEGER_32 = construct.UBInt32(u'integer')
    TEXT = construct.PascalString(
        u'text',
        encoding='utf-8',
        length_field=construct.UBInt8(u'length'))
    BOOLEAN = construct.Struct(
        u'boolean_value',
        construct.Padding(1),
        INTEGER_8)
    INTEGER = construct.Struct(
        u'integer_value',
        construct.Padding(1),
        INTEGER_32)

    # This is an RFC 2579 datetime.
    DATETIME = construct.Struct(
        u'datetime',
        construct.Padding(1),
        construct.UBInt16(u'year'),
        construct.UBInt8(u'month'),
        construct.UBInt8(u'day'),
        construct.UBInt8(u'hour'),
        construct.UBInt8(u'minutes'),
        construct.UBInt8(u'seconds'),
        construct.UBInt8(u'deciseconds'),
        construct.String(u'direction_from_utc', length=1, encoding='ascii'),
        construct.UBInt8(u'hours_from_utc'),
        construct.UBInt8(u'minutes_from_utc'),
    )

    # Name of the pair.
    PAIR_NAME = construct.Struct(
        u'pair_name',
        TEXT,
        construct.Padding(1))

    # Specific CUPS IPP to generic name.
    NAME_PAIR_TRANSLATION = {
        u'printer-uri': u'uri',
        u'job-uuid': u'job_id',
        u'DestinationPrinterID': u'printer_id',
        u'job-originating-user-name': u'user',
        u'job-name': u'job_name',
        u'document-format': u'doc_type',
        u'job-originating-host-name': u'computer_name',
        u'com.apple.print.JobInfo.PMApplicationName': u'application',
        u'com.apple.print.JobInfo.PMJobOwner': u'owner'}

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a CUPS IPP file-like object.

        Args:
            parser_mediator: A parser mediator object (instance of
                ParserMediator).
            file_object: A file-like object.

        Raises:
            UnableToParseFile: when the file cannot be parsed.
        """
        try:
            header = self.CUPS_IPP_HEADER.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                u'Unable to parse CUPS IPP Header with error: {0:s}'.format(
                    exception))

        if (header.major_version != self.IPP_MAJOR_VERSION or
                header.minor_version != self.IPP_MINOR_VERSION):
            raise errors.UnableToParseFile(
                u'[{0:s}] Unsupported version number.'.format(self.NAME))

        if header.operation_id != self.IPP_OP_ID:
            # Warn if the operation ID differs from the standard one. We should be
            # able to parse the file nonetheless.
            logging.debug(
                u'[{0:s}] Unsupported operation identifier in file: {1:s}.'.format(
                    self.NAME, parser_mediator.GetDisplayName()))

        # Read the pairs extracting the name and the value.
        data_dict = {}
        name, value = self.ReadPair(parser_mediator, file_object)
        while name or value:
            # Translate the known "name" CUPS IPP to a generic name value.
            pretty_name = self.NAME_PAIR_TRANSLATION.get(name, name)
            data_dict.setdefault(pretty_name, []).append(value)
            name, value = self.ReadPair(parser_mediator, file_object)

        # TODO: Refactor to use a lookup table to do event production.
        # Split the timestamp-bearing attributes out of data_dict.
        # NOTE(review): deleting keys while iterating items() is safe in
        # Python 2, where items() returns a list; this would raise under
        # Python 3.
        time_dict = {}
        for key, value in data_dict.items():
            if key.startswith(u'date-time-') or key.startswith(u'time-'):
                time_dict[key] = value
                del data_dict[key]

        if u'date-time-at-creation' in time_dict:
            event_object = CupsIppEvent(
                time_dict[u'date-time-at-creation'][0],
                eventdata.EventTimestamp.CREATION_TIME, data_dict)
            parser_mediator.ProduceEvent(event_object)

        if u'date-time-at-processing' in time_dict:
            event_object = CupsIppEvent(
                time_dict[u'date-time-at-processing'][0],
                eventdata.EventTimestamp.START_TIME, data_dict)
            parser_mediator.ProduceEvent(event_object)

        if u'date-time-at-completed' in time_dict:
            event_object = CupsIppEvent(
                time_dict[u'date-time-at-completed'][0],
                eventdata.EventTimestamp.END_TIME, data_dict)
            parser_mediator.ProduceEvent(event_object)

        if u'time-at-creation' in time_dict:
            time_value = time_dict[u'time-at-creation'][0]
            timestamp = timelib.Timestamp.FromPosixTime(time_value)
            event_object = CupsIppEvent(
                timestamp, eventdata.EventTimestamp.CREATION_TIME, data_dict)
            parser_mediator.ProduceEvent(event_object)

        if u'time-at-processing' in time_dict:
            time_value = time_dict[u'time-at-processing'][0]
            timestamp = timelib.Timestamp.FromPosixTime(time_value)
            event_object = CupsIppEvent(
                timestamp, eventdata.EventTimestamp.START_TIME, data_dict)
            parser_mediator.ProduceEvent(event_object)

        if u'time-at-completed' in time_dict:
            time_value = time_dict[u'time-at-completed'][0]
            timestamp = timelib.Timestamp.FromPosixTime(time_value)
            event_object = CupsIppEvent(
                timestamp, eventdata.EventTimestamp.END_TIME, data_dict)
            parser_mediator.ProduceEvent(event_object)

    def ReadPair(self, parser_mediator, file_object):
        """Reads an attribute name and value pair from a CUPS IPP event.

        Args:
            parser_mediator: A parser mediator object (instance of
                ParserMediator).
            file_object: a file-like object that points to a file.

        Returns:
            A list of name and value. If name and value cannot be read both
            are set to None.
        """
        # Pair = Type ID + Name + Value.
        try:
            # Can be:
            #   Group ID + IDtag = Group ID (1byte) + Tag ID (1byte) + '0x00'.
            #   IDtag = Tag ID (1byte) + '0x00'.
            type_id = self.INTEGER_8.parse_stream(file_object)
            if type_id == self.GROUP_END:
                return None, None
            elif type_id in self.GROUP_LIST:
                # If it is a group ID we must read the next byte that contains
                # the first TagID.
                type_id = self.INTEGER_8.parse_stream(file_object)

            # 0x00 separator character.
            _ = self.INTEGER_8.parse_stream(file_object)
        except (IOError, construct.FieldError):
            logging.warning(
                u'[{0:s}] Unsupported identifier in file: {1:s}.'.format(
                    self.NAME, parser_mediator.GetDisplayName()))
            return None, None

        # Name = Length name + name + 0x00
        try:
            name = self.PAIR_NAME.parse_stream(file_object).text
        except (IOError, construct.FieldError):
            logging.warning(u'[{0:s}] Unsupported name in file: {1:s}.'.format(
                self.NAME, parser_mediator.GetDisplayName()))
            return None, None

        # Value: can be integer, boolean or text select by Type ID.
        try:
            if type_id in [
                    self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER,
                    self.TYPE_ENUMERATION]:
                value = self.INTEGER.parse_stream(file_object).integer
            elif type_id == self.TYPE_BOOL:
                value = bool(self.BOOLEAN.parse_stream(file_object).integer)
            elif type_id == self.TYPE_DATETIME:
                datetime = self.DATETIME.parse_stream(file_object)
                value = timelib.Timestamp.FromRFC2579Datetime(
                    datetime.year, datetime.month, datetime.day,
                    datetime.hour, datetime.minutes, datetime.seconds,
                    datetime.deciseconds, datetime.direction_from_utc,
                    datetime.hours_from_utc, datetime.minutes_from_utc)
            else:
                # Any other tag is treated as a length-prefixed text value.
                value = self.TEXT.parse_stream(file_object)
        except (IOError, UnicodeDecodeError, construct.FieldError):
            logging.warning(
                u'[{0:s}] Unsupported value in file: {1:s}.'.format(
                    self.NAME, parser_mediator.GetDisplayName()))
            return None, None

        return name, value
class CPIOArchiveFile(object):
  """Class that contains a CPIO archive file.

  Attributes:
    file_format (str): CPIO file format.
  """

  _CPIO_SIGNATURE_BINARY_BIG_ENDIAN = b'\x71\xc7'
  _CPIO_SIGNATURE_BINARY_LITTLE_ENDIAN = b'\xc7\x71'
  _CPIO_SIGNATURE_PORTABLE_ASCII = b'070707'
  _CPIO_SIGNATURE_NEW_ASCII = b'070701'
  _CPIO_SIGNATURE_NEW_ASCII_WITH_CHECKSUM = b'070702'

  _CPIO_BINARY_BIG_ENDIAN_FILE_ENTRY_STRUCT = construct.Struct(
      u'cpio_binary_big_endian_file_entry',
      construct.UBInt16(u'signature'),
      construct.UBInt16(u'device_number'),
      construct.UBInt16(u'inode_number'),
      construct.UBInt16(u'mode'),
      construct.UBInt16(u'user_identifier'),
      construct.UBInt16(u'group_identifier'),
      construct.UBInt16(u'number_of_links'),
      construct.UBInt16(u'special_device_number'),
      construct.UBInt16(u'modification_time_upper'),
      construct.UBInt16(u'modification_time_lower'),
      construct.UBInt16(u'path_string_size'),
      construct.UBInt16(u'file_size_upper'),
      construct.UBInt16(u'file_size_lower'))

  _CPIO_BINARY_LITTLE_ENDIAN_FILE_ENTRY_STRUCT = construct.Struct(
      u'cpio_binary_little_endian_file_entry',
      construct.ULInt16(u'signature'),
      construct.ULInt16(u'device_number'),
      construct.ULInt16(u'inode_number'),
      construct.ULInt16(u'mode'),
      construct.ULInt16(u'user_identifier'),
      construct.ULInt16(u'group_identifier'),
      construct.ULInt16(u'number_of_links'),
      construct.ULInt16(u'special_device_number'),
      construct.ULInt16(u'modification_time_upper'),
      construct.ULInt16(u'modification_time_lower'),
      construct.ULInt16(u'path_string_size'),
      construct.ULInt16(u'file_size_upper'),
      construct.ULInt16(u'file_size_lower'))

  # Fields are octal numbers encoded as ASCII digits.
  _CPIO_PORTABLE_ASCII_FILE_ENTRY_STRUCT = construct.Struct(
      u'cpio_portable_ascii_file_entry',
      construct.Bytes(u'signature', 6),
      construct.Bytes(u'device_number', 6),
      construct.Bytes(u'inode_number', 6),
      construct.Bytes(u'mode', 6),
      construct.Bytes(u'user_identifier', 6),
      construct.Bytes(u'group_identifier', 6),
      construct.Bytes(u'number_of_links', 6),
      construct.Bytes(u'special_device_number', 6),
      construct.Bytes(u'modification_time', 11),
      construct.Bytes(u'path_string_size', 6),
      construct.Bytes(u'file_size', 11))

  # Fields are hexadecimal numbers encoded as ASCII digits.
  # NOTE(review): the construct name below duplicates the portable ASCII
  # one in the original code; kept unchanged since the name may surface in
  # parse errors relied upon elsewhere.
  _CPIO_NEW_ASCII_FILE_ENTRY_STRUCT = construct.Struct(
      u'cpio_portable_ascii_file_entry',
      construct.Bytes(u'signature', 6),
      construct.Bytes(u'inode_number', 8),
      construct.Bytes(u'mode', 8),
      construct.Bytes(u'user_identifier', 8),
      construct.Bytes(u'group_identifier', 8),
      construct.Bytes(u'number_of_links', 8),
      construct.Bytes(u'modification_time', 8),
      construct.Bytes(u'file_size', 8),
      construct.Bytes(u'device_major_number', 8),
      construct.Bytes(u'device_minor_number', 8),
      construct.Bytes(u'special_device_major_number', 8),
      construct.Bytes(u'special_device_minor_number', 8),
      construct.Bytes(u'path_string_size', 8),
      construct.Bytes(u'checksum', 8))

  def __init__(self, debug=False):
    """Initializes the CPIO archive file object.

    Args:
      debug (Optional[bool]): True if debug information should be printed.
    """
    super(CPIOArchiveFile, self).__init__()
    self._debug = debug
    self._file_entries = None
    self._file_object = None
    self._file_object_opened_in_object = False
    self._file_size = 0

    self.file_format = None
    self.size = None

  def _ReadFileEntry(self, file_offset):
    """Reads a file entry.

    Args:
      file_offset (int): current file offset.

    Returns:
      CPIOArchiveFileEntry: the file entry read at file_offset.

    Raises:
      IOError: if the file entry cannot be read.
    """
    if self._debug:
      print(u'Seeking file entry at offset: 0x{0:08x}'.format(file_offset))

    self._file_object.seek(file_offset, os.SEEK_SET)

    # Select the header layout matching the detected archive format.
    if self.file_format == u'bin-big-endian':
      file_entry_struct = self._CPIO_BINARY_BIG_ENDIAN_FILE_ENTRY_STRUCT
    elif self.file_format == u'bin-little-endian':
      file_entry_struct = self._CPIO_BINARY_LITTLE_ENDIAN_FILE_ENTRY_STRUCT
    elif self.file_format == u'odc':
      file_entry_struct = self._CPIO_PORTABLE_ASCII_FILE_ENTRY_STRUCT
    elif self.file_format in (u'crc', u'newc'):
      file_entry_struct = self._CPIO_NEW_ASCII_FILE_ENTRY_STRUCT

    file_entry_struct_size = file_entry_struct.sizeof()

    file_entry_data = self._file_object.read(file_entry_struct_size)
    file_offset += file_entry_struct_size

    if self._debug:
      print(u'File entry data:')
      print(hexdump.Hexdump(file_entry_data))

    try:
      file_entry_struct = file_entry_struct.parse(file_entry_data)
    except construct.FieldError as exception:
      # BUG FIX: the original code called str.file_format(exception), which
      # raised AttributeError instead of the intended IOError message.
      raise IOError((
          u'Unable to parse file entry data section with error: '
          u'{0:s}').format(exception))

    if self.file_format in (u'bin-big-endian', u'bin-little-endian'):
      inode_number = file_entry_struct.inode_number
      mode = file_entry_struct.mode
      user_identifier = file_entry_struct.user_identifier
      group_identifier = file_entry_struct.group_identifier

      # 32-bit values are stored as two 16-bit halves.
      modification_time = (
          (file_entry_struct.modification_time_upper << 16) |
          file_entry_struct.modification_time_lower)

      path_string_size = file_entry_struct.path_string_size

      file_size = (
          (file_entry_struct.file_size_upper << 16) |
          file_entry_struct.file_size_lower)

    elif self.file_format == u'odc':
      inode_number = int(file_entry_struct.inode_number, 8)
      mode = int(file_entry_struct.mode, 8)
      user_identifier = int(file_entry_struct.user_identifier, 8)
      group_identifier = int(file_entry_struct.group_identifier, 8)
      modification_time = int(file_entry_struct.modification_time, 8)
      path_string_size = int(file_entry_struct.path_string_size, 8)
      file_size = int(file_entry_struct.file_size, 8)

    elif self.file_format in (u'crc', u'newc'):
      inode_number = int(file_entry_struct.inode_number, 16)
      mode = int(file_entry_struct.mode, 16)
      user_identifier = int(file_entry_struct.user_identifier, 16)
      group_identifier = int(file_entry_struct.group_identifier, 16)
      modification_time = int(file_entry_struct.modification_time, 16)
      path_string_size = int(file_entry_struct.path_string_size, 16)
      file_size = int(file_entry_struct.file_size, 16)

    if self._debug:
      if self.file_format in (u'bin-big-endian', u'bin-little-endian'):
        print(u'Signature\t\t\t\t\t\t\t\t: 0x{0:04x}'.format(
            file_entry_struct.signature))
      else:
        print(u'Signature\t\t\t\t\t\t\t\t: {0!s}'.format(
            file_entry_struct.signature))

      if self.file_format not in (u'crc', u'newc'):
        if self.file_format in (u'bin-big-endian', u'bin-little-endian'):
          device_number = file_entry_struct.device_number
        elif self.file_format == u'odc':
          device_number = int(file_entry_struct.device_number, 8)

        print(u'Device number\t\t\t\t\t\t\t\t: {0:d}'.format(device_number))

      print(u'Inode number\t\t\t\t\t\t\t\t: {0:d}'.format(inode_number))
      print(u'Mode\t\t\t\t\t\t\t\t\t: {0:o}'.format(mode))

      print(u'User identifier (UID)\t\t\t\t\t\t\t: {0:d}'.format(
          user_identifier))

      print(u'Group identifier (GID)\t\t\t\t\t\t\t: {0:d}'.format(
          group_identifier))

      if self.file_format in (u'bin-big-endian', u'bin-little-endian'):
        number_of_links = file_entry_struct.number_of_links
      elif self.file_format == u'odc':
        number_of_links = int(file_entry_struct.number_of_links, 8)
      elif self.file_format in (u'crc', u'newc'):
        number_of_links = int(file_entry_struct.number_of_links, 16)

      print(u'Number of links\t\t\t\t\t\t\t\t: {0:d}'.format(number_of_links))

      if self.file_format not in (u'crc', u'newc'):
        if self.file_format in (u'bin-big-endian', u'bin-little-endian'):
          special_device_number = file_entry_struct.special_device_number
        elif self.file_format == u'odc':
          special_device_number = int(
              file_entry_struct.special_device_number, 8)

        print(u'Special device number\t\t\t\t\t\t\t\t: {0:d}'.format(
            special_device_number))

      print(u'Modification time\t\t\t\t\t\t\t: {0:d}'.format(
          modification_time))

      if self.file_format not in (u'crc', u'newc'):
        print(u'Path string size\t\t\t\t\t\t\t: {0:d}'.format(
            path_string_size))

      print(u'File size\t\t\t\t\t\t\t\t: {0:d}'.format(file_size))

      if self.file_format in (u'crc', u'newc'):
        device_major_number = int(file_entry_struct.device_major_number, 16)

        print(u'Device major number\t\t\t\t\t\t\t: {0:d}'.format(
            device_major_number))

        device_minor_number = int(file_entry_struct.device_minor_number, 16)

        print(u'Device minor number\t\t\t\t\t\t\t: {0:d}'.format(
            device_minor_number))

        special_device_major_number = int(
            file_entry_struct.special_device_major_number, 16)

        print(u'Special device major number\t\t\t\t\t\t: {0:d}'.format(
            special_device_major_number))

        special_device_minor_number = int(
            file_entry_struct.special_device_minor_number, 16)

        print(u'Special device minor number\t\t\t\t\t\t: {0:d}'.format(
            special_device_minor_number))

        print(u'Path string size\t\t\t\t\t\t\t: {0:d}'.format(
            path_string_size))

        checksum = int(file_entry_struct.checksum, 16)
        print(u'Checksum\t\t\t\t\t\t\t\t: 0x{0:08x}'.format(checksum))

    path_string_data = self._file_object.read(path_string_size)
    file_offset += path_string_size

    # TODO: should this be ASCII?
    path_string = path_string_data.decode(u'ascii')
    path_string, _, _ = path_string.partition(u'\x00')

    if self._debug:
      print(u'Path string\t\t\t\t\t\t\t\t: {0:s}'.format(path_string))

    # The path string is aligned depending on the format: 2-byte alignment
    # for the binary formats, 4-byte for newc/crc, none for odc.
    if self.file_format in (u'bin-big-endian', u'bin-little-endian'):
      padding_size = file_offset % 2
      if padding_size > 0:
        padding_size = 2 - padding_size

    elif self.file_format == u'odc':
      padding_size = 0

    elif self.file_format in (u'crc', u'newc'):
      padding_size = file_offset % 4
      if padding_size > 0:
        padding_size = 4 - padding_size

    if self._debug:
      padding_data = self._file_object.read(padding_size)
      print(u'Path string alignment padding:')
      print(hexdump.Hexdump(padding_data))

    file_offset += padding_size

    file_entry = CPIOArchiveFileEntry(self._file_object)
    file_entry.data_offset = file_offset
    file_entry.data_size = file_size
    file_entry.group_identifier = group_identifier
    file_entry.inode_number = inode_number
    file_entry.modification_time = modification_time
    file_entry.path = path_string
    file_entry.mode = mode
    file_entry.size = (
        file_entry_struct_size + path_string_size + padding_size + file_size)
    file_entry.user_identifier = user_identifier

    if self.file_format in (u'crc', u'newc'):
      # The file data itself is also 4-byte aligned in newc/crc archives.
      file_offset += file_size

      padding_size = file_offset % 4
      if padding_size > 0:
        padding_size = 4 - padding_size

      if self._debug:
        self._file_object.seek(file_offset, os.SEEK_SET)
        padding_data = self._file_object.read(padding_size)
        print(u'File data alignment padding:')
        print(hexdump.Hexdump(padding_data))

      file_entry.size += padding_size

    if self._debug:
      print(u'')

    return file_entry

  def _ReadFileEntries(self):
    """Reads the file entries from the cpio archive."""
    file_offset = 0
    # When _file_size is 0 (unknown), read until the trailer entry.
    while file_offset < self._file_size or self._file_size == 0:
      file_entry = self._ReadFileEntry(file_offset)
      file_offset += file_entry.size
      if file_entry.path == u'TRAILER!!!':
        break

      if file_entry.path in self._file_entries:
        # Keep the first occurrence of a duplicate path.
        continue

      self._file_entries[file_entry.path] = file_entry

    self.size = file_offset

  def Close(self):
    """Closes the CPIO archive file."""
    if not self._file_object:
      return

    if self._file_object_opened_in_object:
      self._file_object.close()
      self._file_object_opened_in_object = False

    self._file_entries = None
    self._file_object = None

  def FileEntryExistsByPath(self, path):
    """Determines if file entry for a specific path exists.

    Args:
      path (str): path of the file entry.

    Returns:
      bool: True if the file entry exists.
    """
    if self._file_entries is None:
      return False

    return path in self._file_entries

  def GetFileEntries(self, path_prefix=u''):
    """Retrieves the file entries.

    Args:
      path_prefix (Optional[str]): path prefix.

    Yields:
      CPIOArchiveFileEntry: CPIO archive file entry.
    """
    for path, file_entry in iter(self._file_entries.items()):
      if path.startswith(path_prefix):
        yield file_entry

  def GetFileEntryByPath(self, path):
    """Retrieves a file entry for a specific path.

    Args:
      path (str): path of the file entry.

    Returns:
      CPIOArchiveFileEntry: CPIO archive file entry or None.
    """
    if self._file_entries is None:
      return

    return self._file_entries.get(path, None)

  def Open(self, filename):
    """Opens the CPIO archive file.

    Args:
      filename (str): path of the file.

    Raises:
      IOError: if the file format signature is not supported.
    """
    stat_object = os.stat(filename)

    file_object = open(filename, 'rb')

    self.OpenFileObject(file_object)

    self._file_size = stat_object.st_size

    self._file_object_opened_in_object = True

  def OpenFileObject(self, file_object):
    """Opens the CPIO archive file.

    Args:
      file_object (file): file-like object.

    Raises:
      IOError: if the file is already opened or the format signature is
          not supported.
    """
    if self._file_object:
      raise IOError(u'Already open')

    file_object.seek(0, os.SEEK_SET)
    signature_data = file_object.read(6)

    self.file_format = None
    if len(signature_data) > 2:
      # The 2-byte binary signatures are checked first, then the 6-byte
      # ASCII signatures.
      if signature_data[:2] == self._CPIO_SIGNATURE_BINARY_BIG_ENDIAN:
        self.file_format = u'bin-big-endian'
      elif signature_data[:2] == self._CPIO_SIGNATURE_BINARY_LITTLE_ENDIAN:
        self.file_format = u'bin-little-endian'
      elif signature_data == self._CPIO_SIGNATURE_PORTABLE_ASCII:
        self.file_format = u'odc'
      elif signature_data == self._CPIO_SIGNATURE_NEW_ASCII:
        self.file_format = u'newc'
      elif signature_data == self._CPIO_SIGNATURE_NEW_ASCII_WITH_CHECKSUM:
        self.file_format = u'crc'

    if self.file_format is None:
      raise IOError(u'Unsupported CPIO format.')

    self._file_entries = {}
    self._file_object = file_object

    self._ReadFileEntries()
class KeychainParser(interface.BaseParser):
  """Parser for Mac OS X Keychain files.

  Layout (big-endian): [DB HEADER][DB SCHEMA][TABLE OFFSET]*[TABLE]*, where
  each table contains records whose attribute fields are stored as offsets
  relative to the first byte of the record.
  """

  NAME = 'mac_keychain'
  DESCRIPTION = u'Parser for Mac OS X Keychain files.'

  KEYCHAIN_MAGIC_HEADER = 'kych'
  KEYCHAIN_MAJOR_VERSION = 1
  KEYCHAIN_MINOR_VERSION = 0

  RECORD_TYPE_APPLICATION = 0x80000000
  RECORD_TYPE_INTERNET = 0x80000001

  # DB HEADER.
  KEYCHAIN_DB_HEADER = construct.Struct(
      'db_header',
      construct.String('magic', 4),
      construct.UBInt16('major_version'),
      construct.UBInt16('minor_version'),
      construct.UBInt32('header_size'),
      construct.UBInt32('schema_offset'),
      construct.Padding(4))

  # DB SCHEMA.
  KEYCHAIN_DB_SCHEMA = construct.Struct(
      'db_schema',
      construct.UBInt32('size'),
      construct.UBInt32('number_of_tables'))

  # For each number_of_tables, the schema has a TABLE_OFFSET with the
  # offset starting in the DB_SCHEMA.
  TABLE_OFFSET = construct.UBInt32('table_offset')

  TABLE_HEADER = construct.Struct(
      'table_header',
      construct.UBInt32('table_size'),
      construct.UBInt32('record_type'),
      construct.UBInt32('number_of_records'),
      construct.UBInt32('first_record'),
      construct.UBInt32('index_offset'),
      construct.Padding(4),
      construct.UBInt32('recordnumbercount'))

  # Common record header; every UBInt32 attribute below (creation_time,
  # comments, ...) is an offset relative to the record start.
  RECORD_HEADER = construct.Struct(
      'record_header',
      construct.UBInt32('entry_length'),
      construct.Padding(12),
      construct.UBInt32('ssgp_length'),
      construct.Padding(4),
      construct.UBInt32('creation_time'),
      construct.UBInt32('last_mod_time'),
      construct.UBInt32('text_description'),
      construct.Padding(4),
      construct.UBInt32('comments'),
      construct.Padding(8),
      construct.UBInt32('entry_name'),
      construct.Padding(20),
      construct.UBInt32('account_name'),
      construct.Padding(4))

  RECORD_HEADER_APP = construct.Struct(
      'record_entry_app',
      RECORD_HEADER,
      construct.Padding(4))

  RECORD_HEADER_INET = construct.Struct(
      'record_entry_inet',
      RECORD_HEADER,
      construct.UBInt32('where'),
      construct.UBInt32('protocol'),
      construct.UBInt32('type'),
      construct.Padding(4),
      construct.UBInt32('url'))

  TEXT = construct.PascalString(
      'text', length_field=construct.UBInt32('length'))

  TIME = construct.Struct(
      'timestamp',
      construct.String('year', 4),
      construct.String('month', 2),
      construct.String('day', 2),
      construct.String('hour', 2),
      construct.String('minute', 2),
      construct.String('second', 2),
      construct.Padding(2))

  TYPE_TEXT = construct.String('type', 4)

  # TODO: add more protocols.
  _PROTOCOL_TRANSLATION_DICT = {
      u'htps': u'https',
      u'smtp': u'smtp',
      u'imap': u'imap',
      u'http': u'http'}

  def _GetTimestampFromEntry(self, parser_context, file_entry, structure):
    """Parses a TIME entry structure into microseconds since Epoch in UTC.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      structure: TIME entry structure with string attributes year, month,
          day, hour, minute and second.

    Returns:
      Microseconds since Epoch in UTC, or 0 if the structure contains an
      invalid date or time.
    """
    try:
      return timelib.Timestamp.FromTimeParts(
          int(structure.year, 10), int(structure.month, 10),
          int(structure.day, 10), int(structure.hour, 10),
          int(structure.minute, 10), int(structure.second, 10))
    except ValueError:
      # Fixed: the original swapped the structure and display name arguments.
      logging.warning(
          u'[{0:s}] Invalid keychain time {1!s} in file: {2:s}'.format(
              self.NAME, structure,
              parser_context.GetDisplayName(file_entry)))
      return 0

  def _ReadEntryApplication(self, parser_context, file_object, file_entry=None):
    """Extracts the information from an application password entry.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_object: A file-like object that points to an Keychain file.
      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
          The default is None.
    """
    offset = file_object.tell()
    try:
      record = self.RECORD_HEADER_APP.parse_stream(file_object)
    except (IOError, construct.FieldError):
      logging.warning((
          u'[{0:s}] Unsupported record header at 0x{1:08x} in file: '
          u'{2:s}').format(
              self.NAME, offset, parser_context.GetDisplayName(file_entry)))
      return

    (ssgp_hash, creation_time, last_mod_time, text_description,
     comments, entry_name, account_name) = self._ReadEntryHeader(
         parser_context, file_entry, file_object, record.record_header,
         offset)

    # Move to the end of the record so the stream is positioned for the
    # next record.
    file_object.seek(
        record.record_header.entry_length + offset - file_object.tell(),
        os.SEEK_CUR)

    event_object = KeychainApplicationRecordEvent(
        creation_time, eventdata.EventTimestamp.CREATION_TIME,
        entry_name, account_name, text_description, comments, ssgp_hash)
    parser_context.ProduceEvent(
        event_object, parser_name=self.NAME, file_entry=file_entry)

    if creation_time != last_mod_time:
      event_object = KeychainApplicationRecordEvent(
          last_mod_time, eventdata.EventTimestamp.MODIFICATION_TIME,
          entry_name, account_name, text_description, comments, ssgp_hash)
      parser_context.ProduceEvent(
          event_object, parser_name=self.NAME, file_entry=file_entry)

  def _ReadEntryHeader(
      self, parser_context, file_entry, file_object, record, offset):
    """Reads the common record attributes.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      file_object: A file-like object that points to an Keychain file.
      record: Structure with the header of the record.
      offset: First byte of the record.

    Returns:
      A tuple of:
        ssgp_hash: Hash of the encrypted data (passwd, cert, note).
        creation_time: When the entry was created.
        last_mod_time: Last time the entry was updated.
        text_description: A brief description of the entry.
        comments: Comments attached to the entry or u'N/A'.
        entry_name: Name of the entry.
        account_name: Name of the account.
    """
    # Info: The hash header always starts with the string "ssgp" followed
    # by the hash. Fields are always a multiple of four bytes; if not,
    # the value is padded with 0x00.
    ssgp_hash = binascii.hexlify(file_object.read(record.ssgp_length)[4:])

    file_object.seek(
        record.creation_time - file_object.tell() + offset - 1, os.SEEK_CUR)
    creation_time = self._GetTimestampFromEntry(
        parser_context, file_entry, self.TIME.parse_stream(file_object))

    file_object.seek(
        record.last_mod_time - file_object.tell() + offset - 1, os.SEEK_CUR)
    last_mod_time = self._GetTimestampFromEntry(
        parser_context, file_entry, self.TIME.parse_stream(file_object))

    # The description field does not always contain data.
    if record.text_description:
      file_object.seek(
          record.text_description - file_object.tell() + offset - 1,
          os.SEEK_CUR)
      text_description = self.TEXT.parse_stream(file_object)
    else:
      text_description = u'N/A'

    # The comment field does not always contain data.
    if record.comments:
      # Fixed: the original seeked to record.text_description here, reading
      # the comments from the wrong file position.
      file_object.seek(
          record.comments - file_object.tell() + offset - 1, os.SEEK_CUR)
      comments = self.TEXT.parse_stream(file_object)
    else:
      comments = u'N/A'

    file_object.seek(
        record.entry_name - file_object.tell() + offset - 1, os.SEEK_CUR)
    entry_name = self.TEXT.parse_stream(file_object)

    file_object.seek(
        record.account_name - file_object.tell() + offset - 1, os.SEEK_CUR)
    account_name = self.TEXT.parse_stream(file_object)

    return (
        ssgp_hash, creation_time, last_mod_time, text_description,
        comments, entry_name, account_name)

  def _ReadEntryInternet(self, parser_context, file_object, file_entry=None):
    """Extracts the information from an Internet password entry.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_object: A file-like object that points to an Keychain file.
      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
          The default is None.
    """
    offset = file_object.tell()
    try:
      record = self.RECORD_HEADER_INET.parse_stream(file_object)
    except (IOError, construct.FieldError):
      logging.warning((
          u'[{0:s}] Unsupported record header at 0x{1:08x} in file: '
          u'{2:s}').format(
              self.NAME, offset, parser_context.GetDisplayName(file_entry)))
      return

    (ssgp_hash, creation_time, last_mod_time, text_description,
     comments, entry_name, account_name) = self._ReadEntryHeader(
         parser_context, file_entry, file_object, record.record_header,
         offset)

    if not record.where:
      where = u'N/A'
      protocol = u'N/A'
      type_protocol = u'N/A'
    else:
      file_object.seek(
          record.where - file_object.tell() + offset - 1, os.SEEK_CUR)
      where = self.TEXT.parse_stream(file_object)

      file_object.seek(
          record.protocol - file_object.tell() + offset - 1, os.SEEK_CUR)
      protocol = self.TYPE_TEXT.parse_stream(file_object)

      file_object.seek(
          record.type - file_object.tell() + offset - 1, os.SEEK_CUR)
      type_protocol = self.TEXT.parse_stream(file_object)
      type_protocol = self._PROTOCOL_TRANSLATION_DICT.get(
          type_protocol, type_protocol)

      if record.url:
        file_object.seek(
            record.url - file_object.tell() + offset - 1, os.SEEK_CUR)
        url = self.TEXT.parse_stream(file_object)
        where = u'{0:s}{1:s}'.format(where, url)

    # Move to the end of the record so the stream is positioned for the
    # next record.
    file_object.seek(
        record.record_header.entry_length + offset - file_object.tell(),
        os.SEEK_CUR)

    event_object = KeychainInternetRecordEvent(
        creation_time, eventdata.EventTimestamp.CREATION_TIME,
        entry_name, account_name, text_description, comments, where,
        protocol, type_protocol, ssgp_hash)
    parser_context.ProduceEvent(
        event_object, parser_name=self.NAME, file_entry=file_entry)

    if creation_time != last_mod_time:
      # Fixed: the original omitted ssgp_hash from the modification event.
      event_object = KeychainInternetRecordEvent(
          last_mod_time, eventdata.EventTimestamp.MODIFICATION_TIME,
          entry_name, account_name, text_description, comments, where,
          protocol, type_protocol, ssgp_hash)
      parser_context.ProduceEvent(
          event_object, parser_name=self.NAME, file_entry=file_entry)

  def _VerifyStructure(self, file_object):
    """Verifies that we are dealing with a Keychain file.

    Args:
      file_object: A file-like object that points to an Keychain file.

    Returns:
      A list of table positions if it is a keychain, None otherwise.
    """
    # INFO: The HEADER KEYCHAIN:
    # [DBHEADER] + [DBSCHEMA] + [OFFSET TABLE A] + ... + [OFFSET TABLE Z]
    # Where the table offset is relative to the first byte of the DB Schema,
    # then we must add to this offset the size of the [DBHEADER].
    try:
      db_header = self.KEYCHAIN_DB_HEADER.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return
    if (db_header.minor_version != self.KEYCHAIN_MINOR_VERSION or
        db_header.major_version != self.KEYCHAIN_MAJOR_VERSION or
        db_header.magic != self.KEYCHAIN_MAGIC_HEADER):
      return

    # Read the database schema and extract the offset for all the tables.
    # They are ordered by file position from the top to the bottom of the
    # file.
    try:
      db_schema = self.KEYCHAIN_DB_SCHEMA.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return
    table_offsets = []
    for _ in range(db_schema.number_of_tables):
      try:
        table_offset = self.TABLE_OFFSET.parse_stream(file_object)
      except (IOError, construct.FieldError):
        return
      table_offsets.append(table_offset + self.KEYCHAIN_DB_HEADER.sizeof())
    return table_offsets

  def Parse(self, parser_context, file_entry):
    """Extracts data from a Keychain file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).

    Raises:
      errors.UnableToParseFile: if the file is not a Keychain file.
    """
    file_object = file_entry.GetFileObject()
    table_offsets = self._VerifyStructure(file_object)
    if not table_offsets:
      file_object.close()
      raise errors.UnableToParseFile(u'The file is not a Keychain file.')

    for table_offset in table_offsets:
      # Skipping X bytes, unknown data at this point.
      file_object.seek(table_offset - file_object.tell(), os.SEEK_CUR)
      try:
        table = self.TABLE_HEADER.parse_stream(file_object)
      except construct.FieldError as exception:
        logging.warning((
            u'[{0:s}] Unable to parse table header in file: {1:s} '
            u'with error: {2:s}.').format(
                self.NAME, parser_context.GetDisplayName(file_entry),
                exception))
        continue

      # table_offset: absolute byte in the file where the table starts.
      # table.first_record: first record in the table, relative to the
      # first byte of the table.
      file_object.seek(
          table_offset + table.first_record - file_object.tell(),
          os.SEEK_CUR)

      if table.record_type == self.RECORD_TYPE_INTERNET:
        for _ in range(table.number_of_records):
          self._ReadEntryInternet(
              parser_context, file_object, file_entry=file_entry)
      elif table.record_type == self.RECORD_TYPE_APPLICATION:
        for _ in range(table.number_of_records):
          self._ReadEntryApplication(
              parser_context, file_object, file_entry=file_entry)

    file_object.close()
class CPIOArchiveFile(object):
  """CPIO archive file.

  Supports the binary (big/little endian), portable ASCII (odc) and new
  ASCII (newc, crc) CPIO variants.

  Attributes:
    file_format (str): CPIO file format.
  """
  # pylint: disable=no-member

  _CPIO_SIGNATURE_BINARY_BIG_ENDIAN = b'\x71\xc7'
  _CPIO_SIGNATURE_BINARY_LITTLE_ENDIAN = b'\xc7\x71'
  _CPIO_SIGNATURE_PORTABLE_ASCII = b'070707'
  _CPIO_SIGNATURE_NEW_ASCII = b'070701'
  _CPIO_SIGNATURE_NEW_ASCII_WITH_CHECKSUM = b'070702'

  _CPIO_BINARY_BIG_ENDIAN_FILE_ENTRY_STRUCT = construct.Struct(
      'cpio_binary_big_endian_file_entry',
      construct.UBInt16('signature'),
      construct.UBInt16('device_number'),
      construct.UBInt16('inode_number'),
      construct.UBInt16('mode'),
      construct.UBInt16('user_identifier'),
      construct.UBInt16('group_identifier'),
      construct.UBInt16('number_of_links'),
      construct.UBInt16('special_device_number'),
      construct.UBInt16('modification_time_upper'),
      construct.UBInt16('modification_time_lower'),
      construct.UBInt16('path_string_size'),
      construct.UBInt16('file_size_upper'),
      construct.UBInt16('file_size_lower'))

  _CPIO_BINARY_LITTLE_ENDIAN_FILE_ENTRY_STRUCT = construct.Struct(
      'cpio_binary_little_endian_file_entry',
      construct.ULInt16('signature'),
      construct.ULInt16('device_number'),
      construct.ULInt16('inode_number'),
      construct.ULInt16('mode'),
      construct.ULInt16('user_identifier'),
      construct.ULInt16('group_identifier'),
      construct.ULInt16('number_of_links'),
      construct.ULInt16('special_device_number'),
      construct.ULInt16('modification_time_upper'),
      construct.ULInt16('modification_time_lower'),
      construct.ULInt16('path_string_size'),
      construct.ULInt16('file_size_upper'),
      construct.ULInt16('file_size_lower'))

  # Values are octal numbers stored as ASCII strings.
  _CPIO_PORTABLE_ASCII_FILE_ENTRY_STRUCT = construct.Struct(
      'cpio_portable_ascii_file_entry',
      construct.Bytes('signature', 6),
      construct.Bytes('device_number', 6),
      construct.Bytes('inode_number', 6),
      construct.Bytes('mode', 6),
      construct.Bytes('user_identifier', 6),
      construct.Bytes('group_identifier', 6),
      construct.Bytes('number_of_links', 6),
      construct.Bytes('special_device_number', 6),
      construct.Bytes('modification_time', 11),
      construct.Bytes('path_string_size', 6),
      construct.Bytes('file_size', 11))

  # Values are hexadecimal numbers stored as ASCII strings.
  # Fixed: the struct name previously duplicated
  # 'cpio_portable_ascii_file_entry'.
  _CPIO_NEW_ASCII_FILE_ENTRY_STRUCT = construct.Struct(
      'cpio_new_ascii_file_entry',
      construct.Bytes('signature', 6),
      construct.Bytes('inode_number', 8),
      construct.Bytes('mode', 8),
      construct.Bytes('user_identifier', 8),
      construct.Bytes('group_identifier', 8),
      construct.Bytes('number_of_links', 8),
      construct.Bytes('modification_time', 8),
      construct.Bytes('file_size', 8),
      construct.Bytes('device_major_number', 8),
      construct.Bytes('device_minor_number', 8),
      construct.Bytes('special_device_major_number', 8),
      construct.Bytes('special_device_minor_number', 8),
      construct.Bytes('path_string_size', 8),
      construct.Bytes('checksum', 8))

  def __init__(self):
    """Initializes the CPIO archive file object."""
    super(CPIOArchiveFile, self).__init__()
    self._file_entries = None
    self._file_object = None
    self._file_object_opened_in_object = False
    self._file_size = 0

    self.file_format = None

  def _ReadFileEntry(self, file_object, file_offset):
    """Reads a file entry.

    Args:
      file_object (FileIO): file-like object.
      file_offset (int): current file offset.

    Returns:
      CPIOArchiveFileEntry: a CPIO archive file entry.

    Raises:
      IOError: if the file entry cannot be read.
    """
    file_object.seek(file_offset, os.SEEK_SET)

    if self.file_format == 'bin-big-endian':
      file_entry_struct = self._CPIO_BINARY_BIG_ENDIAN_FILE_ENTRY_STRUCT
    elif self.file_format == 'bin-little-endian':
      file_entry_struct = self._CPIO_BINARY_LITTLE_ENDIAN_FILE_ENTRY_STRUCT
    elif self.file_format == 'odc':
      file_entry_struct = self._CPIO_PORTABLE_ASCII_FILE_ENTRY_STRUCT
    elif self.file_format in ('crc', 'newc'):
      file_entry_struct = self._CPIO_NEW_ASCII_FILE_ENTRY_STRUCT
    else:
      # Fixed: previously an unknown format left file_entry_struct unbound.
      raise IOError('Unsupported CPIO format.')

    file_entry_struct_size = file_entry_struct.sizeof()

    try:
      file_entry_struct = file_entry_struct.parse_stream(file_object)
    except construct.FieldError as exception:
      raise IOError(
          ('Unable to parse file entry data section with error: '
           '{0:s}').format(exception))

    file_offset += file_entry_struct_size

    if self.file_format in ('bin-big-endian', 'bin-little-endian'):
      inode_number = file_entry_struct.inode_number
      mode = file_entry_struct.mode
      user_identifier = file_entry_struct.user_identifier
      group_identifier = file_entry_struct.group_identifier
      # 32-bit values are split into two 16-bit halves.
      modification_time = (
          (file_entry_struct.modification_time_upper << 16) |
          file_entry_struct.modification_time_lower)
      path_string_size = file_entry_struct.path_string_size
      file_size = (
          (file_entry_struct.file_size_upper << 16) |
          file_entry_struct.file_size_lower)

    elif self.file_format == 'odc':
      inode_number = int(file_entry_struct.inode_number, 8)
      mode = int(file_entry_struct.mode, 8)
      user_identifier = int(file_entry_struct.user_identifier, 8)
      group_identifier = int(file_entry_struct.group_identifier, 8)
      modification_time = int(file_entry_struct.modification_time, 8)
      path_string_size = int(file_entry_struct.path_string_size, 8)
      file_size = int(file_entry_struct.file_size, 8)

    elif self.file_format in ('crc', 'newc'):
      inode_number = int(file_entry_struct.inode_number, 16)
      mode = int(file_entry_struct.mode, 16)
      user_identifier = int(file_entry_struct.user_identifier, 16)
      group_identifier = int(file_entry_struct.group_identifier, 16)
      modification_time = int(file_entry_struct.modification_time, 16)
      path_string_size = int(file_entry_struct.path_string_size, 16)
      file_size = int(file_entry_struct.file_size, 16)

    path_string_data = file_object.read(path_string_size)
    file_offset += path_string_size

    # TODO: should this be ASCII?
    path_string = path_string_data.decode('ascii')
    path_string, _, _ = path_string.partition('\x00')

    # The header and path string are padded to a 2-byte (binary formats)
    # or 4-byte (new ASCII formats) boundary.
    if self.file_format in ('bin-big-endian', 'bin-little-endian'):
      padding_size = file_offset % 2
      if padding_size > 0:
        padding_size = 2 - padding_size

    elif self.file_format == 'odc':
      padding_size = 0

    elif self.file_format in ('crc', 'newc'):
      padding_size = file_offset % 4
      if padding_size > 0:
        padding_size = 4 - padding_size

    file_offset += padding_size

    file_entry = CPIOArchiveFileEntry()
    file_entry.data_offset = file_offset
    file_entry.data_size = file_size
    file_entry.group_identifier = group_identifier
    file_entry.inode_number = inode_number
    file_entry.modification_time = modification_time
    file_entry.path = path_string
    file_entry.mode = mode
    file_entry.size = (
        file_entry_struct_size + path_string_size + padding_size + file_size)
    file_entry.user_identifier = user_identifier

    file_offset += file_size

    # The file data is also padded to the format's alignment boundary.
    if self.file_format in ('bin-big-endian', 'bin-little-endian'):
      padding_size = file_offset % 2
      if padding_size > 0:
        padding_size = 2 - padding_size

    elif self.file_format == 'odc':
      padding_size = 0

    elif self.file_format in ('crc', 'newc'):
      padding_size = file_offset % 4
      if padding_size > 0:
        padding_size = 4 - padding_size

    if padding_size > 0:
      file_entry.size += padding_size

    return file_entry

  def _ReadFileEntries(self, file_object):
    """Reads the file entries from the cpio archive.

    Args:
      file_object (FileIO): file-like object.
    """
    self._file_entries = {}

    file_offset = 0
    while file_offset < self._file_size:
      file_entry = self._ReadFileEntry(file_object, file_offset)
      file_offset += file_entry.size
      # The trailer entry marks the end of the archive.
      if file_entry.path == 'TRAILER!!!':
        break

      if file_entry.path in self._file_entries:
        # TODO: alert on file entries with duplicate paths?
        continue

      self._file_entries[file_entry.path] = file_entry

  def Close(self):
    """Closes the CPIO archive file."""
    self._file_entries = None
    self._file_object = None
    # Fixed: previously set to None, which is inconsistent with __init__
    # and breaks the size comparison on a subsequent Open().
    self._file_size = 0

  def FileEntryExistsByPath(self, path):
    """Determines if file entry for a specific path exists.

    Args:
      path (str): path of the file entry.

    Returns:
      bool: True if the file entry exists.
    """
    if self._file_entries is None:
      return False

    return path in self._file_entries

  def GetFileEntries(self, path_prefix=''):
    """Retrieves the file entries.

    Args:
      path_prefix (str): path prefix.

    Yields:
      CPIOArchiveFileEntry: a CPIO archive file entry.
    """
    if self._file_entries:
      for path, file_entry in iter(self._file_entries.items()):
        if path.startswith(path_prefix):
          yield file_entry

  def GetFileEntryByPath(self, path):
    """Retrieves a file entry for a specific path.

    Args:
      path (str): path of the file entry.

    Returns:
      CPIOArchiveFileEntry: a CPIO archive file entry or None if not
          available.
    """
    if self._file_entries:
      return self._file_entries.get(path, None)

  def Open(self, file_object):
    """Opens the CPIO archive file.

    Args:
      file_object (FileIO): a file-like object.

    Raises:
      IOError: if the file format signature is not supported.
    """
    file_object.seek(0, os.SEEK_SET)
    signature_data = file_object.read(6)

    # The binary variants have a 2-byte signature, the ASCII variants a
    # 6-byte one.
    self.file_format = None
    if len(signature_data) > 2:
      if signature_data[:2] == self._CPIO_SIGNATURE_BINARY_BIG_ENDIAN:
        self.file_format = 'bin-big-endian'
      elif signature_data[:2] == self._CPIO_SIGNATURE_BINARY_LITTLE_ENDIAN:
        self.file_format = 'bin-little-endian'
      elif signature_data == self._CPIO_SIGNATURE_PORTABLE_ASCII:
        self.file_format = 'odc'
      elif signature_data == self._CPIO_SIGNATURE_NEW_ASCII:
        self.file_format = 'newc'
      elif signature_data == self._CPIO_SIGNATURE_NEW_ASCII_WITH_CHECKSUM:
        self.file_format = 'crc'

    if self.file_format is None:
      raise IOError('Unsupported CPIO format.')

    self._file_object = file_object
    self._file_size = file_object.get_size()

    self._ReadFileEntries(self._file_object)

  def ReadDataAtOffset(self, file_offset, size):
    """Reads a byte string from the file-like object at a specific offset.

    Args:
      file_offset (int): file offset.
      size (int): number of bytes to read.

    Returns:
      bytes: data read.

    Raises:
      IOError: if the read failed.
    """
    self._file_object.seek(file_offset, os.SEEK_SET)
    return self._file_object.read(size)
class AslParser(interface.BaseParser):
  """Parser for Apple System Log (ASL) log files."""

  NAME = 'asl_log'
  DESCRIPTION = u'Parser for ASL log files.'

  ASL_MAGIC = 'ASL DB\x00\x00\x00\x00\x00\x00'

  # If not right assigned, the value is "-1".
  ASL_NO_RIGHTS = 'ffffffff'

  # Priority level (criticality).
  ASL_MESSAGE_PRIORITY = {
      0: 'EMERGENCY',
      1: 'ALERT',
      2: 'CRITICAL',
      3: 'ERROR',
      4: 'WARNING',
      5: 'NOTICE',
      6: 'INFO',
      7: 'DEBUG'}

  # ASL file header.
  # magic: magic number that identifies ASL files.
  # version: version of the file.
  # offset: first record in the file.
  # timestamp: epoch time when the first entry was written.
  # last_offset: last record in the file.
  ASL_HEADER_STRUCT = construct.Struct(
      'asl_header_struct',
      construct.String('magic', 12),
      construct.UBInt32('version'),
      construct.UBInt64('offset'),
      construct.UBInt64('timestamp'),
      construct.UBInt32('cache_size'),
      construct.UBInt64('last_offset'),
      construct.Padding(36))

  # The record structure is:
  # [HEAP][STRUCTURE][4xExtraField][2xExtraField]*[PreviousEntry]
  # tam_entry: number of bytes from this file position until the end of
  #     the record, not counting itself.
  # next_offset: next record; 0x00 means this is the last record.
  # asl_message_id: numeric identification of the event.
  # timestamp/nanosec: when the entry was created.
  # level: priority level.
  # pid/uid/gid: process, user and group that asked to save the record.
  # read_uid/read_gid: if not -1 (all FF), only root and this user/group
  #     can read the entry.
  ASL_RECORD_STRUCT = construct.Struct(
      'asl_record_struct',
      construct.Padding(2),
      construct.UBInt32('tam_entry'),
      construct.UBInt64('next_offset'),
      construct.UBInt64('asl_message_id'),
      construct.UBInt64('timestamp'),
      construct.UBInt32('nanosec'),
      construct.UBInt16('level'),
      construct.UBInt16('flags'),
      construct.UBInt32('pid'),
      construct.UBInt32('uid'),
      construct.UBInt32('gid'),
      construct.UBInt32('read_uid'),
      construct.UBInt32('read_gid'),
      construct.UBInt64('ref_pid'))

  ASL_RECORD_STRUCT_SIZE = ASL_RECORD_STRUCT.sizeof()

  # 8-byte fields can be:
  # - String: [Nibble = 1000 (8)][Nibble = Length][7 Bytes = String].
  # - Integer: byte position in the file that points to an
  #   ASL_RECORD_DYN_VALUE struct; 0 means no data (skip).
  # If the field is a String, this adapter decodes each integer byte
  # into the corresponding ASCII character.
  ASL_OCTET_STRING = construct.ExprAdapter(
      construct.Octet('string'),
      encoder=lambda obj, ctx: ord(obj),
      decoder=lambda obj, ctx: chr(obj))

  # Field string structure: if the first bit is 1 (first nibble = 8) it is
  # a String and the next nibble has the number of characters; the last
  # 7 bytes hold the characters.
  ASL_STRING = construct.BitStruct(
      'string',
      construct.Flag('type'),
      construct.Bits('filler', 3),
      construct.If(lambda ctx: ctx.type, construct.Nibble('string_length')),
      construct.If(lambda ctx: ctx.type, construct.Array(7, ASL_OCTET_STRING)))

  # 8-byte pointer to a byte position in the file.
  ASL_POINTER = construct.UBInt64('pointer')

  # Dynamic data structure pointed to by a pointer that contains a String:
  # [2 bytes padding][4 bytes length of String][String].
  ASL_RECORD_DYN_VALUE = construct.Struct(
      'asl_record_dyn_value',
      construct.Padding(2),
      construct.PascalString(
          'value', length_field=construct.UBInt32('length')))

  def Parse(self, parser_context, file_entry):
    """Extracts entries from an ASL file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).

    Raises:
      errors.UnableToParseFile: if the file is not an ASL file.
    """
    file_object = file_entry.GetFileObject()
    file_object.seek(0, os.SEEK_SET)

    try:
      header = self.ASL_HEADER_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      file_object.close()
      raise errors.UnableToParseFile(
          u'Unable to parse ASL Header with error: {0:s}.'.format(exception))

    if header.magic != self.ASL_MAGIC:
      file_object.close()
      raise errors.UnableToParseFile(u'Not an ASL Header, unable to parse.')

    # Get the first and the last entry.
    offset = header.offset
    old_offset = header.offset
    last_offset_header = header.last_offset

    # If the ASL file has entries.
    if offset:
      event_object, offset = self.ReadAslEvent(file_object, offset)
      while event_object:
        parser_context.ProduceEvent(
            event_object, parser_name=self.NAME, file_entry=file_entry)

        # TODO: an anomaly object must be emitted once that is implemented.
        # Sanity check: the last read element must be the same as indicated
        # by the header.
        if offset == 0 and old_offset != last_offset_header:
          logging.warning(u'Parsing ended before the header ends.')
        old_offset = offset
        event_object, offset = self.ReadAslEvent(file_object, offset)

    file_object.close()

  def ReadAslEvent(self, file_object, offset):
    """Returns an AslEvent from a single ASL entry.

    Args:
      file_object: a file-like object that points to an ASL file.
      offset: offset where the static part of the entry starts.

    Returns:
      A tuple of (event object, offset of the next entry) or (None, None)
      if the entry cannot be parsed.
    """
    # Fixed: check the offset before reading the heap; the original issued
    # a negative-length read when offset was 0.
    if not offset:
      return None, None

    # The heap of the entry is saved to try to avoid seeks (performance).
    # dynamic_start is the real start position of the entry.
    dynamic_start = file_object.tell()
    dynamic_part = file_object.read(offset - file_object.tell())

    try:
      record_header = self.ASL_RECORD_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      logging.warning(
          u'Unable to parse ASL event with error: {0:s}'.format(exception))
      return None, None

    # tam_fields is the real length of the dynamic fields:
    # tam_entry = ([Record_Struct] - 6) + [Dynamic_Fields] +
    #     [Pointer_Entry_Before]
    # We do not need [Pointer_Entry_Before] (8 bytes), hence:
    # [Dynamic_Fields] = tam_entry - [Record_Struct] - 2
    tam_fields = record_header.tam_entry - self.ASL_RECORD_STRUCT_SIZE - 2

    # The dynamic part contains at least four 8-byte fields (Host, Sender,
    # Facility, Message) plus a pair of 8-byte fields (name, value) for each
    # extra ASL field. Each 8-byte field is either:
    # - Null ('0000000000000000'): nothing to do.
    # - String: first bit = 1 (first nibble = 8), second nibble holds the
    #   string length, the next 7 bytes the characters padded with 0x00.
    #   Example: [8468 6964 6400 0000]
    #            [8] String, [4] length, value: [68 69 64 64] = hidd.
    # - Pointer: static position in the file of an ASL_RECORD_DYN_VALUE.
    #   Example: [0000 0000 0000 0077] points to file position 0x077.
    values = []
    while tam_fields > 0:
      try:
        raw_field = file_object.read(8)
      except (IOError, construct.FieldError) as exception:
        # Fixed: the format spec was {0:d}, which raises when formatting an
        # exception.
        logging.warning(
            u'Unable to parse ASL event with error: {0:s}'.format(exception))
        return None, None

      try:
        # Try to read as a String.
        field = self.ASL_STRING.parse(raw_field)
        values.append(''.join(field.string[0:field.string_length]))
        # Go to parse the next extra field.
        tam_fields -= 8
        continue
      except ValueError:
        pass

      # If it is not a string, it must be a pointer.
      try:
        field = self.ASL_POINTER.parse(raw_field)
      except ValueError as exception:
        logging.warning(
            u'Unable to parse ASL event with error: {0:s}'.format(exception))
        return None, None

      if field != 0:
        # The next IF/ELSE avoids seeks when possible: if the pointer points
        # below where the actual entry starts, it points to a previous
        # entry; otherwise the data is in the already-read heap.
        pos = field - dynamic_start
        if pos >= 0:
          # The data is in the actual entry.
          try:
            values.append((self.ASL_RECORD_DYN_VALUE.parse(
                dynamic_part[pos:])).value.partition('\x00')[0])
          except (IOError, construct.FieldError) as exception:
            logging.warning(
                u'Unable to parse ASL event with error: {0:s}'.format(
                    exception))
            return None, None
        else:
          # Only if the pointer points to the heap of another entry do we
          # use the seek method.
          main_position = file_object.tell()
          if main_position > field:
            # The pointer is in a previous entry.
            file_object.seek(field - main_position, os.SEEK_CUR)
            try:
              values.append(
                  (self.ASL_RECORD_DYN_VALUE.parse_stream(
                      file_object)).value.partition('\x00')[0])
            except (IOError, construct.FieldError):
              logging.warning((
                  u'The pointer at {0:d} (0x{0:x}) points to invalid '
                  u'information.').format(
                      main_position - self.ASL_POINTER.sizeof()))
            # Come back to the position in the entry.
            _ = file_object.read(main_position - file_object.tell())
          else:
            _ = file_object.read(field - main_position)
            values.append((self.ASL_RECORD_DYN_VALUE.parse_stream(
                file_object)).value.partition('\x00')[0])
            # Come back to the position in the entry.
            file_object.seek(
                main_position - file_object.tell(), os.SEEK_CUR)

      # Next extra field: 8 bytes more.
      tam_fields -= 8

    # Read the last 8 bytes of the record that point to the previous entry.
    _ = file_object.read(8)

    # The first four dynamic values are always the host, sender, facility
    # and message; guard against malformed records with fewer fields.
    if len(values) < 4:
      logging.warning(
          u'Unable to parse ASL event: missing dynamic fields.')
      return None, None

    # Translate the read data into an appropriate format.
    microsecond = record_header.nanosec // 1000
    timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
        record_header.timestamp, microsecond)
    record_position = offset
    message_id = record_header.asl_message_id
    level = u'{0} ({1})'.format(
        self.ASL_MESSAGE_PRIORITY[record_header.level], record_header.level)

    # If the value is -1 (FFFFFFFF), the entry can be read by everyone.
    if record_header.read_uid != int(self.ASL_NO_RIGHTS, 16):
      read_uid = record_header.read_uid
    else:
      read_uid = 'ALL'
    if record_header.read_gid != int(self.ASL_NO_RIGHTS, 16):
      read_gid = record_header.read_gid
    else:
      read_gid = 'ALL'

    computer_name = values[0]
    sender = values[1]
    facility = values[2]
    message = values[3]

    # Extra fields work as pairs: the first is the name of the field and
    # the second its value.
    extra_information = ''
    if len(values) > 4:
      values = values[4:]
      for index in xrange(0, len(values) // 2):
        extra_information += (u'[{0}: {1}]'.format(
            values[index * 2], values[(index * 2) + 1]))

    # Return the event and the offset for the next entry.
    return AslEvent(
        timestamp, record_position, message_id, level, record_header,
        read_uid, read_gid, computer_name, sender, facility, message,
        extra_information), record_header.next_offset
# MSc Project in Royal Holloway, University of London. __author__ = 'Joaquin Moreno Garijo ([email protected])' import construct import datetime import os import sys KEYCHAIN_MAGIC_HEADER = 'kych' KEYCHAIN_MAJOR_VERSION = 1 KEYCHAIN_MINOR_VERSION = 0 # DB HEADER KEYCHAIN_DB_HEADER = construct.Struct('db_header', construct.String('magic', 4), construct.UBInt16('major_version'), construct.UBInt16('minor_version'), construct.UBInt32('header_size'), construct.UBInt32('schema_offset'), construct.Padding(4)) # DB SCHEMA KEYCHAIN_DB_SCHEMA = construct.Struct('db_schema', construct.UBInt32('size'), construct.UBInt32('number_of_tables')) # For each umber_of_tables, the schema has a TABLE_OFFSET with the # offset starting in the DB_SCHEMA. TABLE_OFFSET = construct.UBInt32('table_offset') # TABLE TABLE_RECORD_TYPE = { 0: u'Schema information',
class FirefoxCacheParser(BaseFirefoxCacheParser):
    """Parses Firefox cache version 1 files (Firefox 31 or earlier).

    Version 1 cache data lives in block files (_CACHE_001_, _CACHE_002_,
    _CACHE_003_) made up of fixed-size blocks; each record starts with a
    36-byte big-endian header followed by the request URL and the HTTP
    response headers.
    """

    NAME = u'firefox_cache'
    DESCRIPTION = (
        u'Parser for Firefox Cache version 1 files (Firefox 31 or earlier).')

    # Cache format version handled by this parser.
    _CACHE_VERSION = 1

    # Initial size of Firefox 4 and later cache files.
    _INITIAL_CACHE_FILE_SIZE = 4 * 1024 * 1024

    # Smallest possible block size in Firefox cache files.
    # NOTE(review): name misspells "MINIMUM"; kept as-is since other code may
    # reference it.
    _MINUMUM_BLOCK_SIZE = 256

    # Fixed-size big-endian header that precedes every cache record.
    _CACHE_RECORD_HEADER_STRUCT = construct.Struct(
        u'record_header',
        construct.UBInt16(u'major'),
        construct.UBInt16(u'minor'),
        construct.UBInt32(u'location'),
        construct.UBInt32(u'fetch_count'),
        construct.UBInt32(u'last_fetched'),
        construct.UBInt32(u'last_modified'),
        construct.UBInt32(u'expire_time'),
        construct.UBInt32(u'data_size'),
        construct.UBInt32(u'request_size'),
        construct.UBInt32(u'info_size'))

    _CACHE_RECORD_HEADER_SIZE = _CACHE_RECORD_HEADER_STRUCT.sizeof()

    # TODO: change into regexp.
    # Matches metadata cache filenames: five hex characters + 'm' + two
    # digits, e.g. '01ABCm02'.
    _CACHE_FILENAME = (
        pyparsing.Word(pyparsing.hexnums, exact=5) +
        pyparsing.Word(u'm', exact=1) +
        pyparsing.Word(pyparsing.nums, exact=2))

    # Per-file configuration: detected block size and the offset of the first
    # valid cache record.
    FIREFOX_CACHE_CONFIG = collections.namedtuple(
        u'firefox_cache_config', u'block_size first_record_offset')

    def _GetFirefoxConfig(self, file_object, display_name):
        """Determine cache file block size.

        Scans forward until a record parses and validates, then derives the
        block size from that record's total size.

        Args:
          file_object: A file-like object.
          display_name: the display name.

        Returns:
          FIREFOX_CACHE_CONFIG: named tuple with the detected block size and
              the offset of the first valid record.

        Raises:
          UnableToParseFile: if no valid cache record could be found.
        """
        # There ought to be a valid record within the first 4 MiB. We use this
        # limit to prevent reading large invalid files.
        to_read = min(file_object.get_size(), self._INITIAL_CACHE_FILE_SIZE)

        while file_object.get_offset() < to_read:
            offset = file_object.get_offset()
            try:
                # We have not yet determined the block size, so we use the
                # smallest possible size.
                fetched, _, _ = self._NextRecord(
                    file_object, display_name, self._MINUMUM_BLOCK_SIZE)

                # The total record size determines which cache file flavor
                # (and therefore block size) this is.
                record_size = (
                    self._CACHE_RECORD_HEADER_SIZE + fetched.request_size +
                    fetched.info_size)

                if record_size >= 4096:
                    # _CACHE_003_
                    block_size = 4096
                elif record_size >= 1024:
                    # _CACHE_002_
                    block_size = 1024
                else:
                    # _CACHE_001_
                    block_size = 256

                return self.FIREFOX_CACHE_CONFIG(block_size, offset)

            except IOError:
                # Invalid record: _NextRecord already advanced the stream;
                # keep scanning for the first valid one.
                logging.debug(u'[{0:s}] {1:s}:{2:d}: Invalid record.'.format(
                    self.NAME, display_name, offset))

        raise errors.UnableToParseFile(
            u'Could not find a valid cache record. Not a Firefox cache file.')

    def _NextRecord(self, file_object, display_name, block_size):
        """Provide the next cache record.

        Args:
          file_object: A file-like object.
          display_name: the display name.
          block_size: the block size.

        Returns:
          A tuple containing the fetched, modified and expire event objects
          (instances of EventObject) or None.

        Raises:
          IOError: if the stream cannot be parsed or the record header is not
              valid. In the invalid-header case the stream is first advanced
              to the next candidate block.
        """
        offset = file_object.get_offset()

        try:
            cache_record_header = self._CACHE_RECORD_HEADER_STRUCT.parse_stream(
                file_object)
        except (IOError, construct.FieldError):
            raise IOError(u'Unable to parse stream.')

        if not self._ValidateCacheRecordHeader(cache_record_header):
            # Move reader to next candidate block.
            file_offset = block_size - self._CACHE_RECORD_HEADER_SIZE
            file_object.seek(file_offset, os.SEEK_CUR)
            raise IOError(u'Not a valid Firefox cache record.')

        # The last byte in a request is null.
        url = file_object.read(cache_record_header.request_size)[:-1]

        # HTTP response header, even elements are keys, odd elements values.
        header_data = file_object.read(cache_record_header.info_size)

        request_method, response_code = self._ParseHTTPHeaders(
            header_data, offset, display_name)

        # A request can span multiple blocks, so we use modulo.
        file_offset = file_object.get_offset() - offset
        _, remainder = divmod(file_offset, block_size)

        # Move reader to next candidate block. Include the null-byte skipped
        # above.
        file_object.seek(block_size - remainder, os.SEEK_CUR)

        cache_record_values = {
            u'data_size': cache_record_header.data_size,
            u'fetch_count': cache_record_header.fetch_count,
            u'info_size': cache_record_header.info_size,
            u'location': cache_record_header.location,
            u'major': cache_record_header.major,
            u'minor': cache_record_header.minor,
            u'request_method': request_method,
            u'request_size': cache_record_header.request_size,
            u'response_code': response_code,
            u'version': self._CACHE_VERSION,
            u'url': url}

        # A last-visited event is always produced; written and expiration
        # events only when the corresponding timestamps are non-zero.
        fetched = FirefoxCacheEvent(
            cache_record_header.last_fetched,
            eventdata.EventTimestamp.LAST_VISITED_TIME, cache_record_values)

        if cache_record_header.last_modified:
            modified = FirefoxCacheEvent(
                cache_record_header.last_modified,
                eventdata.EventTimestamp.WRITTEN_TIME, cache_record_values)
        else:
            modified = None

        if cache_record_header.expire_time:
            expire = FirefoxCacheEvent(
                cache_record_header.expire_time,
                eventdata.EventTimestamp.EXPIRATION_TIME, cache_record_values)
        else:
            expire = None

        return fetched, modified, expire

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a Firefox cache file-like object.

        Args:
          parser_mediator: A parser mediator object (instance of
              ParserMediator).
          file_object: A file-like object.

        Raises:
          UnableToParseFile: when the file cannot be parsed.
        """
        filename = parser_mediator.GetFilename()
        display_name = parser_mediator.GetDisplayName()

        try:
            # Match cache filename. Five hex characters + 'm' + two digit
            # number, e.g. '01ABCm02'. 'm' is for metadata. Cache files with
            # 'd' instead contain data only.
            self._CACHE_FILENAME.parseString(filename)
        except pyparsing.ParseException:
            # Fall back to the block-file naming scheme (_CACHE_001_ etc.).
            if not filename.startswith(u'_CACHE_00'):
                raise errors.UnableToParseFile(u'Not a Firefox cache1 file.')

        firefox_config = self._GetFirefoxConfig(file_object, display_name)

        file_object.seek(firefox_config.first_record_offset)

        while file_object.get_offset() < file_object.get_size():
            try:
                fetched, modified, expire = self._NextRecord(
                    file_object, display_name, firefox_config.block_size)

                parser_mediator.ProduceEvent(fetched)

                if modified:
                    parser_mediator.ProduceEvent(modified)

                if expire:
                    parser_mediator.ProduceEvent(expire)
            except IOError:
                # _NextRecord already advanced past the bad block; log the
                # offset of the invalid record and keep scanning.
                file_offset = file_object.get_offset(
                ) - self._MINUMUM_BLOCK_SIZE
                logging.debug(
                    (u'[{0:s}] Invalid cache record in file: {1:s} at offset: '
                     u'{2:d}.').format(self.NAME, display_name, file_offset))
Bytes = lambda name: c.ExprAdapter(c.OptionalGreedyRange(c.StaticField(name, 1)), encoder=lambda obj, ctx : list(obj), decoder=lambda obj, ctx : ''.join(obj) ) _commands = { 'choke' : [c.Magic('\x00')], 'unchoke' : [c.Magic('\x01')], 'interested' : [c.Magic('\x02')], 'uninterested' : [c.Magic('\x03')], 'have' : [c.Magic('\x04'), c.UBInt32('index')], 'bitfield' : [c.Magic('\x05'), Bytes('bits')], 'request' : [c.Magic('\x06'), c.UBInt32('index'), c.UBInt32('begin'), c.UBInt32('length')], 'piece' : [c.Magic('\x07'), c.UBInt32('index'), c.UBInt32('begin'), Bytes('data')], 'cancel' : [c.Magic('\x08'), c.UBInt32('index'), c.UBInt32('begin'), c.UBInt32('length')], 'port' : [c.Magic('\x09'), c.UBInt16('port')], 'extended' : [c.Magic('\x14'), c.UBInt8('cmd'), Bytes('msg')], } for k, v in _commands.items(): _commands[k] = c.Struct(k, *v) def build_handshake(info_hash, host_id, extensions): bits = bitarray.bitarray([0]*64, endian='little') for i in extensions: bits[i] = True obj = c.Container(info_hash=info_hash, peer_id=host_id, reserved=bits.tobytes())
class CupsIppParser(interface.BaseParser):
    """Parser for CUPS IPP files."""

    NAME = 'cups_ipp'
    DESCRIPTION = u'Parser for CUPS IPP files.'

    # INFO:
    # For each file, we have only one document with three different timestamps:
    # Created, process and finished.
    # Format:
    # [HEADER: MAGIC + KNOWN_TYPE][GROUP A]...[GROUP Z][GROUP_END: 0x03]
    # GROUP: [GROUP ID][PAIR A]...[PAIR Z] where [PAIR: NAME + VALUE]
    #   GROUP ID: [1byte ID]
    #   PAIR: [TagID][\x00][Name][Value])
    #     TagID: 1 byte integer with the type of "Value".
    #     Name: [Length][Text][\00]
    #       Name can be empty when the name has more than one value.
    #       Example: family name "lopez mata" with more than one surname.
    #       Type_Text + [0x06, family, 0x00] + [0x05, lopez, 0x00] +
    #       Type_Text + [0x00, 0x00] + [0x04, mata, 0x00]
    #     Value: can be integer, boolean, or text provided by TagID.
    #       If boolean, Value: [\x01][0x00(False)] or [\x01(True)]
    #       If integer, Value: [\x04][Integer]
    #       If text, Value: [Length text][Text][\00]

    # Magic number that identify the CUPS IPP supported version.
    IPP_MAJOR_VERSION = 2
    IPP_MINOR_VERSION = 0

    # Supported Operation ID.
    IPP_OP_ID = 5

    # CUPS IPP File header.
    CUPS_IPP_HEADER = construct.Struct(
        'cups_ipp_header_struct',
        construct.UBInt8('major_version'),
        construct.UBInt8('minor_version'),
        construct.UBInt16('operation_id'),
        construct.UBInt32('request_id'))

    # Group ID that indicates the end of the IPP Control file.
    GROUP_END = 3

    # Identification Groups.
    GROUP_LIST = [1, 2, 4, 5, 6, 7]

    # Type ID.
    TYPE_GENERAL_INTEGER = 32
    TYPE_INTEGER = 33
    TYPE_ENUMERATION = 35
    TYPE_BOOL = 34

    # Type of values that can be extracted.
    INTEGER_8 = construct.UBInt8('integer')
    INTEGER_32 = construct.UBInt32('integer')
    TEXT = construct.PascalString(
        'text', length_field=construct.UBInt8('length'))
    BOOLEAN = construct.Struct(
        'boolean_value', construct.Padding(1), INTEGER_8)
    INTEGER = construct.Struct(
        'integer_value', construct.Padding(1), INTEGER_32)

    # Name of the pair: Pascal string followed by a NUL terminator byte.
    PAIR_NAME = construct.Struct('pair_name', TEXT, construct.Padding(1))

    # Specific CUPS IPP to generic name.
    NAME_PAIR_TRANSLATION = {
        'printer-uri': u'uri',
        'job-uuid': u'job_id',
        'DestinationPrinterID': u'printer_id',
        'job-originating-user-name': u'user',
        'job-name': u'job_name',
        'document-format': u'doc_type',
        'job-originating-host-name': u'computer_name',
        'com.apple.print.JobInfo.PMApplicationName': u'application',
        'com.apple.print.JobInfo.PMJobOwner': u'owner'}

    def Parse(self, parser_context, file_entry):
        """Extracts events from a CUPS IPP file.

        Validates the file header, reads every name/value pair into a
        dictionary and produces creation, start and end events from the
        time-at-* attributes.

        Args:
          parser_context: A parser context object (instance of ParserContext).
          file_entry: A file entry object (instance of dfvfs.FileEntry).

        Raises:
          UnableToParseFile: if the header cannot be parsed or the version is
              not supported.
        """
        file_object = file_entry.GetFileObject()
        file_object.seek(0, os.SEEK_SET)

        try:
            header = self.CUPS_IPP_HEADER.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            file_object.close()
            raise errors.UnableToParseFile(
                u'Unable to parse CUPS IPP Header with error: {0:s}'.format(
                    exception))

        if (header.major_version != self.IPP_MAJOR_VERSION or
                header.minor_version != self.IPP_MINOR_VERSION):
            file_object.close()
            raise errors.UnableToParseFile(
                u'[{0:s}] Unsupported version number.'.format(self.NAME))

        if header.operation_id != self.IPP_OP_ID:
            # Warn if the operation ID differs from the standard one. We
            # should be able to parse the file nonetheless.
            logging.debug(
                u'[{0:s}] Unsupported operation identifier in file: {1:s}.'.
                format(self.NAME, parser_context.GetDisplayName(file_entry)))

        # Read the pairs extracting the name and the value.
        data_dict = {}
        name, value = self.ReadPair(parser_context, file_entry, file_object)
        while name or value:
            # Translate the known "name" CUPS IPP to a generic name value.
            pretty_name = self.NAME_PAIR_TRANSLATION.get(name, name)
            # A name may repeat; collect all of its values in a list.
            data_dict.setdefault(pretty_name, []).append(value)
            name, value = self.ReadPair(
                parser_context, file_entry, file_object)

        # Yield the events.
        if u'time-at-creation' in data_dict:
            event_object = CupsIppEvent(
                data_dict['time-at-creation'][0],
                eventdata.EventTimestamp.CREATION_TIME, data_dict)
            parser_context.ProduceEvent(
                event_object, parser_name=self.NAME, file_entry=file_entry)

        if u'time-at-processing' in data_dict:
            event_object = CupsIppEvent(
                data_dict['time-at-processing'][0],
                eventdata.EventTimestamp.START_TIME, data_dict)
            parser_context.ProduceEvent(
                event_object, parser_name=self.NAME, file_entry=file_entry)

        if u'time-at-completed' in data_dict:
            event_object = CupsIppEvent(
                data_dict['time-at-completed'][0],
                eventdata.EventTimestamp.END_TIME, data_dict)
            parser_context.ProduceEvent(
                event_object, parser_name=self.NAME, file_entry=file_entry)

        file_object.close()

    def ReadPair(self, parser_context, file_entry, file_object):
        """Reads an attribute name and value pair from a CUPS IPP event.

        Args:
          parser_context: A parser context object (instance of ParserContext).
          file_entry: A file entry object (instance of dfvfs.FileEntry).
          file_object: a file-like object that points to a file.

        Returns:
          A tuple of name and value. If name and value cannot be read, or the
          end-of-file group marker is reached, both are set to None.
        """
        # Pair = Type ID + Name + Value.
        try:
            # Can be:
            #   Group ID + IDtag = Group ID (1byte) + Tag ID (1byte) + '0x00'.
            #   IDtag = Tag ID (1byte) + '0x00'.
            type_id = self.INTEGER_8.parse_stream(file_object)
            if type_id == self.GROUP_END:
                return None, None
            elif type_id in self.GROUP_LIST:
                # If it is a group ID we must read the next byte that contains
                # the first TagID.
                type_id = self.INTEGER_8.parse_stream(file_object)
            # 0x00 separator character.
            _ = self.INTEGER_8.parse_stream(file_object)
        except (IOError, construct.FieldError):
            logging.warning(
                u'[{0:s}] Unsupported identifier in file: {1:s}.'.format(
                    self.NAME, parser_context.GetDisplayName(file_entry)))
            return None, None

        # Name = Length name + name + 0x00
        try:
            name = self.PAIR_NAME.parse_stream(file_object).text
        except (IOError, construct.FieldError):
            logging.warning(u'[{0:s}] Unsupported name in file: {1:s}.'.format(
                self.NAME, parser_context.GetDisplayName(file_entry)))
            return None, None

        # Value: can be integer, boolean or text select by Type ID.
        try:
            if type_id in [
                    self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER,
                    self.TYPE_ENUMERATION]:
                value = self.INTEGER.parse_stream(file_object).integer
            elif type_id == self.TYPE_BOOL:
                value = bool(self.BOOLEAN.parse_stream(file_object).integer)
            else:
                value = self.TEXT.parse_stream(file_object)
        except (IOError, construct.FieldError):
            logging.warning(
                u'[{0:s}] Unsupported value in file: {1:s}.'.format(
                    self.NAME, parser_context.GetDisplayName(file_entry)))
            return None, None

        return name, value
class JavaIDXParser(interface.FileObjectParser):
    """Parse Java WebStart Cache IDX files for download events.

    There are five structures defined. 6.02 files had one generic section
    that retained all data. From 6.03, the file went to a multi-section
    format where later sections were optional and had variable-lengths.
    6.03, 6.04, and 6.05 files all have their main data section (#2) begin
    at offset 128. The short structure is because 6.05 files deviate after
    the 8th byte. So, grab the first 8 bytes to ensure it's valid, get the
    file version, then continue on with the correct structures.
    """

    _INITIAL_FILE_OFFSET = None

    NAME = u'java_idx'
    DESCRIPTION = u'Parser for Java WebStart Cache IDX files.'

    # First 6 bytes of every IDX file: busy/incomplete flags and the version.
    IDX_SHORT_STRUCT = construct.Struct(
        u'magic',
        construct.UBInt8(u'busy'),
        construct.UBInt8(u'incomplete'),
        construct.UBInt32(u'idx_version'))

    # Single all-in-one section used by 6.02 files.
    IDX_602_STRUCT = construct.Struct(
        u'IDX_602_Full',
        construct.UBInt16(u'null_space'),
        construct.UBInt8(u'shortcut'),
        construct.UBInt32(u'content_length'),
        construct.UBInt64(u'last_modified_date'),
        construct.UBInt64(u'expiration_date'),
        construct.PascalString(
            u'version_string', length_field=construct.UBInt16(u'length')),
        construct.PascalString(
            u'url', length_field=construct.UBInt16(u'length')),
        construct.PascalString(
            u'namespace', length_field=construct.UBInt16(u'length')),
        construct.UBInt32(u'FieldCount'))

    # Section 1 shared by 6.03/6.04/6.05 files.
    IDX_605_SECTION_ONE_STRUCT = construct.Struct(
        u'IDX_605_Section1',
        construct.UBInt8(u'shortcut'),
        construct.UBInt32(u'content_length'),
        construct.UBInt64(u'last_modified_date'),
        construct.UBInt64(u'expiration_date'),
        construct.UBInt64(u'validation_date'),
        construct.UBInt8(u'signed'),
        construct.UBInt32(u'sec2len'),
        construct.UBInt32(u'sec3len'),
        construct.UBInt32(u'sec4len'))

    # Section 2 (at static offset 128) shared by 6.03/6.04/6.05 files.
    IDX_605_SECTION_TWO_STRUCT = construct.Struct(
        u'IDX_605_Section2',
        construct.PascalString(
            u'version', length_field=construct.UBInt16(u'length')),
        construct.PascalString(
            u'url', length_field=construct.UBInt16(u'length')),
        construct.PascalString(
            u'namespec', length_field=construct.UBInt16(u'length')),
        construct.PascalString(
            u'ip_address', length_field=construct.UBInt16(u'length')),
        construct.UBInt32(u'FieldCount'))

    # Java uses Pascal-style strings, but with a 2-byte length field.
    JAVA_READUTF_STRING = construct.Struct(
        u'Java.ReadUTF',
        construct.PascalString(
            u'string', length_field=construct.UBInt16(u'length')))

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a Java WebStart Cache IDX file-like object.

        Produces a "File Hosted Date" event from the last modified date and,
        when present, expiration and download-date events.

        Args:
          parser_mediator: A parser mediator object (instance of
              ParserMediator).
          file_object: A file-like object.

        Raises:
          UnableToParseFile: when the file cannot be parsed.
        """
        file_object.seek(0, os.SEEK_SET)
        try:
            magic = self.IDX_SHORT_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                u'Unable to parse Java IDX file with error: {0:s}.'.format(
                    exception))

        # Fields magic.busy and magic.incomplete are normally 0x00. They
        # are set to 0x01 if the file is currently being downloaded. Logic
        # checks for > 1 to avoid a race condition and still reject any
        # file with other data.
        # Field magic.idx_version is the file version, of which only
        # certain versions are supported.
        if magic.busy > 1 or magic.incomplete > 1:
            raise errors.UnableToParseFile(u'Not a valid Java IDX file')

        if not magic.idx_version in [602, 603, 604, 605]:
            raise errors.UnableToParseFile(u'Not a valid Java IDX file')

        # Obtain the relevant values from the file. The last modified date
        # denotes when the file was last modified on the HOST. For example,
        # when the file was uploaded to a web server.
        if magic.idx_version == 602:
            section_one = self.IDX_602_STRUCT.parse_stream(file_object)
            last_modified_date = section_one.last_modified_date
            url = section_one.url
            ip_address = u'Unknown'
            http_header_count = section_one.FieldCount
        elif magic.idx_version in [603, 604, 605]:
            # IDX 6.03 and 6.04 have two unused bytes before the structure.
            if magic.idx_version in [603, 604]:
                file_object.read(2)

            # IDX 6.03, 6.04, and 6.05 files use the same structures for the
            # remaining data.
            section_one = self.IDX_605_SECTION_ONE_STRUCT.parse_stream(
                file_object)
            last_modified_date = section_one.last_modified_date
            if file_object.get_size() > 128:
                file_object.seek(128, os.SEEK_SET)
                # Static offset for section 2.
                section_two = self.IDX_605_SECTION_TWO_STRUCT.parse_stream(
                    file_object)
                url = section_two.url
                ip_address = section_two.ip_address
                http_header_count = section_two.FieldCount
            else:
                url = u'Unknown'
                ip_address = u'Unknown'
                http_header_count = 0

        # File offset is now just prior to HTTP headers. Make sure there
        # are headers, and then parse them to retrieve the download date.
        download_date = None
        for field in range(0, http_header_count):
            # NOTE(review): the numeric loop variable is immediately shadowed
            # by the parsed header-field structure; the index itself is
            # unused.
            field = self.JAVA_READUTF_STRING.parse_stream(file_object)
            value = self.JAVA_READUTF_STRING.parse_stream(file_object)
            if field.string == u'date':
                # Time string "should" be in UTC or have an associated time
                # zone information in the string itself. If that is not the
                # case then there is no reliable method for plaso to determine
                # the proper timezone, so the assumption is that it is UTC.
                try:
                    download_date = timelib.Timestamp.FromTimeString(
                        value.string, gmt_as_timezone=False)
                except errors.TimestampError:
                    download_date = None
                    parser_mediator.ProduceExtractionError(
                        u'Unable to parse time value: {0:s}'.format(
                            value.string))

        if not url or not ip_address:
            raise errors.UnableToParseFile(
                u'Unexpected Error: URL or IP address not found in file.')

        event_data = JavaIDXEventData()
        event_data.idx_version = magic.idx_version
        event_data.ip_address = ip_address
        event_data.url = url

        date_time = dfdatetime_java_time.JavaTime(
            timestamp=last_modified_date)
        # TODO: Move the timestamp description into eventdata.
        event = time_events.DateTimeValuesEvent(
            date_time, u'File Hosted Date')
        parser_mediator.ProduceEventWithEventData(event, event_data)

        if section_one:
            expiration_date = section_one.get(u'expiration_date', None)
            if expiration_date:
                date_time = dfdatetime_java_time.JavaTime(
                    timestamp=expiration_date)
                event = time_events.DateTimeValuesEvent(
                    date_time, definitions.TIME_DESCRIPTION_EXPIRATION)
                parser_mediator.ProduceEventWithEventData(event, event_data)

        if download_date:
            event = time_events.TimestampEvent(
                download_date, definitions.TIME_DESCRIPTION_FILE_DOWNLOADED)
            parser_mediator.ProduceEventWithEventData(event, event_data)
class FirefoxCacheParser(BaseFirefoxCacheParser):
    """Parses Firefox cache version 1 files (Firefox 31 or earlier).

    Version 1 cache data lives in block files made up of fixed-size blocks;
    each record starts with a 36-byte big-endian header followed by the
    request URL and the HTTP response headers.
    """

    NAME = 'firefox_cache'
    DESCRIPTION = (
        'Parser for Firefox Cache version 1 files (Firefox 31 or earlier).')

    # Cache format version handled by this parser.
    _CACHE_VERSION = 1

    # Initial size of Firefox 4 and later cache files.
    _INITIAL_CACHE_FILE_SIZE = 4 * 1024 * 1024

    # Smallest possible block size in Firefox cache files.
    # NOTE(review): name misspells "MINIMUM"; kept as-is since other code may
    # reference it.
    _MINUMUM_BLOCK_SIZE = 256

    # Fixed-size big-endian header that precedes every cache record.
    _CACHE_RECORD_HEADER_STRUCT = construct.Struct(
        'record_header',
        construct.UBInt16('major'),
        construct.UBInt16('minor'),
        construct.UBInt32('location'),
        construct.UBInt32('fetch_count'),
        construct.UBInt32('last_fetched'),
        construct.UBInt32('last_modified'),
        construct.UBInt32('expire_time'),
        construct.UBInt32('data_size'),
        construct.UBInt32('request_size'),
        construct.UBInt32('info_size'))

    _CACHE_RECORD_HEADER_SIZE = _CACHE_RECORD_HEADER_STRUCT.sizeof()

    # TODO: change into regexp.
    # Matches metadata cache filenames: five hex characters + 'm' + two
    # digits, e.g. '01ABCm02'.
    _CACHE_FILENAME = (
        pyparsing.Word(pyparsing.hexnums, exact=5) +
        pyparsing.Word('m', exact=1) +
        pyparsing.Word(pyparsing.nums, exact=2))

    # Per-file configuration: detected block size and the offset of the first
    # valid cache record.
    FIREFOX_CACHE_CONFIG = collections.namedtuple(
        'firefox_cache_config', 'block_size first_record_offset')

    def _GetFirefoxConfig(self, file_object, display_name):
        """Determine cache file block size.

        Scans forward until a record parses and validates, then derives the
        block size from that record's total size.

        Args:
          file_object (dfvfs.FileIO): a file-like object.
          display_name (str): display name.

        Returns:
          FIREFOX_CACHE_CONFIG: named tuple with the detected block size and
              the offset of the first valid record.

        Raises:
          UnableToParseFile: if no valid cache record could be found.
        """
        # There ought to be a valid record within the first 4 MiB. We use this
        # limit to prevent reading large invalid files.
        to_read = min(file_object.get_size(), self._INITIAL_CACHE_FILE_SIZE)

        while file_object.get_offset() < to_read:
            offset = file_object.get_offset()
            try:
                # We have not yet determined the block size, so we use the
                # smallest possible size.
                cache_record_header, _ = self._ReadCacheEntry(
                    file_object, display_name, self._MINUMUM_BLOCK_SIZE)

                # The total record size determines which cache file flavor
                # (and therefore block size) this is.
                record_size = (
                    self._CACHE_RECORD_HEADER_SIZE +
                    cache_record_header.request_size +
                    cache_record_header.info_size)

                if record_size >= 4096:
                    # _CACHE_003_
                    block_size = 4096
                elif record_size >= 1024:
                    # _CACHE_002_
                    block_size = 1024
                else:
                    # _CACHE_001_
                    block_size = 256

                return self.FIREFOX_CACHE_CONFIG(block_size, offset)

            except IOError:
                # Invalid record: _ReadCacheEntry already advanced the
                # stream; keep scanning for the first valid one.
                logging.debug('[{0:s}] {1:s}:{2:d}: Invalid record.'.format(
                    self.NAME, display_name, offset))

        raise errors.UnableToParseFile(
            'Could not find a valid cache record. Not a Firefox cache file.')

    def _ParseCacheEntry(
            self, parser_mediator, file_object, display_name, block_size):
        """Parses a cache entry.

        Produces a last-visited event and, when the corresponding timestamps
        are non-zero, written and expiration events.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          file_object (dfvfs.FileIO): a file-like object.
          display_name (str): display name.
          block_size (int): block size.
        """
        cache_record_header, event_data = self._ReadCacheEntry(
            file_object, display_name, block_size)

        date_time = dfdatetime_posix_time.PosixTime(
            timestamp=cache_record_header.last_fetched)
        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_LAST_VISITED)
        parser_mediator.ProduceEventWithEventData(event, event_data)

        if cache_record_header.last_modified:
            date_time = dfdatetime_posix_time.PosixTime(
                timestamp=cache_record_header.last_modified)
            event = time_events.DateTimeValuesEvent(
                date_time, definitions.TIME_DESCRIPTION_WRITTEN)
            parser_mediator.ProduceEventWithEventData(event, event_data)

        if cache_record_header.expire_time:
            date_time = dfdatetime_posix_time.PosixTime(
                timestamp=cache_record_header.expire_time)
            event = time_events.DateTimeValuesEvent(
                date_time, definitions.TIME_DESCRIPTION_EXPIRATION)
            parser_mediator.ProduceEventWithEventData(event, event_data)

    def _ReadCacheEntry(self, file_object, display_name, block_size):
        """Reads a cache entry.

        Args:
          file_object (dfvfs.FileIO): a file-like object.
          display_name (str): display name.
          block_size (int): block size.

        Returns:
          tuple: contains:
            construct.Struct: cache record header structure.
            FirefoxCacheEventData: event data.

        Raises:
          IOError: if the stream cannot be parsed or the record header is not
              valid. In the invalid-header case the stream is first advanced
              to the next candidate block.
        """
        offset = file_object.get_offset()

        try:
            cache_record_header = self._CACHE_RECORD_HEADER_STRUCT.parse_stream(
                file_object)
        except (IOError, construct.FieldError):
            raise IOError('Unable to parse stream.')

        if not self._ValidateCacheRecordHeader(cache_record_header):
            # Move reader to next candidate block.
            file_offset = block_size - self._CACHE_RECORD_HEADER_SIZE
            file_object.seek(file_offset, os.SEEK_CUR)
            raise IOError('Not a valid Firefox cache record.')

        # The URL string is NUL-terminated.
        url = file_object.read(cache_record_header.request_size)[:-1]

        # HTTP response header, even elements are keys, odd elements values.
        header_data = file_object.read(cache_record_header.info_size)

        request_method, response_code = self._ParseHTTPHeaders(
            header_data, offset, display_name)

        # A request can span multiple blocks, so we use modulo.
        file_offset = file_object.get_offset() - offset
        _, remainder = divmod(file_offset, block_size)

        # Move reader to next candidate block. Include the null-byte skipped
        # above.
        file_object.seek(block_size - remainder, os.SEEK_CUR)

        event_data = FirefoxCacheEventData()
        event_data.data_size = cache_record_header.data_size
        event_data.fetch_count = cache_record_header.fetch_count
        event_data.info_size = cache_record_header.info_size
        event_data.location = cache_record_header.location
        event_data.major = cache_record_header.major
        event_data.minor = cache_record_header.minor
        event_data.request_method = request_method
        event_data.request_size = cache_record_header.request_size
        event_data.response_code = response_code
        event_data.url = url
        event_data.version = self._CACHE_VERSION

        return cache_record_header, event_data

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a Firefox cache file-like object.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          file_object (dfvfs.FileIO): a file-like object.

        Raises:
          UnableToParseFile: when the file cannot be parsed.
        """
        filename = parser_mediator.GetFilename()
        display_name = parser_mediator.GetDisplayName()

        try:
            # Match cache filename. Five hex characters + 'm' + two digit
            # number, e.g. '01ABCm02'. 'm' is for metadata. Cache files with
            # 'd' instead contain data only.
            self._CACHE_FILENAME.parseString(filename)
        except pyparsing.ParseException:
            # Fall back to the block-file naming scheme (_CACHE_001_ etc.).
            if not filename.startswith('_CACHE_00'):
                raise errors.UnableToParseFile('Not a Firefox cache1 file.')

        firefox_config = self._GetFirefoxConfig(file_object, display_name)

        file_object.seek(firefox_config.first_record_offset)

        while file_object.get_offset() < file_object.get_size():
            try:
                self._ParseCacheEntry(
                    parser_mediator, file_object, display_name,
                    firefox_config.block_size)
            except IOError:
                # _ReadCacheEntry already advanced past the bad block; log
                # the offset of the invalid record and keep scanning.
                file_offset = file_object.get_offset(
                ) - self._MINUMUM_BLOCK_SIZE
                logging.debug(
                    ('[{0:s}] Invalid cache record in file: {1:s} at offset: '
                     '{2:d}.').format(self.NAME, display_name, file_offset))
) announce_req = c.Struct('request', c.UBInt64('connection_id'), c.UBInt32('action'), c.UBInt32('transaction_id'), c.Bytes('info_hash', 20), c.Bytes('peer_id', 20), c.UBInt64('downloaded'), c.UBInt64('left'), c.UBInt64('uploaded'), c.UBInt32('event'), c.UBInt32('ip_addr'), c.UBInt32('key'), c.SBInt32('num_want'), c.UBInt16('port'), ) announce_resp = c.Struct('response', c.UBInt32('action'), c.UBInt32('transaction_id'), c.UBInt32('interval'), c.UBInt32('leechers'), c.UBInt32('seeders'), c.GreedyRange( c.Struct('peer', c.Array(4, c.UBInt8('addr')), c.UBInt16('port') ) ) )
class BSMParser(interface.FileObjectParser): """Parser for BSM files.""" NAME = 'bsm_log' DESCRIPTION = 'Parser for BSM log files.' # BSM supported version (0x0b = 11). AUDIT_HEADER_VERSION = 11 # Magic Trail Header. BSM_TOKEN_TRAILER_MAGIC = b'b105' # IP Version constants. AU_IPv4 = 4 AU_IPv6 = 16 IPV4_STRUCT = construct.UBInt32('ipv4') IPV6_STRUCT = construct.Struct( 'ipv6', construct.UBInt64('high'), construct.UBInt64('low')) # Tested structures. # INFO: I have ommited the ID in the structures declaration. # I used the BSM_TYPE first to read the ID, and then, the structure. # Tokens always start with an ID value that identifies their token # type and subsequent structure. _BSM_TOKEN = construct.UBInt8('token_id') # Data type structures. BSM_TOKEN_DATA_CHAR = construct.String('value', 1) BSM_TOKEN_DATA_SHORT = construct.UBInt16('value') BSM_TOKEN_DATA_INTEGER = construct.UBInt32('value') # Common structure used by other structures. # audit_uid: integer, uid that generates the entry. # effective_uid: integer, the permission user used. # effective_gid: integer, the permission group used. # real_uid: integer, user id of the user that execute the process. # real_gid: integer, group id of the group that execute the process. # pid: integer, identification number of the process. # session_id: unknown, need research. BSM_TOKEN_SUBJECT_SHORT = construct.Struct( 'subject_data', construct.UBInt32('audit_uid'), construct.UBInt32('effective_uid'), construct.UBInt32('effective_gid'), construct.UBInt32('real_uid'), construct.UBInt32('real_gid'), construct.UBInt32('pid'), construct.UBInt32('session_id')) # Common structure used by other structures. # Identify the kind of inet (IPv4 or IPv6) # TODO: instead of 16, AU_IPv6 must be used. BSM_IP_TYPE_SHORT = construct.Struct( 'bsm_ip_type_short', construct.UBInt32('net_type'), construct.Switch( 'ip_addr', _BSMTokenGetNetType, {16: IPV6_STRUCT}, default=IPV4_STRUCT)) # Initial fields structure used by header structures. 
# length: integer, the length of the entry, equal to trailer (doc: length). # version: integer, version of BSM (AUDIT_HEADER_VERSION). # event_type: integer, the type of event (/etc/security/audit_event). # modifier: integer, unknown, need research (It is always 0). BSM_HEADER = construct.Struct( 'bsm_header', construct.UBInt32('length'), construct.UBInt8('version'), construct.UBInt16('event_type'), construct.UBInt16('modifier')) # First token of one entry. # timestamp: unsigned integer, number of seconds since # January 1, 1970 00:00:00 UTC. # microseconds: unsigned integer, number of micro seconds. BSM_HEADER32 = construct.Struct( 'bsm_header32', BSM_HEADER, construct.UBInt32('timestamp'), construct.UBInt32('microseconds')) BSM_HEADER64 = construct.Struct( 'bsm_header64', BSM_HEADER, construct.UBInt64('timestamp'), construct.UBInt64('microseconds')) BSM_HEADER32_EX = construct.Struct( 'bsm_header32_ex', BSM_HEADER, BSM_IP_TYPE_SHORT, construct.UBInt32('timestamp'), construct.UBInt32('microseconds')) # Token TEXT, provides extra information. BSM_TOKEN_TEXT = construct.Struct( 'bsm_token_text', construct.UBInt16('length'), construct.Array(_BSMTokenGetLength, construct.UBInt8('text'))) # Path of the executable. BSM_TOKEN_PATH = BSM_TOKEN_TEXT # Identified the end of the record (follow by TRAILER). # status: integer that identifies the status of the exit (BSM_ERRORS). # return: returned value from the operation. BSM_TOKEN_RETURN32 = construct.Struct( 'bsm_token_return32', construct.UBInt8('status'), construct.UBInt32('return_value')) BSM_TOKEN_RETURN64 = construct.Struct( 'bsm_token_return64', construct.UBInt8('status'), construct.UBInt64('return_value')) # Identified the number of bytes that was written. # magic: 2 bytes that identifies the TRAILER (BSM_TOKEN_TRAILER_MAGIC). # length: integer that has the number of bytes from the entry size. 
BSM_TOKEN_TRAILER = construct.Struct( 'bsm_token_trailer', construct.UBInt16('magic'), construct.UBInt32('record_length')) # A 32-bits argument. # num_arg: the number of the argument. # name_arg: the argument's name. # text: the string value of the argument. BSM_TOKEN_ARGUMENT32 = construct.Struct( 'bsm_token_argument32', construct.UBInt8('num_arg'), construct.UBInt32('name_arg'), construct.UBInt16('length'), construct.Array(_BSMTokenGetLength, construct.UBInt8('text'))) # A 64-bits argument. # num_arg: integer, the number of the argument. # name_arg: text, the argument's name. # text: the string value of the argument. BSM_TOKEN_ARGUMENT64 = construct.Struct( 'bsm_token_argument64', construct.UBInt8('num_arg'), construct.UBInt64('name_arg'), construct.UBInt16('length'), construct.Array(_BSMTokenGetLength, construct.UBInt8('text'))) # Identify an user. # terminal_id: unknown, research needed. # terminal_addr: unknown, research needed. BSM_TOKEN_SUBJECT32 = construct.Struct( 'bsm_token_subject32', BSM_TOKEN_SUBJECT_SHORT, construct.UBInt32('terminal_port'), IPV4_STRUCT) # Identify an user using a extended Token. # terminal_port: unknown, need research. # net_type: unknown, need research. BSM_TOKEN_SUBJECT32_EX = construct.Struct( 'bsm_token_subject32_ex', BSM_TOKEN_SUBJECT_SHORT, construct.UBInt32('terminal_port'), BSM_IP_TYPE_SHORT) # au_to_opaque // AUT_OPAQUE BSM_TOKEN_OPAQUE = BSM_TOKEN_TEXT # au_to_seq // AUT_SEQ BSM_TOKEN_SEQUENCE = BSM_TOKEN_DATA_INTEGER # Program execution with options. # For each argument we are going to have a string+ "\x00". # Example: [00 00 00 02][41 42 43 00 42 42 00] # 2 Arguments, Arg1: [414243] Arg2: [4242]. 
BSM_TOKEN_EXEC_ARGUMENTS = construct.UBInt32('number_arguments') BSM_TOKEN_EXEC_ARGUMENT = construct.Struct( 'bsm_token_exec_argument', construct.RepeatUntil( _BSMTokenIsEndOfString, construct.StaticField("text", 1))) # au_to_in_addr // AUT_IN_ADDR: BSM_TOKEN_ADDR = IPV4_STRUCT # au_to_in_addr_ext // AUT_IN_ADDR_EX: BSM_TOKEN_ADDR_EXT = construct.Struct( 'bsm_token_addr_ext', construct.UBInt32('net_type'), IPV6_STRUCT) # au_to_ip // AUT_IP: # TODO: parse this header in the correct way. BSM_TOKEN_IP = construct.String('binary_ipv4_add', 20) # au_to_ipc // AUT_IPC: BSM_TOKEN_IPC = construct.Struct( 'bsm_token_ipc', construct.UBInt8('object_type'), construct.UBInt32('object_id')) # au_to_ipc_perm // au_to_ipc_perm BSM_TOKEN_IPC_PERM = construct.Struct( 'bsm_token_ipc_perm', construct.UBInt32('user_id'), construct.UBInt32('group_id'), construct.UBInt32('creator_user_id'), construct.UBInt32('creator_group_id'), construct.UBInt32('access_mode'), construct.UBInt32('slot_seq'), construct.UBInt32('key')) # au_to_iport // AUT_IPORT: BSM_TOKEN_PORT = construct.UBInt16('port_number') # au_to_file // AUT_OTHER_FILE32: BSM_TOKEN_FILE = construct.Struct( 'bsm_token_file', construct.UBInt32('timestamp'), construct.UBInt32('microseconds'), construct.UBInt16('length'), construct.Array(_BSMTokenGetLength, construct.UBInt8('text'))) # au_to_subject64 // AUT_SUBJECT64: BSM_TOKEN_SUBJECT64 = construct.Struct( 'bsm_token_subject64', BSM_TOKEN_SUBJECT_SHORT, construct.UBInt64('terminal_port'), IPV4_STRUCT) # au_to_subject64_ex // AU_IPv4: BSM_TOKEN_SUBJECT64_EX = construct.Struct( 'bsm_token_subject64_ex', BSM_TOKEN_SUBJECT_SHORT, construct.UBInt32('terminal_port'), construct.UBInt32('terminal_type'), BSM_IP_TYPE_SHORT) # au_to_process32 // AUT_PROCESS32: BSM_TOKEN_PROCESS32 = construct.Struct( 'bsm_token_process32', BSM_TOKEN_SUBJECT_SHORT, construct.UBInt32('terminal_port'), IPV4_STRUCT) # au_to_process64 // AUT_PROCESS32: BSM_TOKEN_PROCESS64 = construct.Struct( 'bsm_token_process64', 
BSM_TOKEN_SUBJECT_SHORT, construct.UBInt64('terminal_port'), IPV4_STRUCT) # au_to_process32_ex // AUT_PROCESS32_EX: BSM_TOKEN_PROCESS32_EX = construct.Struct( 'bsm_token_process32_ex', BSM_TOKEN_SUBJECT_SHORT, construct.UBInt32('terminal_port'), BSM_IP_TYPE_SHORT) # au_to_process64_ex // AUT_PROCESS64_EX: BSM_TOKEN_PROCESS64_EX = construct.Struct( 'bsm_token_process64_ex', BSM_TOKEN_SUBJECT_SHORT, construct.UBInt64('terminal_port'), BSM_IP_TYPE_SHORT) # au_to_sock_inet32 // AUT_SOCKINET32: BSM_TOKEN_AUT_SOCKINET32 = construct.Struct( 'bsm_token_aut_sockinet32', construct.UBInt16('net_type'), construct.UBInt16('port_number'), IPV4_STRUCT) # Info: checked against the source code of XNU, but not against # real BSM file. BSM_TOKEN_AUT_SOCKINET128 = construct.Struct( 'bsm_token_aut_sockinet128', construct.UBInt16('net_type'), construct.UBInt16('port_number'), IPV6_STRUCT) INET6_ADDR_TYPE = construct.Struct( 'addr_type', construct.UBInt16('ip_type'), construct.UBInt16('source_port'), construct.UBInt64('saddr_high'), construct.UBInt64('saddr_low'), construct.UBInt16('destination_port'), construct.UBInt64('daddr_high'), construct.UBInt64('daddr_low')) INET4_ADDR_TYPE = construct.Struct( 'addr_type', construct.UBInt16('ip_type'), construct.UBInt16('source_port'), construct.UBInt32('source_address'), construct.UBInt16('destination_port'), construct.UBInt32('destination_address')) # au_to_socket_ex // AUT_SOCKET_EX # TODO: Change the 26 for unixbsm.BSM_PROTOCOLS.INET6. 
BSM_TOKEN_AUT_SOCKINET32_EX = construct.Struct( 'bsm_token_aut_sockinet32_ex', construct.UBInt16('socket_domain'), construct.UBInt16('socket_type'), construct.Switch( 'structure_addr_port', _BSMTokenGetSocketDomain, {26: INET6_ADDR_TYPE}, default=INET4_ADDR_TYPE)) # au_to_sock_unix // AUT_SOCKUNIX BSM_TOKEN_SOCKET_UNIX = construct.Struct( 'bsm_token_au_to_sock_unix', construct.UBInt16('family'), construct.RepeatUntil( _BSMTokenIsEndOfString, construct.StaticField("path", 1))) # au_to_data // au_to_data # how to print: bsmtoken.BSM_TOKEN_DATA_PRINT. # type: bsmtoken.BSM_TOKEN_DATA_TYPE. # unit_count: number of type values. # BSM_TOKEN_DATA has a end field = type * unit_count BSM_TOKEN_DATA = construct.Struct( 'bsm_token_data', construct.UBInt8('how_to_print'), construct.UBInt8('data_type'), construct.UBInt8('unit_count')) # au_to_attr32 // AUT_ATTR32 BSM_TOKEN_ATTR32 = construct.Struct( 'bsm_token_attr32', construct.UBInt32('file_mode'), construct.UBInt32('uid'), construct.UBInt32('gid'), construct.UBInt32('file_system_id'), construct.UBInt64('file_system_node_id'), construct.UBInt32('device')) # au_to_attr64 // AUT_ATTR64 BSM_TOKEN_ATTR64 = construct.Struct( 'bsm_token_attr64', construct.UBInt32('file_mode'), construct.UBInt32('uid'), construct.UBInt32('gid'), construct.UBInt32('file_system_id'), construct.UBInt64('file_system_node_id'), construct.UBInt64('device')) # au_to_exit // AUT_EXIT BSM_TOKEN_EXIT = construct.Struct( 'bsm_token_exit', construct.UBInt32('status'), construct.UBInt32('return_value')) # au_to_newgroups // AUT_NEWGROUPS # INFO: we must read BSM_TOKEN_DATA_INTEGER for each group. BSM_TOKEN_GROUPS = construct.UBInt16('group_number') # au_to_exec_env == au_to_exec_args BSM_TOKEN_EXEC_ENV = BSM_TOKEN_EXEC_ARGUMENTS # au_to_zonename //AUT_ZONENAME BSM_TOKEN_ZONENAME = BSM_TOKEN_TEXT # Token ID. # List of valid Token_ID. # Token_ID -> (NAME_STRUCTURE, STRUCTURE) # Only the checked structures are been added to the valid structures lists. 
_BSM_TOKEN_TYPES = { 17: ('BSM_TOKEN_FILE', BSM_TOKEN_FILE), 19: ('BSM_TOKEN_TRAILER', BSM_TOKEN_TRAILER), 20: ('BSM_HEADER32', BSM_HEADER32), 21: ('BSM_HEADER64', BSM_HEADER64), 33: ('BSM_TOKEN_DATA', BSM_TOKEN_DATA), 34: ('BSM_TOKEN_IPC', BSM_TOKEN_IPC), 35: ('BSM_TOKEN_PATH', BSM_TOKEN_PATH), 36: ('BSM_TOKEN_SUBJECT32', BSM_TOKEN_SUBJECT32), 38: ('BSM_TOKEN_PROCESS32', BSM_TOKEN_PROCESS32), 39: ('BSM_TOKEN_RETURN32', BSM_TOKEN_RETURN32), 40: ('BSM_TOKEN_TEXT', BSM_TOKEN_TEXT), 41: ('BSM_TOKEN_OPAQUE', BSM_TOKEN_OPAQUE), 42: ('BSM_TOKEN_ADDR', BSM_TOKEN_ADDR), 43: ('BSM_TOKEN_IP', BSM_TOKEN_IP), 44: ('BSM_TOKEN_PORT', BSM_TOKEN_PORT), 45: ('BSM_TOKEN_ARGUMENT32', BSM_TOKEN_ARGUMENT32), 47: ('BSM_TOKEN_SEQUENCE', BSM_TOKEN_SEQUENCE), 96: ('BSM_TOKEN_ZONENAME', BSM_TOKEN_ZONENAME), 113: ('BSM_TOKEN_ARGUMENT64', BSM_TOKEN_ARGUMENT64), 114: ('BSM_TOKEN_RETURN64', BSM_TOKEN_RETURN64), 116: ('BSM_HEADER32_EX', BSM_HEADER32_EX), 119: ('BSM_TOKEN_PROCESS64', BSM_TOKEN_PROCESS64), 122: ('BSM_TOKEN_SUBJECT32_EX', BSM_TOKEN_SUBJECT32_EX), 127: ('BSM_TOKEN_AUT_SOCKINET32_EX', BSM_TOKEN_AUT_SOCKINET32_EX), 128: ('BSM_TOKEN_AUT_SOCKINET32', BSM_TOKEN_AUT_SOCKINET32)} # Untested structures. # When not tested structure is found, we try to parse using also # these structures. 
BSM_TYPE_LIST_NOT_TESTED = { 49: ('BSM_TOKEN_ATTR', BSM_TOKEN_ATTR32), 50: ('BSM_TOKEN_IPC_PERM', BSM_TOKEN_IPC_PERM), 52: ('BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS), 59: ('BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS), 60: ('BSM_TOKEN_EXEC_ARGUMENTS', BSM_TOKEN_EXEC_ARGUMENTS), 61: ('BSM_TOKEN_EXEC_ENV', BSM_TOKEN_EXEC_ENV), 62: ('BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32), 82: ('BSM_TOKEN_EXIT', BSM_TOKEN_EXIT), 115: ('BSM_TOKEN_ATTR64', BSM_TOKEN_ATTR64), 117: ('BSM_TOKEN_SUBJECT64', BSM_TOKEN_SUBJECT64), 123: ('BSM_TOKEN_PROCESS32_EX', BSM_TOKEN_PROCESS32_EX), 124: ('BSM_TOKEN_PROCESS64_EX', BSM_TOKEN_PROCESS64_EX), 125: ('BSM_TOKEN_SUBJECT64_EX', BSM_TOKEN_SUBJECT64_EX), 126: ('BSM_TOKEN_ADDR_EXT', BSM_TOKEN_ADDR_EXT), 129: ('BSM_TOKEN_AUT_SOCKINET128', BSM_TOKEN_AUT_SOCKINET128), 130: ('BSM_TOKEN_SOCKET_UNIX', BSM_TOKEN_SOCKET_UNIX)} MESSAGE_CAN_NOT_SAVE = ( 'Plaso: some tokens from this entry can not be saved. Entry at 0x{0:X} ' 'with unknown token id "0x{1:X}".') # BSM token types: # https://github.com/openbsm/openbsm/blob/master/sys/bsm/audit_record.h _BSM_TOKEN_TYPE_ARGUMENT32 = 45 _BSM_TOKEN_TYPE_ARGUMENT64 = 113 _BSM_TOKEN_TYPE_ATTR = 49 _BSM_TOKEN_TYPE_ATTR32 = 62 _BSM_TOKEN_TYPE_ATTR64 = 115 _BSM_TOKEN_TYPE_EXEC_ARGUMENTS = 60 _BSM_TOKEN_TYPE_EXEC_ENV = 61 _BSM_TOKEN_TYPE_EXIT = 82 _BSM_TOKEN_TYPE_HEADER32 = 20 _BSM_TOKEN_TYPE_HEADER32_EX = 116 _BSM_TOKEN_TYPE_HEADER64 = 21 _BSM_TOKEN_TYPE_PATH = 35 _BSM_TOKEN_TYPE_PROCESS32 = 38 _BSM_TOKEN_TYPE_PROCESS32_EX = 123 _BSM_TOKEN_TYPE_PROCESS64 = 119 _BSM_TOKEN_TYPE_PROCESS64_EX = 124 _BSM_TOKEN_TYPE_RETURN32 = 39 _BSM_TOKEN_TYPE_RETURN64 = 114 _BSM_TOKEN_TYPE_SUBJECT32 = 36 _BSM_TOKEN_TYPE_SUBJECT32_EX = 122 _BSM_TOKEN_TYPE_SUBJECT64 = 117 _BSM_TOKEN_TYPE_SUBJECT64_EX = 125 _BSM_TOKEN_TYPE_TEXT = 40 _BSM_TOKEN_TYPE_ZONENAME = 96 _BSM_ARGUMENT_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_ARGUMENT32, _BSM_TOKEN_TYPE_ARGUMENT64) _BSM_ATTR_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_ATTR, _BSM_TOKEN_TYPE_ATTR32, _BSM_TOKEN_TYPE_ATTR64) 
_BSM_EXEV_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_EXEC_ARGUMENTS, _BSM_TOKEN_TYPE_EXEC_ENV) _BSM_HEADER_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_HEADER32, _BSM_TOKEN_TYPE_HEADER32_EX, _BSM_TOKEN_TYPE_HEADER64) _BSM_PROCESS_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_PROCESS32, _BSM_TOKEN_TYPE_PROCESS64) _BSM_PROCESS_EX_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_PROCESS32_EX, _BSM_TOKEN_TYPE_PROCESS64_EX) _BSM_RETURN_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_EXIT, _BSM_TOKEN_TYPE_RETURN32, _BSM_TOKEN_TYPE_RETURN64) _BSM_SUBJECT_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_SUBJECT32, _BSM_TOKEN_TYPE_SUBJECT64) _BSM_SUBJECT_EX_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_SUBJECT32_EX, _BSM_TOKEN_TYPE_SUBJECT64_EX) _BSM_UTF8_BYTE_ARRAY_TOKEN_TYPES = ( _BSM_TOKEN_TYPE_PATH, _BSM_TOKEN_TYPE_TEXT, _BSM_TOKEN_TYPE_ZONENAME) def __init__(self): """Initializes a parser object.""" super(BSMParser, self).__init__() # Create the dictionary with all token IDs: tested and untested. self._bsm_type_list_all = self._BSM_TOKEN_TYPES.copy() self._bsm_type_list_all.update(self.BSM_TYPE_LIST_NOT_TESTED) def _CopyByteArrayToBase16String(self, byte_array): """Copies a byte array into a base-16 encoded Unicode string. Args: byte_array (bytes): A byte array. Returns: str: a base-16 encoded Unicode string. """ return ''.join(['{0:02x}'.format(byte) for byte in byte_array]) def _CopyUtf8ByteArrayToString(self, byte_array): """Copies a UTF-8 encoded byte array into a Unicode string. Args: byte_array (bytes): A byte array containing an UTF-8 encoded string. Returns: str: A Unicode string. """ byte_stream = b''.join(map(chr, byte_array)) try: string = byte_stream.decode('utf-8') except UnicodeDecodeError: logging.warning('Unable to decode UTF-8 formatted byte array.') string = byte_stream.decode('utf-8', errors='ignore') string, _, _ = string.partition(b'\x00') return string def _IPv4Format(self, address): """Formats an IPv4 address as a human readable string. Args: address (int): IPv4 address. 
Returns: str: human readable string of IPv4 address in 4 octet representation: "1.2.3.4". """ ipv4_string = self.IPV4_STRUCT.build(address) return socket.inet_ntoa(ipv4_string) def _IPv6Format(self, high, low): """Formats an IPv6 address as a human readable string. Args: high (int): upper 64-bit part of the IPv6 address. low (int): lower 64-bit part of the IPv6 address. Returns: str: human readable string of IPv6 address. """ ipv6_string = self.IPV6_STRUCT.build( construct.Container(high=high, low=low)) # socket.inet_ntop not supported in Windows. if hasattr(socket, 'inet_ntop'): return socket.inet_ntop(socket.AF_INET6, ipv6_string) # TODO: this approach returns double "::", illegal IPv6 addr. str_address = binascii.hexlify(ipv6_string) address = [] blank = False for pos in range(0, len(str_address), 4): if str_address[pos:pos + 4] == '0000': if not blank: address.append('') blank = True else: blank = False address.append(str_address[pos:pos + 4].lstrip('0')) return ':'.join(address) def _ParseBSMEvent(self, parser_mediator, file_object): """Parses a BSM entry (BSMEvent) from the file-like object. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. file_object (dfvfs.FileIO): a file-like object. Returns: bool: True if the BSM entry was parsed. """ record_start_offset = file_object.tell() try: token_type = self._BSM_TOKEN.parse_stream(file_object) except (IOError, construct.FieldError) as exception: parser_mediator.ProduceExtractionError(( 'unable to parse BSM token type at offset: 0x{0:08x} with error: ' '{1:s}.').format(record_start_offset, exception)) return False if token_type not in self._BSM_HEADER_TOKEN_TYPES: parser_mediator.ProduceExtractionError( 'unsupported token type: {0:d} at offset: 0x{1:08x}.'.format( token_type, record_start_offset)) # TODO: if it is a Mac OS X, search for the trailer magic value # as a end of the entry can be a possibility to continue. 
return False _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None)) try: token = record_structure.parse_stream(file_object) except (IOError, construct.FieldError) as exception: parser_mediator.ProduceExtractionError(( 'unable to parse BSM record at offset: 0x{0:08x} with error: ' '{1:s}.').format(record_start_offset, exception)) return False event_type = bsmtoken.BSM_AUDIT_EVENT.get( token.bsm_header.event_type, 'UNKNOWN') event_type = '{0:s} ({1:d})'.format( event_type, token.bsm_header.event_type) timestamp = (token.timestamp * 1000000) + token.microseconds date_time = dfdatetime_posix_time.PosixTimeInMicroseconds( timestamp=timestamp) record_length = token.bsm_header.length record_end_offset = record_start_offset + record_length # A dict of tokens that has the entry. extra_tokens = {} # Read until we reach the end of the record. while file_object.tell() < record_end_offset: # Check if it is a known token. try: token_type = self._BSM_TOKEN.parse_stream(file_object) except (IOError, construct.FieldError): logging.warning( 'Unable to parse the Token ID at position: {0:d}'.format( file_object.tell())) return False _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None)) if not record_structure: pending = record_end_offset - file_object.tell() new_extra_tokens = self.TryWithUntestedStructures( file_object, token_type, pending) extra_tokens.update(new_extra_tokens) else: token = record_structure.parse_stream(file_object) new_extra_tokens = self.FormatToken(token_type, token, file_object) extra_tokens.update(new_extra_tokens) if file_object.tell() > record_end_offset: logging.warning( 'Token ID {0:d} not expected at position 0x{1:08x}.' 
'Jumping for the next entry.'.format( token_type, file_object.tell())) try: file_object.seek( record_end_offset - file_object.tell(), os.SEEK_CUR) except (IOError, construct.FieldError) as exception: logging.warning( 'Unable to jump to next entry with error: {0:s}'.format(exception)) return False # BSM can be in more than one OS: BSD, Solaris and Mac OS X. if parser_mediator.platform != 'MacOSX': event_data = BSMEventData() else: event_data = MacBSMEventData() # In Mac OS X the last two tokens are the return status and the trailer. return_value = extra_tokens.get('BSM_TOKEN_RETURN32') if not return_value: return_value = extra_tokens.get('BSM_TOKEN_RETURN64') if not return_value: return_value = 'UNKNOWN' event_data.return_value = return_value event_data.event_type = event_type event_data.extra_tokens = extra_tokens event_data.offset = record_start_offset event_data.record_length = record_length # TODO: check why trailer was passed to event in original while # event was expecting record length. # if extra_tokens: # trailer = extra_tokens.get('BSM_TOKEN_TRAILER', 'unknown') event = time_events.DateTimeValuesEvent( date_time, definitions.TIME_DESCRIPTION_CREATION) parser_mediator.ProduceEventWithEventData(event, event_data) return True def _RawToUTF8(self, byte_stream): """Copies a UTF-8 byte stream into a Unicode string. Args: byte_stream (bytes): byte stream containing an UTF-8 encoded string. Returns: str: A Unicode string. """ try: string = byte_stream.decode('utf-8') except UnicodeDecodeError: logging.warning( 'Decode UTF8 failed, the message string may be cut short.') string = byte_stream.decode('utf-8', errors='ignore') return string.partition(b'\x00')[0] def ParseFileObject(self, parser_mediator, file_object, **kwargs): """Parses a BSM file-like object. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. file_object (dfvfs.FileIO): a file-like object. 
Raises: UnableToParseFile: when the file cannot be parsed. """ try: is_bsm = self.VerifyFile(parser_mediator, file_object) except (IOError, construct.FieldError) as exception: raise errors.UnableToParseFile( 'Unable to parse BSM file with error: {0:s}'.format(exception)) if not is_bsm: raise errors.UnableToParseFile('Not a BSM File, unable to parse.') file_object.seek(0, os.SEEK_SET) while self._ParseBSMEvent(parser_mediator, file_object): pass def VerifyFile(self, parser_mediator, file_object): """Check if the file is a BSM file. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. file_object (dfvfs.FileIO): a file-like object. Returns: bool: True if this is a valid BSM file, False otherwise. """ # First part of the entry is always a Header. try: token_type = self._BSM_TOKEN.parse_stream(file_object) except (IOError, construct.FieldError): return False if token_type not in self._BSM_HEADER_TOKEN_TYPES: return False _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None)) try: header = record_structure.parse_stream(file_object) except (IOError, construct.FieldError): return False if header.bsm_header.version != self.AUDIT_HEADER_VERSION: return False try: token_identifier = self._BSM_TOKEN.parse_stream(file_object) except (IOError, construct.FieldError): return False # If is Mac OS X BSM file, next entry is a text token indicating # if it is a normal start or it is a recovery track. 
if parser_mediator.platform == 'MacOSX': token_type, record_structure = self._BSM_TOKEN_TYPES.get( token_identifier, ('', None)) if not record_structure: return False if token_type != 'BSM_TOKEN_TEXT': logging.warning('It is not a valid first entry for Mac OS X BSM.') return False try: token = record_structure.parse_stream(file_object) except (IOError, construct.FieldError): return text = self._CopyUtf8ByteArrayToString(token.text) if (text != 'launchctl::Audit startup' and text != 'launchctl::Audit recovery'): logging.warning('It is not a valid first entry for Mac OS X BSM.') return False return True def TryWithUntestedStructures(self, file_object, token_id, pending): """Try to parse the pending part of the entry using untested structures. Args: file_object: BSM file. token_id: integer with the id that comes from the unknown token. pending: pending length of the entry. Returns: A list of extra tokens data that can be parsed using non-tested structures. A message indicating that a structure cannot be parsed is added for unparsed structures. """ # Data from the unknown structure. start_position = file_object.tell() start_token_id = token_id extra_tokens = {} # Read all the "pending" bytes. try: if token_id in self._bsm_type_list_all: token = self._bsm_type_list_all[token_id][1].parse_stream(file_object) new_extra_tokens = self.FormatToken(token_id, token, file_object) extra_tokens.update(new_extra_tokens) while file_object.tell() < (start_position + pending): # Check if it is a known token. 
try: token_id = self._BSM_TOKEN.parse_stream(file_object) except (IOError, construct.FieldError): logging.warning( 'Unable to parse the Token ID at position: {0:d}'.format( file_object.tell())) return if token_id not in self._bsm_type_list_all: break token = self._bsm_type_list_all[token_id][1].parse_stream(file_object) new_extra_tokens = self.FormatToken(token_id, token, file_object) extra_tokens.update(new_extra_tokens) except (IOError, construct.FieldError): token_id = 255 next_entry = (start_position + pending) if file_object.tell() != next_entry: # Unknown Structure. logging.warning('Unknown Token at "0x{0:X}", ID: {1} (0x{2:X})'.format( start_position - 1, token_id, token_id)) # TODO: another way to save this information must be found. extra_tokens.update( {'message': self.MESSAGE_CAN_NOT_SAVE.format( start_position - 1, start_token_id)}) # Move to next entry. file_object.seek(next_entry - file_object.tell(), os.SEEK_CUR) # It returns null list because it doesn't know witch structure was # the incorrect structure that makes that it can arrive to the spected # end of the entry. return {} return extra_tokens def FormatToken(self, token_id, token, file_object): """Parse the Token depending of the type of the structure. Args: token_id (int): identification of the token_type. token (structure): token struct to parse. file_object: BSM file. Returns: (dict): parsed Token values. Keys for returned dictionary are token name like BSM_TOKEN_SUBJECT32. Values of this dictionary are key-value pairs like terminal_ip:127.0.0.1. 
""" if token_id not in self._bsm_type_list_all: return {} bsm_type, _ = self._bsm_type_list_all.get(token_id, ['', '']) if token_id in self._BSM_UTF8_BYTE_ARRAY_TOKEN_TYPES: try: string = self._CopyUtf8ByteArrayToString(token.text) except TypeError: string = 'Unknown' return {bsm_type: string} elif token_id in self._BSM_RETURN_TOKEN_TYPES: return {bsm_type: { 'error': bsmtoken.BSM_ERRORS.get(token.status, 'Unknown'), 'token_status': token.status, 'call_status': token.return_value }} elif token_id in self._BSM_SUBJECT_TOKEN_TYPES: return {bsm_type: { 'aid': token.subject_data.audit_uid, 'euid': token.subject_data.effective_uid, 'egid': token.subject_data.effective_gid, 'uid': token.subject_data.real_uid, 'gid': token.subject_data.real_gid, 'pid': token.subject_data.pid, 'session_id': token.subject_data.session_id, 'terminal_port': token.terminal_port, 'terminal_ip': self._IPv4Format(token.ipv4) }} elif token_id in self._BSM_SUBJECT_EX_TOKEN_TYPES: if token.bsm_ip_type_short.net_type == self.AU_IPv6: ip = self._IPv6Format( token.bsm_ip_type_short.ip_addr.high, token.bsm_ip_type_short.ip_addr.low) elif token.bsm_ip_type_short.net_type == self.AU_IPv4: ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr) else: ip = 'unknown' return {bsm_type: { 'aid': token.subject_data.audit_uid, 'euid': token.subject_data.effective_uid, 'egid': token.subject_data.effective_gid, 'uid': token.subject_data.real_uid, 'gid': token.subject_data.real_gid, 'pid': token.subject_data.pid, 'session_id': token.subject_data.session_id, 'terminal_port': token.terminal_port, 'terminal_ip': ip }} elif token_id in self._BSM_ARGUMENT_TOKEN_TYPES: string = self._CopyUtf8ByteArrayToString(token.text) return {bsm_type: { 'string': string, 'num_arg': token.num_arg, 'is': token.name_arg}} elif token_id in self._BSM_EXEV_TOKEN_TYPES: arguments = [] for _ in range(0, token): sub_token = self.BSM_TOKEN_EXEC_ARGUMENT.parse_stream(file_object) string = self._CopyUtf8ByteArrayToString(sub_token.text) 
arguments.append(string) return {bsm_type: ' '.join(arguments)} elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET32': return {bsm_type: { 'protocols': bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'), 'net_type': token.net_type, 'port': token.port_number, 'address': self._IPv4Format(token.ipv4) }} elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET128': return {bsm_type: { 'protocols': bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'), 'net_type': token.net_type, 'port': token.port_number, 'address': self._IPv6Format(token.ipv6.high, token.ipv6.low) }} elif bsm_type == 'BSM_TOKEN_ADDR': return {bsm_type: self._IPv4Format(token)} elif bsm_type == 'BSM_TOKEN_IP': return {'IPv4_Header': '0x{0:s}]'.format(token.encode('hex'))} elif bsm_type == 'BSM_TOKEN_ADDR_EXT': return {bsm_type: { 'protocols': bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'), 'net_type': token.net_type, 'address': self._IPv6Format(token.ipv6.high, token.ipv6.low) }} elif bsm_type == 'BSM_TOKEN_PORT': return {bsm_type: token} elif bsm_type == 'BSM_TOKEN_TRAILER': return {bsm_type: token.record_length} elif bsm_type == 'BSM_TOKEN_FILE': # TODO: if this timestamp is usefull, it must be extracted as a separate # event object. 
timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond( token.timestamp, token.microseconds) date_time = timelib.Timestamp.CopyToDatetime(timestamp, pytz.UTC) date_time_string = date_time.strftime('%Y-%m-%d %H:%M:%S') string = self._CopyUtf8ByteArrayToString(token.text) return {bsm_type: {'string': string, 'timestamp': date_time_string}} elif bsm_type == 'BSM_TOKEN_IPC': return {bsm_type: { 'object_type': token.object_type, 'object_id': token.object_id }} elif token_id in self._BSM_PROCESS_TOKEN_TYPES: return {bsm_type: { 'aid': token.subject_data.audit_uid, 'euid': token.subject_data.effective_uid, 'egid': token.subject_data.effective_gid, 'uid': token.subject_data.real_uid, 'gid': token.subject_data.real_gid, 'pid': token.subject_data.pid, 'session_id': token.subject_data.session_id, 'terminal_port': token.terminal_port, 'terminal_ip': self._IPv4Format(token.ipv4) }} elif token_id in self._BSM_PROCESS_EX_TOKEN_TYPES: if token.bsm_ip_type_short.net_type == self.AU_IPv6: ip = self._IPv6Format( token.bsm_ip_type_short.ip_addr.high, token.bsm_ip_type_short.ip_addr.low) elif token.bsm_ip_type_short.net_type == self.AU_IPv4: ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr) else: ip = 'unknown' return {bsm_type: { 'aid': token.subject_data.audit_uid, 'euid': token.subject_data.effective_uid, 'egid': token.subject_data.effective_gid, 'uid': token.subject_data.real_uid, 'gid': token.subject_data.real_gid, 'pid': token.subject_data.pid, 'session_id': token.subject_data.session_id, 'terminal_port': token.terminal_port, 'terminal_ip': ip }} elif bsm_type == 'BSM_TOKEN_DATA': data = [] data_type = bsmtoken.BSM_TOKEN_DATA_TYPE.get(token.data_type, '') if data_type == 'AUR_CHAR': for _ in range(token.unit_count): data.append(self.BSM_TOKEN_DATA_CHAR.parse_stream(file_object)) elif data_type == 'AUR_SHORT': for _ in range(token.unit_count): data.append(self.BSM_TOKEN_DATA_SHORT.parse_stream(file_object)) elif data_type == 'AUR_INT32': for _ in range(token.unit_count): 
data.append(self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object)) else: data.append('Unknown type data') # TODO: the data when it is string ends with ".", HW a space is return # after uses the UTF-8 conversion. return {bsm_type: { 'format': bsmtoken.BSM_TOKEN_DATA_PRINT[token.how_to_print], 'data': '{0}'.format(self._RawToUTF8(''.join(map(str, data)))) }} elif token_id in self._BSM_ATTR_TOKEN_TYPES: return {bsm_type: { 'mode': token.file_mode, 'uid': token.uid, 'gid': token.gid, 'system_id': token.file_system_id, 'node_id': token.file_system_node_id, 'device': token.device}} elif bsm_type == 'BSM_TOKEN_GROUPS': arguments = [] for _ in range(token): arguments.append( self._RawToUTF8( self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object))) return {bsm_type: ','.join(arguments)} elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET32_EX': if bsmtoken.BSM_PROTOCOLS.get(token.socket_domain, '') == 'INET6': saddr = self._IPv6Format( token.structure_addr_port.saddr_high, token.structure_addr_port.saddr_low) daddr = self._IPv6Format( token.structure_addr_port.daddr_high, token.structure_addr_port.daddr_low) else: saddr = self._IPv4Format(token.structure_addr_port.source_address) daddr = self._IPv4Format(token.structure_addr_port.destination_address) return {bsm_type:{ 'from': saddr, 'from_port': token.structure_addr_port.source_port, 'to': daddr, 'to_port': token.structure_addr_port.destination_port}} elif bsm_type == 'BSM_TOKEN_IPC_PERM': return {bsm_type: { 'user_id': token.user_id, 'group_id': token.group_id, 'creator_user_id': token.creator_user_id, 'creator_group_id': token.creator_group_id, 'access': token.access_mode}} elif bsm_type == 'BSM_TOKEN_SOCKET_UNIX': string = self._CopyUtf8ByteArrayToString(token.path) return {bsm_type: {'family': token.family, 'path': string}} elif bsm_type == 'BSM_TOKEN_OPAQUE': string = self._CopyByteArrayToBase16String(token.text) return {bsm_type: string} elif bsm_type == 'BSM_TOKEN_SEQUENCE': return {bsm_type: token}
class BsmParser(interface.FileObjectParser):
  """Parser for BSM (Basic Security Module) audit log files.

  The file is a sequence of records; each record starts with a header
  token (BSM_HEADER32/64/32_EX), is followed by a variable list of typed
  tokens and ends with a trailer token.
  """

  _INITIAL_FILE_OFFSET = None

  NAME = u'bsm_log'
  DESCRIPTION = u'Parser for BSM log files.'

  # BSM supported version (0x0b = 11).
  AUDIT_HEADER_VERSION = 11

  # Magic Trail Header.
  BSM_TOKEN_TRAILER_MAGIC = b'b105'

  # IP version constants (values of the net_type field).
  AU_IPv4 = 4
  AU_IPv6 = 16

  IPV4_STRUCT = construct.UBInt32(u'ipv4')

  IPV6_STRUCT = construct.Struct(
      u'ipv6', construct.UBInt64(u'high'), construct.UBInt64(u'low'))

  # Tested structures.
  # INFO: the token ID is omitted in the structure declarations.
  #       BSM_TYPE is used first to read the ID, and then, the structure.
  # Tokens always start with an ID value that identifies their token
  # type and subsequent structure.
  BSM_TYPE = construct.UBInt8(u'token_id')

  # Data type structures.
  BSM_TOKEN_DATA_CHAR = construct.String(u'value', 1)
  BSM_TOKEN_DATA_SHORT = construct.UBInt16(u'value')
  BSM_TOKEN_DATA_INTEGER = construct.UBInt32(u'value')

  # Common structure used by other structures.
  # audit_uid: integer, uid that generates the entry.
  # effective_uid: integer, the permission user used.
  # effective_gid: integer, the permission group used.
  # real_uid: integer, user id of the user that execute the process.
  # real_gid: integer, group id of the group that execute the process.
  # pid: integer, identification number of the process.
  # session_id: unknown, need research.
  BSM_TOKEN_SUBJECT_SHORT = construct.Struct(
      u'subject_data',
      construct.UBInt32(u'audit_uid'),
      construct.UBInt32(u'effective_uid'),
      construct.UBInt32(u'effective_gid'),
      construct.UBInt32(u'real_uid'),
      construct.UBInt32(u'real_gid'),
      construct.UBInt32(u'pid'),
      construct.UBInt32(u'session_id'))

  # Common structure used by other structures.
  # Identify the kind of inet (IPv4 or IPv6).
  # TODO: instead of 16, AU_IPv6 must be used.
  BSM_IP_TYPE_SHORT = construct.Struct(
      u'bsm_ip_type_short',
      construct.UBInt32(u'net_type'),
      construct.Switch(
          u'ip_addr',
          _BsmTokenGetNetType,
          {16: IPV6_STRUCT},
          default=IPV4_STRUCT))

  # Initial fields structure used by header structures.
  # length: integer, the length of the entry, equal to trailer (doc: length).
  # version: integer, version of BSM (AUDIT_HEADER_VERSION).
  # event_type: integer, the type of event (/etc/security/audit_event).
  # modifier: integer, unknown, need research (It is always 0).
  BSM_HEADER = construct.Struct(
      u'bsm_header',
      construct.UBInt32(u'length'),
      construct.UBInt8(u'version'),
      construct.UBInt16(u'event_type'),
      construct.UBInt16(u'modifier'))

  # First token of one entry.
  # timestamp: unsigned integer, number of seconds since
  #            January 1, 1970 00:00:00 UTC.
  # microsecond: unsigned integer, number of micro seconds.
  BSM_HEADER32 = construct.Struct(
      u'bsm_header32',
      BSM_HEADER,
      construct.UBInt32(u'timestamp'),
      construct.UBInt32(u'microsecond'))

  BSM_HEADER64 = construct.Struct(
      u'bsm_header64',
      BSM_HEADER,
      construct.UBInt64(u'timestamp'),
      construct.UBInt64(u'microsecond'))

  BSM_HEADER32_EX = construct.Struct(
      u'bsm_header32_ex',
      BSM_HEADER,
      BSM_IP_TYPE_SHORT,
      construct.UBInt32(u'timestamp'),
      construct.UBInt32(u'microsecond'))

  # Token TEXT, provides extra information.
  BSM_TOKEN_TEXT = construct.Struct(
      u'bsm_token_text',
      construct.UBInt16(u'length'),
      construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

  # Path of the executable.
  BSM_TOKEN_PATH = BSM_TOKEN_TEXT

  # Identified the end of the record (follow by TRAILER).
  # status: integer that identifies the status of the exit (BSM_ERRORS).
  # return: returned value from the operation.
  BSM_TOKEN_RETURN32 = construct.Struct(
      u'bsm_token_return32',
      construct.UBInt8(u'status'),
      construct.UBInt32(u'return_value'))

  BSM_TOKEN_RETURN64 = construct.Struct(
      u'bsm_token_return64',
      construct.UBInt8(u'status'),
      construct.UBInt64(u'return_value'))

  # Identified the number of bytes that was written.
  # magic: 2 bytes that identifies the TRAILER (BSM_TOKEN_TRAILER_MAGIC).
  # length: integer that has the number of bytes from the entry size.
  BSM_TOKEN_TRAILER = construct.Struct(
      u'bsm_token_trailer',
      construct.UBInt16(u'magic'),
      construct.UBInt32(u'record_length'))

  # A 32-bits argument.
  # num_arg: the number of the argument.
  # name_arg: the argument's name.
  # text: the string value of the argument.
  BSM_TOKEN_ARGUMENT32 = construct.Struct(
      u'bsm_token_argument32',
      construct.UBInt8(u'num_arg'),
      construct.UBInt32(u'name_arg'),
      construct.UBInt16(u'length'),
      construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

  # A 64-bits argument.
  # num_arg: integer, the number of the argument.
  # name_arg: text, the argument's name.
  # text: the string value of the argument.
  BSM_TOKEN_ARGUMENT64 = construct.Struct(
      u'bsm_token_argument64',
      construct.UBInt8(u'num_arg'),
      construct.UBInt64(u'name_arg'),
      construct.UBInt16(u'length'),
      construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

  # Identify an user.
  # terminal_id: unknown, research needed.
  # terminal_addr: unknown, research needed.
  BSM_TOKEN_SUBJECT32 = construct.Struct(
      u'bsm_token_subject32',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32(u'terminal_port'),
      IPV4_STRUCT)

  # Identify an user using a extended Token.
  # terminal_port: unknown, need research.
  # net_type: unknown, need research.
  BSM_TOKEN_SUBJECT32_EX = construct.Struct(
      u'bsm_token_subject32_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32(u'terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_opaque // AUT_OPAQUE
  BSM_TOKEN_OPAQUE = BSM_TOKEN_TEXT

  # au_to_seq // AUT_SEQ
  BSM_TOKEN_SEQUENCE = BSM_TOKEN_DATA_INTEGER

  # Program execution with options.
  # For each argument we are going to have a string+ "\x00".
  # Example: [00 00 00 02][41 42 43 00 42 42 00]
  #          2 Arguments, Arg1: [414243] Arg2: [4242].
  BSM_TOKEN_EXEC_ARGUMENTS = construct.UBInt32(u'number_arguments')

  BSM_TOKEN_EXEC_ARGUMENT = construct.Struct(
      u'bsm_token_exec_argument',
      construct.RepeatUntil(
          _BsmTokenIsEndOfString, construct.StaticField("text", 1)))

  # au_to_in_addr // AUT_IN_ADDR:
  BSM_TOKEN_ADDR = IPV4_STRUCT

  # au_to_in_addr_ext // AUT_IN_ADDR_EX:
  BSM_TOKEN_ADDR_EXT = construct.Struct(
      u'bsm_token_addr_ext',
      construct.UBInt32(u'net_type'),
      IPV6_STRUCT)

  # au_to_ip // AUT_IP:
  # TODO: parse this header in the correct way.
  BSM_TOKEN_IP = construct.String(u'binary_ipv4_add', 20)

  # au_to_ipc // AUT_IPC:
  BSM_TOKEN_IPC = construct.Struct(
      u'bsm_token_ipc',
      construct.UBInt8(u'object_type'),
      construct.UBInt32(u'object_id'))

  # au_to_ipc_perm // au_to_ipc_perm
  BSM_TOKEN_IPC_PERM = construct.Struct(
      u'bsm_token_ipc_perm',
      construct.UBInt32(u'user_id'),
      construct.UBInt32(u'group_id'),
      construct.UBInt32(u'creator_user_id'),
      construct.UBInt32(u'creator_group_id'),
      construct.UBInt32(u'access_mode'),
      construct.UBInt32(u'slot_seq'),
      construct.UBInt32(u'key'))

  # au_to_iport // AUT_IPORT:
  BSM_TOKEN_PORT = construct.UBInt16(u'port_number')

  # au_to_file // AUT_OTHER_FILE32:
  BSM_TOKEN_FILE = construct.Struct(
      u'bsm_token_file',
      construct.UBInt32(u'timestamp'),
      construct.UBInt32(u'microsecond'),
      construct.UBInt16(u'length'),
      construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

  # au_to_subject64 // AUT_SUBJECT64:
  BSM_TOKEN_SUBJECT64 = construct.Struct(
      u'bsm_token_subject64',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64(u'terminal_port'),
      IPV4_STRUCT)

  # au_to_subject64_ex // AU_IPv4:
  BSM_TOKEN_SUBJECT64_EX = construct.Struct(
      u'bsm_token_subject64_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32(u'terminal_port'),
      construct.UBInt32(u'terminal_type'),
      BSM_IP_TYPE_SHORT)

  # au_to_process32 // AUT_PROCESS32:
  BSM_TOKEN_PROCESS32 = construct.Struct(
      u'bsm_token_process32',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32(u'terminal_port'),
      IPV4_STRUCT)

  # au_to_process64 // AUT_PROCESS32:
  BSM_TOKEN_PROCESS64 = construct.Struct(
      u'bsm_token_process64',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64(u'terminal_port'),
      IPV4_STRUCT)

  # au_to_process32_ex // AUT_PROCESS32_EX:
  BSM_TOKEN_PROCESS32_EX = construct.Struct(
      u'bsm_token_process32_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32(u'terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_process64_ex // AUT_PROCESS64_EX:
  BSM_TOKEN_PROCESS64_EX = construct.Struct(
      u'bsm_token_process64_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64(u'terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_sock_inet32 // AUT_SOCKINET32:
  BSM_TOKEN_AUT_SOCKINET32 = construct.Struct(
      u'bsm_token_aut_sockinet32',
      construct.UBInt16(u'net_type'),
      construct.UBInt16(u'port_number'),
      IPV4_STRUCT)

  # Info: checked against the source code of XNU, but not against
  #       real BSM file.
  BSM_TOKEN_AUT_SOCKINET128 = construct.Struct(
      u'bsm_token_aut_sockinet128',
      construct.UBInt16(u'net_type'),
      construct.UBInt16(u'port_number'),
      IPV6_STRUCT)

  INET6_ADDR_TYPE = construct.Struct(
      u'addr_type',
      construct.UBInt16(u'ip_type'),
      construct.UBInt16(u'source_port'),
      construct.UBInt64(u'saddr_high'),
      construct.UBInt64(u'saddr_low'),
      construct.UBInt16(u'destination_port'),
      construct.UBInt64(u'daddr_high'),
      construct.UBInt64(u'daddr_low'))

  INET4_ADDR_TYPE = construct.Struct(
      u'addr_type',
      construct.UBInt16(u'ip_type'),
      construct.UBInt16(u'source_port'),
      construct.UBInt32(u'source_address'),
      construct.UBInt16(u'destination_port'),
      construct.UBInt32(u'destination_address'))

  # au_to_socket_ex // AUT_SOCKET_EX
  # TODO: Change the 26 for unixbsm.BSM_PROTOCOLS.INET6.
  BSM_TOKEN_AUT_SOCKINET32_EX = construct.Struct(
      u'bsm_token_aut_sockinet32_ex',
      construct.UBInt16(u'socket_domain'),
      construct.UBInt16(u'socket_type'),
      construct.Switch(
          u'structure_addr_port',
          _BsmTokenGetSocketDomain,
          {26: INET6_ADDR_TYPE},
          default=INET4_ADDR_TYPE))

  # au_to_sock_unix // AUT_SOCKUNIX
  BSM_TOKEN_SOCKET_UNIX = construct.Struct(
      u'bsm_token_au_to_sock_unix',
      construct.UBInt16(u'family'),
      construct.RepeatUntil(
          _BsmTokenIsEndOfString, construct.StaticField("path", 1)))

  # au_to_data // au_to_data
  # how to print: bsmtoken.BSM_TOKEN_DATA_PRINT.
  # type: bsmtoken.BSM_TOKEN_DATA_TYPE.
  # unit_count: number of type values.
  # BSM_TOKEN_DATA has a end field = type * unit_count
  BSM_TOKEN_DATA = construct.Struct(
      u'bsm_token_data',
      construct.UBInt8(u'how_to_print'),
      construct.UBInt8(u'data_type'),
      construct.UBInt8(u'unit_count'))

  # au_to_attr32 // AUT_ATTR32
  BSM_TOKEN_ATTR32 = construct.Struct(
      u'bsm_token_attr32',
      construct.UBInt32(u'file_mode'),
      construct.UBInt32(u'uid'),
      construct.UBInt32(u'gid'),
      construct.UBInt32(u'file_system_id'),
      construct.UBInt64(u'file_system_node_id'),
      construct.UBInt32(u'device'))

  # au_to_attr64 // AUT_ATTR64
  BSM_TOKEN_ATTR64 = construct.Struct(
      u'bsm_token_attr64',
      construct.UBInt32(u'file_mode'),
      construct.UBInt32(u'uid'),
      construct.UBInt32(u'gid'),
      construct.UBInt32(u'file_system_id'),
      construct.UBInt64(u'file_system_node_id'),
      construct.UBInt64(u'device'))

  # au_to_exit // AUT_EXIT
  BSM_TOKEN_EXIT = construct.Struct(
      u'bsm_token_exit',
      construct.UBInt32(u'status'),
      construct.UBInt32(u'return_value'))

  # au_to_newgroups // AUT_NEWGROUPS
  # INFO: we must read BSM_TOKEN_DATA_INTEGER for each group.
  BSM_TOKEN_GROUPS = construct.UBInt16(u'group_number')

  # au_to_exec_env == au_to_exec_args
  BSM_TOKEN_EXEC_ENV = BSM_TOKEN_EXEC_ARGUMENTS

  # au_to_zonename //AUT_ZONENAME
  BSM_TOKEN_ZONENAME = BSM_TOKEN_TEXT

  # Token ID.
  # List of valid Token_ID.
  # Token_ID -> [NAME_STRUCTURE, STRUCTURE]
  # Only the checked structures have been added to the valid structures list.
  BSM_TYPE_LIST = {
      17: [u'BSM_TOKEN_FILE', BSM_TOKEN_FILE],
      19: [u'BSM_TOKEN_TRAILER', BSM_TOKEN_TRAILER],
      20: [u'BSM_HEADER32', BSM_HEADER32],
      21: [u'BSM_HEADER64', BSM_HEADER64],
      33: [u'BSM_TOKEN_DATA', BSM_TOKEN_DATA],
      34: [u'BSM_TOKEN_IPC', BSM_TOKEN_IPC],
      35: [u'BSM_TOKEN_PATH', BSM_TOKEN_PATH],
      36: [u'BSM_TOKEN_SUBJECT32', BSM_TOKEN_SUBJECT32],
      38: [u'BSM_TOKEN_PROCESS32', BSM_TOKEN_PROCESS32],
      39: [u'BSM_TOKEN_RETURN32', BSM_TOKEN_RETURN32],
      40: [u'BSM_TOKEN_TEXT', BSM_TOKEN_TEXT],
      41: [u'BSM_TOKEN_OPAQUE', BSM_TOKEN_OPAQUE],
      42: [u'BSM_TOKEN_ADDR', BSM_TOKEN_ADDR],
      43: [u'BSM_TOKEN_IP', BSM_TOKEN_IP],
      44: [u'BSM_TOKEN_PORT', BSM_TOKEN_PORT],
      45: [u'BSM_TOKEN_ARGUMENT32', BSM_TOKEN_ARGUMENT32],
      47: [u'BSM_TOKEN_SEQUENCE', BSM_TOKEN_SEQUENCE],
      96: [u'BSM_TOKEN_ZONENAME', BSM_TOKEN_ZONENAME],
      113: [u'BSM_TOKEN_ARGUMENT64', BSM_TOKEN_ARGUMENT64],
      114: [u'BSM_TOKEN_RETURN64', BSM_TOKEN_RETURN64],
      116: [u'BSM_HEADER32_EX', BSM_HEADER32_EX],
      119: [u'BSM_TOKEN_PROCESS64', BSM_TOKEN_PROCESS64],
      122: [u'BSM_TOKEN_SUBJECT32_EX', BSM_TOKEN_SUBJECT32_EX],
      127: [u'BSM_TOKEN_AUT_SOCKINET32_EX', BSM_TOKEN_AUT_SOCKINET32_EX],
      128: [u'BSM_TOKEN_AUT_SOCKINET32', BSM_TOKEN_AUT_SOCKINET32]}

  # Untested structures.
  # When a not tested structure is found, we try to parse using also
  # these structures.
  BSM_TYPE_LIST_NOT_TESTED = {
      49: [u'BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32],
      50: [u'BSM_TOKEN_IPC_PERM', BSM_TOKEN_IPC_PERM],
      52: [u'BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS],
      59: [u'BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS],
      60: [u'BSM_TOKEN_EXEC_ARGUMENTS', BSM_TOKEN_EXEC_ARGUMENTS],
      61: [u'BSM_TOKEN_EXEC_ENV', BSM_TOKEN_EXEC_ENV],
      62: [u'BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32],
      82: [u'BSM_TOKEN_EXIT', BSM_TOKEN_EXIT],
      115: [u'BSM_TOKEN_ATTR64', BSM_TOKEN_ATTR64],
      117: [u'BSM_TOKEN_SUBJECT64', BSM_TOKEN_SUBJECT64],
      123: [u'BSM_TOKEN_PROCESS32_EX', BSM_TOKEN_PROCESS32_EX],
      124: [u'BSM_TOKEN_PROCESS64_EX', BSM_TOKEN_PROCESS64_EX],
      125: [u'BSM_TOKEN_SUBJECT64_EX', BSM_TOKEN_SUBJECT64_EX],
      126: [u'BSM_TOKEN_ADDR_EXT', BSM_TOKEN_ADDR_EXT],
      129: [u'BSM_TOKEN_AUT_SOCKINET128', BSM_TOKEN_AUT_SOCKINET128],
      130: [u'BSM_TOKEN_SOCKET_UNIX', BSM_TOKEN_SOCKET_UNIX]}

  def __init__(self):
    """Initializes a parser object."""
    super(BsmParser, self).__init__()
    # Create the dictionary with all token IDs: tested and untested.
    self.bsm_type_list_all = self.BSM_TYPE_LIST.copy()
    self.bsm_type_list_all.update(self.BSM_TYPE_LIST_NOT_TESTED)

  def _CopyByteArrayToBase16String(self, byte_array):
    """Copies a byte array into a base-16 encoded Unicode string.

    Args:
      byte_array: A byte array.

    Returns:
      A base-16 encoded Unicode string.
    """
    return u''.join([u'{0:02x}'.format(byte) for byte in byte_array])

  def _CopyUtf8ByteArrayToString(self, byte_array):
    """Copies a UTF-8 encoded byte array into a Unicode string.

    Args:
      byte_array: A byte array containing an UTF-8 encoded string.

    Returns:
      A Unicode string, truncated at the first NUL character.
    """
    byte_stream = b''.join(map(chr, byte_array))

    try:
      string = byte_stream.decode(u'utf-8')
    except UnicodeDecodeError:
      logging.warning(u'Unable to decode UTF-8 formatted byte array.')
      string = byte_stream.decode(u'utf-8', errors=u'ignore')

    string, _, _ = string.partition(b'\x00')
    return string

  def _IPv4Format(self, address):
    """Change an integer IPv4 address value for its 4 octets representation.

    Args:
      address: integer with the IPv4 address.

    Returns:
      IPv4 address in 4 octet representation (class A, B, C, D).
    """
    ipv4_string = self.IPV4_STRUCT.build(address)
    return socket.inet_ntoa(ipv4_string)

  def _IPv6Format(self, high, low):
    """Provide a readable IPv6 IP having the high and low part in 2 integers.

    Args:
      high: 64 bits integer number with the high part of the IPv6.
      low: 64 bits integer number with the low part of the IPv6.

    Returns:
      String with a well represented IPv6.
    """
    ipv6_string = self.IPV6_STRUCT.build(
        construct.Container(high=high, low=low))
    # socket.inet_ntop not supported in Windows.
    if hasattr(socket, u'inet_ntop'):
      return socket.inet_ntop(socket.AF_INET6, ipv6_string)

    # TODO: this approach returns double "::", illegal IPv6 addr.
    str_address = binascii.hexlify(ipv6_string)
    address = []
    blank = False
    for pos in range(0, len(str_address), 4):
      if str_address[pos:pos + 4] == u'0000':
        if not blank:
          address.append(u'')
          blank = True
      else:
        blank = False
        address.append(str_address[pos:pos + 4].lstrip(u'0'))
    return u':'.join(address)

  def _RawToUTF8(self, byte_stream):
    """Copies a UTF-8 byte stream into a Unicode string.

    Args:
      byte_stream: A byte stream containing an UTF-8 encoded string.

    Returns:
      A Unicode string, truncated at the first NUL character.
    """
    try:
      string = byte_stream.decode(u'utf-8')
    except UnicodeDecodeError:
      logging.warning(
          u'Decode UTF8 failed, the message string may be cut short.')
      string = byte_stream.decode(u'utf-8', errors=u'ignore')
    return string.partition(b'\x00')[0]

  def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses a BSM file-like object.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    file_object.seek(0, os.SEEK_SET)

    try:
      is_bsm = self.VerifyFile(parser_mediator, file_object)
    except (IOError, construct.FieldError) as exception:
      raise errors.UnableToParseFile(
          u'Unable to parse BSM file with error: {0:s}'.format(exception))

    if not is_bsm:
      raise errors.UnableToParseFile(u'Not a BSM File, unable to parse.')

    # Produce one event per BSM record until the end of the file.
    event_object = self.ReadBSMEvent(parser_mediator, file_object)
    while event_object:
      parser_mediator.ProduceEvent(event_object)
      event_object = self.ReadBSMEvent(parser_mediator, file_object)

  def ReadBSMEvent(self, parser_mediator, file_object):
    """Returns a BsmEvent from a single BSM entry.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Returns:
      An event object, or None when the end of the file or an unparsable
      entry is reached.
    """
    # A list of tokens that has the entry.
    extra_tokens = []
    offset = file_object.tell()

    # Token header, first token for each entry.
    try:
      token_id = self.BSM_TYPE.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return

    bsm_type, structure = self.BSM_TYPE_LIST.get(token_id, [u'', u''])
    if bsm_type == u'BSM_HEADER32':
      token = structure.parse_stream(file_object)
    elif bsm_type == u'BSM_HEADER64':
      token = structure.parse_stream(file_object)
    elif bsm_type == u'BSM_HEADER32_EX':
      token = structure.parse_stream(file_object)
    else:
      logging.warning(
          u'Token ID Header {0} not expected at position 0x{1:X}.'
          u'The parsing of the file cannot be continued'.format(
              token_id, file_object.tell()))
      # TODO: if it is a Mac OS X, search for the trailer magic value
      #       as a end of the entry can be a possibility to continue.
      return

    length = token.bsm_header.length
    event_type = u'{0} ({1})'.format(
        bsmtoken.BSM_AUDIT_EVENT.get(token.bsm_header.event_type, u'UNKNOWN'),
        token.bsm_header.event_type)
    timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
        token.timestamp, token.microsecond)

    # Read until we reach the end of the record.
    while file_object.tell() < (offset + length):
      # Check if it is a known token.
      try:
        token_id = self.BSM_TYPE.parse_stream(file_object)
      except (IOError, construct.FieldError):
        logging.warning(
            u'Unable to parse the Token ID at position: {0:d}'.format(
                file_object.tell()))
        return
      if not token_id in self.BSM_TYPE_LIST:
        pending = (offset + length) - file_object.tell()
        extra_tokens.extend(
            self.TryWithUntestedStructures(file_object, token_id, pending))
      else:
        token = self.BSM_TYPE_LIST[token_id][1].parse_stream(file_object)
        extra_tokens.append(self.FormatToken(token_id, token, file_object))

    if file_object.tell() > (offset + length):
      # A token was longer than the record claims; realign the stream with
      # the start of the next record.
      logging.warning(
          u'Token ID {0} not expected at position 0x{1:X}.'
          u'Jumping for the next entry.'.format(
              token_id, file_object.tell()))
      try:
        file_object.seek(
            (offset + length) - file_object.tell(), os.SEEK_CUR)
      except (IOError, construct.FieldError) as exception:
        logging.warning(
            u'Unable to jump to next entry with error: {0:s}'.format(
                exception))
        return

    # BSM can be in more than one OS: BSD, Solaris and Mac OS X.
    if parser_mediator.platform == u'MacOSX':
      # In Mac OS X the last two tokens are the return status and the trailer.
      if len(extra_tokens) >= 2:
        return_value = extra_tokens[-2:-1][0]
        if (return_value.startswith(u'[BSM_TOKEN_RETURN32') or
            return_value.startswith(u'[BSM_TOKEN_RETURN64')):
          _ = extra_tokens.pop(len(extra_tokens) - 2)
        else:
          return_value = u'Return unknown'
      else:
        return_value = u'Return unknown'

      if extra_tokens:
        trailer = extra_tokens[-1]
        if trailer.startswith(u'[BSM_TOKEN_TRAILER'):
          _ = extra_tokens.pop(len(extra_tokens) - 1)
        else:
          trailer = u'Trailer unknown'
      else:
        trailer = u'Trailer unknown'

      return MacBsmEvent(
          event_type, timestamp, u'. '.join(extra_tokens),
          return_value, trailer, offset)
    else:
      # Generic BSM format.
      if extra_tokens:
        trailer = extra_tokens[-1]
        if trailer.startswith(u'[BSM_TOKEN_TRAILER'):
          _ = extra_tokens.pop(len(extra_tokens) - 1)
        else:
          trailer = u'Trailer unknown'
      else:
        trailer = u'Trailer unknown'

      return BsmEvent(
          event_type, timestamp, u'. '.join(extra_tokens), trailer, offset)

  def VerifyFile(self, parser_mediator, file_object):
    """Check if the file is a BSM file.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: file that we want to check.

    Returns:
      True if this is a valid BSM file, otherwise False.
    """
    if file_object.tell() != 0:
      file_object.seek(0)

    # First part of the entry is always a Header.
    try:
      token_id = self.BSM_TYPE.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False
    if token_id not in self.BSM_TYPE_LIST:
      return False

    bsm_type, structure = self.BSM_TYPE_LIST.get(token_id, [u'', u''])
    try:
      if bsm_type == u'BSM_HEADER32':
        header = structure.parse_stream(file_object)
      elif bsm_type == u'BSM_HEADER64':
        header = structure.parse_stream(file_object)
      elif bsm_type == u'BSM_HEADER32_EX':
        header = structure.parse_stream(file_object)
      else:
        return False
    except (IOError, construct.FieldError):
      return False

    if header.bsm_header.version != self.AUDIT_HEADER_VERSION:
      return False

    try:
      token_id = self.BSM_TYPE.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False

    # If is Mac OS X BSM file, next entry is a text token indicating
    # if it is a normal start or it is a recovery track.
    if parser_mediator.platform == u'MacOSX':
      bsm_type_list = self.BSM_TYPE_LIST.get(token_id)
      if not bsm_type_list:
        return False

      if bsm_type_list[0] != u'BSM_TOKEN_TEXT':
        logging.warning(u'It is not a valid first entry for Mac OS X BSM.')
        return False

      try:
        token = self.BSM_TOKEN_TEXT.parse_stream(file_object)
      except (IOError, construct.FieldError):
        # Fix: previously this was a bare "return" (None); the method is
        # documented to return a boolean.
        return False

      text = self._CopyUtf8ByteArrayToString(token.text)
      if (text != u'launchctl::Audit startup' and
          text != u'launchctl::Audit recovery'):
        logging.warning(u'It is not a valid first entry for Mac OS X BSM.')
        return False

    file_object.seek(0)
    return True

  def TryWithUntestedStructures(self, file_object, token_id, pending):
    """Try to parse the pending part of the entry using untested structures.

    Args:
      file_object: BSM file.
      token_id: integer with the id that comes from the unknown token.
      pending: pending length of the entry.

    Returns:
      A list of extra tokens data that can be parsed using non-tested
      structures. A message indicating that a structure cannot be parsed
      is added for unparsed structures.
    """
    # Data from the unknown structure.
    start_position = file_object.tell()
    start_token_id = token_id
    extra_tokens = []

    # Read all the "pending" bytes.
    try:
      if token_id in self.bsm_type_list_all:
        token = self.bsm_type_list_all[token_id][1].parse_stream(file_object)
        extra_tokens.append(self.FormatToken(token_id, token, file_object))
        while file_object.tell() < (start_position + pending):
          # Check if it is a known token.
          try:
            token_id = self.BSM_TYPE.parse_stream(file_object)
          except (IOError, construct.FieldError):
            logging.warning(
                u'Unable to parse the Token ID at position: {0:d}'.format(
                    file_object.tell()))
            return
          if token_id not in self.bsm_type_list_all:
            break
          token = self.bsm_type_list_all[token_id][1].parse_stream(
              file_object)
          extra_tokens.append(self.FormatToken(token_id, token, file_object))
    except (IOError, construct.FieldError):
      token_id = 255

    next_entry = (start_position + pending)
    if file_object.tell() != next_entry:
      # Unknown Structure.
      logging.warning(u'Unknown Token at "0x{0:X}", ID: {1} (0x{2:X})'.format(
          start_position - 1, token_id, token_id))
      # TODO: another way to save this information must be found.
      extra_tokens.append(
          u'Plaso: some tokens from this entry can '
          u'not be saved. Entry at 0x{0:X} with unknown '
          u'token id "0x{1:X}".'.format(
              start_position - 1, start_token_id))
      # Move to next entry.
      file_object.seek(next_entry - file_object.tell(), os.SEEK_CUR)
      # It returns null list because it doesn't know which structure was
      # the incorrect structure that makes that it can arrive to the expected
      # end of the entry.
      return []
    return extra_tokens

  # TODO: instead of compare the text to know what structure was parsed
  #       is better to compare directly the numeric number (token_id),
  #       less readable, but better performance.
  def FormatToken(self, token_id, token, file_object):
    """Parse the Token depending of the type of the structure.

    Args:
      token_id: Identification integer of the token_type.
      token: Token struct to parse.
      file_object: BSM file.

    Returns:
      String with the parsed Token values.
    """
    if token_id not in self.bsm_type_list_all:
      return u'Type Unknown: {0:d} (0x{0:X})'.format(token_id)

    bsm_type, _ = self.bsm_type_list_all.get(token_id, [u'', u''])

    if bsm_type in [
        u'BSM_TOKEN_TEXT', u'BSM_TOKEN_PATH', u'BSM_TOKEN_ZONENAME']:
      try:
        string = self._CopyUtf8ByteArrayToString(token.text)
      except TypeError:
        string = u'Unknown'
      return u'[{0}: {1:s}]'.format(bsm_type, string)

    elif bsm_type in [
        u'BSM_TOKEN_RETURN32', u'BSM_TOKEN_RETURN64', u'BSM_TOKEN_EXIT']:
      return u'[{0}: {1} ({2}), System call status: {3}]'.format(
          bsm_type, bsmtoken.BSM_ERRORS.get(token.status, u'Unknown'),
          token.status, token.return_value)

    elif bsm_type in [u'BSM_TOKEN_SUBJECT32', u'BSM_TOKEN_SUBJECT64']:
      return (
          u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
          u'pid({6}), session_id({7}), terminal_port({8}), '
          u'terminal_ip({9})]').format(
              bsm_type, token.subject_data.audit_uid,
              token.subject_data.effective_uid,
              token.subject_data.effective_gid,
              token.subject_data.real_uid, token.subject_data.real_gid,
              token.subject_data.pid, token.subject_data.session_id,
              token.terminal_port, self._IPv4Format(token.ipv4))

    elif bsm_type in [u'BSM_TOKEN_SUBJECT32_EX', u'BSM_TOKEN_SUBJECT64_EX']:
      if token.bsm_ip_type_short.net_type == self.AU_IPv6:
        ip = self._IPv6Format(
            token.bsm_ip_type_short.ip_addr.high,
            token.bsm_ip_type_short.ip_addr.low)
      elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
        ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
      else:
        ip = u'unknown'
      return (
          u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
          u'pid({6}), session_id({7}), terminal_port({8}), '
          u'terminal_ip({9})]').format(
              bsm_type, token.subject_data.audit_uid,
              token.subject_data.effective_uid,
              token.subject_data.effective_gid,
              token.subject_data.real_uid, token.subject_data.real_gid,
              token.subject_data.pid, token.subject_data.session_id,
              token.terminal_port, ip)

    elif bsm_type in [u'BSM_TOKEN_ARGUMENT32', u'BSM_TOKEN_ARGUMENT64']:
      string = self._CopyUtf8ByteArrayToString(token.text)
      return u'[{0}: {1:s}({2}) is 0x{3:X}]'.format(
          bsm_type, string, token.num_arg, token.name_arg)

    elif bsm_type in [u'BSM_TOKEN_EXEC_ARGUMENTS', u'BSM_TOKEN_EXEC_ENV']:
      # The token is the argument count; each argument is a NUL-terminated
      # string read directly from the stream.
      arguments = []
      for _ in range(0, token):
        sub_token = self.BSM_TOKEN_EXEC_ARGUMENT.parse_stream(file_object)
        string = self._CopyUtf8ByteArrayToString(sub_token.text)
        arguments.append(string)
      return u'[{0}: {1:s}]'.format(bsm_type, u' '.join(arguments))

    elif bsm_type == u'BSM_TOKEN_AUT_SOCKINET32':
      return (u'[{0}: {1} ({2}) open in port {3}. Address {4}]'.format(
          bsm_type, bsmtoken.BSM_PROTOCOLS.get(token.net_type, u'UNKNOWN'),
          token.net_type, token.port_number, self._IPv4Format(token.ipv4)))

    elif bsm_type == u'BSM_TOKEN_AUT_SOCKINET128':
      return u'[{0}: {1} ({2}) open in port {3}. Address {4}]'.format(
          bsm_type, bsmtoken.BSM_PROTOCOLS.get(token.net_type, u'UNKNOWN'),
          token.net_type, token.port_number,
          self._IPv6Format(token.ipv6.high, token.ipv6.low))

    elif bsm_type == u'BSM_TOKEN_ADDR':
      return u'[{0}: {1}]'.format(bsm_type, self._IPv4Format(token))

    elif bsm_type == u'BSM_TOKEN_IP':
      return u'[IPv4_Header: 0x{0:s}]'.format(token.encode(u'hex'))

    elif bsm_type == u'BSM_TOKEN_ADDR_EXT':
      return u'[{0}: {1} ({2}). Address {3}]'.format(
          bsm_type,
          bsmtoken.BSM_PROTOCOLS.get(token.net_type, u'UNKNOWN'),
          token.net_type,
          self._IPv6Format(token.ipv6.high, token.ipv6.low))

    elif bsm_type == u'BSM_TOKEN_PORT':
      return u'[{0}: {1}]'.format(bsm_type, token)

    elif bsm_type == u'BSM_TOKEN_TRAILER':
      return u'[{0}: {1}]'.format(bsm_type, token.record_length)

    elif bsm_type == u'BSM_TOKEN_FILE':
      # TODO: if this timestamp is usefull, it must be extracted as a separate
      #       event object.
      timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
          token.timestamp, token.microsecond)
      date_time = timelib.Timestamp.CopyToDatetime(timestamp, pytz.UTC)
      date_time_string = date_time.strftime(u'%Y-%m-%d %H:%M:%S')

      string = self._CopyUtf8ByteArrayToString(token.text)
      return u'[{0}: {1:s}, timestamp: {2:s}]'.format(
          bsm_type, string, date_time_string)

    elif bsm_type == u'BSM_TOKEN_IPC':
      return u'[{0}: object type {1}, object id {2}]'.format(
          bsm_type, token.object_type, token.object_id)

    elif bsm_type in [u'BSM_TOKEN_PROCESS32', u'BSM_TOKEN_PROCESS64']:
      return (
          u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
          u'pid({6}), session_id({7}), terminal_port({8}), '
          u'terminal_ip({9})]').format(
              bsm_type, token.subject_data.audit_uid,
              token.subject_data.effective_uid,
              token.subject_data.effective_gid,
              token.subject_data.real_uid, token.subject_data.real_gid,
              token.subject_data.pid, token.subject_data.session_id,
              token.terminal_port, self._IPv4Format(token.ipv4))

    elif bsm_type in [u'BSM_TOKEN_PROCESS32_EX', u'BSM_TOKEN_PROCESS64_EX']:
      if token.bsm_ip_type_short.net_type == self.AU_IPv6:
        ip = self._IPv6Format(
            token.bsm_ip_type_short.ip_addr.high,
            token.bsm_ip_type_short.ip_addr.low)
      elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
        ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
      else:
        ip = u'unknown'
      return (
          u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
          u'pid({6}), session_id({7}), terminal_port({8}), '
          u'terminal_ip({9})]').format(
              bsm_type, token.subject_data.audit_uid,
              token.subject_data.effective_uid,
              token.subject_data.effective_gid,
              token.subject_data.real_uid, token.subject_data.real_gid,
              token.subject_data.pid, token.subject_data.session_id,
              token.terminal_port, ip)

    elif bsm_type == u'BSM_TOKEN_DATA':
      data = []
      data_type = bsmtoken.BSM_TOKEN_DATA_TYPE.get(token.data_type, u'')
      if data_type == u'AUR_CHAR':
        for _ in range(token.unit_count):
          data.append(self.BSM_TOKEN_DATA_CHAR.parse_stream(file_object))
      elif data_type == u'AUR_SHORT':
        for _ in range(token.unit_count):
          # Fix: was self.BSM_TOKEN_DAT_SHORT which does not exist and
          # raised AttributeError for every AUR_SHORT data token.
          data.append(self.BSM_TOKEN_DATA_SHORT.parse_stream(file_object))
      elif data_type == u'AUR_INT32':
        for _ in range(token.unit_count):
          data.append(self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object))
      else:
        data.append(u'Unknown type data')
      # TODO: the data when it is string ends with ".", HW a space is return
      #       after uses the UTF-8 conversion.
      # Fix: stringify each value before joining — AUR_SHORT/AUR_INT32
      # values are integers and u''.join() on them raised TypeError.
      # This matches the sibling (dict-returning) implementation.
      return u'[{0}: Format data: {1}, Data: {2}]'.format(
          bsm_type, bsmtoken.BSM_TOKEN_DATA_PRINT[token.how_to_print],
          self._RawToUTF8(u''.join(map(str, data))))

    elif bsm_type in [u'BSM_TOKEN_ATTR32', u'BSM_TOKEN_ATTR64']:
      return (
          u'[{0}: Mode: {1}, UID: {2}, GID: {3}, '
          u'File system ID: {4}, Node ID: {5}, Device: {6}]').format(
              bsm_type, token.file_mode, token.uid, token.gid,
              token.file_system_id, token.file_system_node_id,
              token.device)

    elif bsm_type == u'BSM_TOKEN_GROUPS':
      # NOTE(review): parse_stream() returns an integer here but _RawToUTF8
      # expects a byte stream with a decode() method — looks like a latent
      # defect shared with the sibling implementation; TODO confirm before
      # changing behavior.
      arguments = []
      for _ in range(token):
        arguments.append(
            self._RawToUTF8(
                self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object)))
      return u'[{0}: {1:s}]'.format(bsm_type, u','.join(arguments))

    elif bsm_type == u'BSM_TOKEN_AUT_SOCKINET32_EX':
      if bsmtoken.BSM_PROTOCOLS.get(token.socket_domain, u'') == u'INET6':
        saddr = self._IPv6Format(
            token.structure_addr_port.saddr_high,
            token.structure_addr_port.saddr_low)
        daddr = self._IPv6Format(
            token.structure_addr_port.daddr_high,
            token.structure_addr_port.daddr_low)
      else:
        saddr = self._IPv4Format(token.structure_addr_port.source_address)
        daddr = self._IPv4Format(token.structure_addr_port.destination_address)

      return u'[{0}: from {1} port {2} to {3} port {4}]'.format(
          bsm_type, saddr, token.structure_addr_port.source_port,
          daddr, token.structure_addr_port.destination_port)

    elif bsm_type == u'BSM_TOKEN_IPC_PERM':
      return (
          u'[{0}: user id {1}, group id {2}, create user id {3}, '
          u'create group id {4}, access {5}]').format(
              bsm_type, token.user_id, token.group_id,
              token.creator_user_id, token.creator_group_id,
              token.access_mode)

    elif bsm_type == u'BSM_TOKEN_SOCKET_UNIX':
      string = self._CopyUtf8ByteArrayToString(token.path)
      return u'[{0}: Family {1}, Path {2:s}]'.format(
          bsm_type, token.family, string)

    elif bsm_type == u'BSM_TOKEN_OPAQUE':
      string = self._CopyByteArrayToBase16String(token.text)
      return u'[{0}: {1:s}]'.format(bsm_type, string)

    elif bsm_type == u'BSM_TOKEN_SEQUENCE':
      return u'[{0}: {1}]'.format(bsm_type, token)
class KeychainParser(interface.FileObjectParser):
  """Parser for MacOS Keychain files."""

  NAME = 'mac_keychain'
  DESCRIPTION = 'Parser for MacOS Keychain files.'

  KEYCHAIN_SIGNATURE = b'kych'
  KEYCHAIN_MAJOR_VERSION = 1
  KEYCHAIN_MINOR_VERSION = 0

  RECORD_TYPE_APPLICATION = 0x80000000
  RECORD_TYPE_INTERNET = 0x80000001

  # DB HEADER.
  KEYCHAIN_DB_HEADER = construct.Struct(
      'db_header',
      construct.Bytes('signature', 4),
      construct.UBInt16('major_version'),
      construct.UBInt16('minor_version'),
      construct.UBInt32('header_size'),
      construct.UBInt32('schema_offset'),
      construct.Padding(4))

  # DB SCHEMA.
  KEYCHAIN_DB_SCHEMA = construct.Struct(
      'db_schema',
      construct.UBInt32('size'),
      construct.UBInt32('number_of_tables'))

  # For each number_of_tables, the schema has a TABLE_OFFSET with the
  # offset starting in the DB_SCHEMA.
  TABLE_OFFSET = construct.UBInt32('table_offset')

  TABLE_HEADER = construct.Struct(
      'table_header',
      construct.UBInt32('table_size'),
      construct.UBInt32('record_type'),
      construct.UBInt32('number_of_records'),
      construct.UBInt32('first_record'),
      construct.UBInt32('index_offset'),
      construct.Padding(4),
      construct.UBInt32('recordnumbercount'))

  # Common record header; each field after entry_length is a relative
  # offset (from the record start) to the actual value in the file.
  RECORD_HEADER = construct.Struct(
      'record_header',
      construct.UBInt32('entry_length'),
      construct.Padding(12),
      construct.UBInt32('ssgp_length'),
      construct.Padding(4),
      construct.UBInt32('creation_time'),
      construct.UBInt32('last_modification_time'),
      construct.UBInt32('text_description'),
      construct.Padding(4),
      construct.UBInt32('comments'),
      construct.Padding(8),
      construct.UBInt32('entry_name'),
      construct.Padding(20),
      construct.UBInt32('account_name'),
      construct.Padding(4))

  RECORD_HEADER_APP = construct.Struct(
      'record_entry_app',
      RECORD_HEADER,
      construct.Padding(4))

  RECORD_HEADER_INET = construct.Struct(
      'record_entry_inet',
      RECORD_HEADER,
      construct.UBInt32('where'),
      construct.UBInt32('protocol'),
      construct.UBInt32('type'),
      construct.Padding(4),
      construct.UBInt32('url'))

  TEXT = construct.PascalString(
      'text', length_field=construct.UBInt32('length'))

  # Timestamps are stored as ASCII digit strings: YYYYMMDDhhmmss + padding.
  TIME = construct.Struct(
      'timestamp',
      construct.String('year', 4),
      construct.String('month', 2),
      construct.String('day', 2),
      construct.String('hour', 2),
      construct.String('minute', 2),
      construct.String('second', 2),
      construct.Padding(2))

  TYPE_TEXT = construct.String('type', 4)

  # TODO: add more protocols.
  _PROTOCOL_TRANSLATION_DICT = {
      'htps': 'https',
      'smtp': 'smtp',
      'imap': 'imap',
      'http': 'http'}

  def _ReadEntryApplication(self, parser_mediator, file_object):
    """Extracts the information from an application password entry.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.
    """
    record_offset = file_object.tell()
    try:
      record_struct = self.RECORD_HEADER_APP.parse_stream(file_object)
    except (IOError, construct.FieldError):
      parser_mediator.ProduceExtractionError(
          'unable to parse record structure at offset: 0x{0:08x}'.format(
              record_offset))
      return

    (ssgp_hash, creation_time, last_modification_time, text_description,
     comments, entry_name, account_name) = self._ReadEntryHeader(
         parser_mediator, file_object, record_struct.record_header,
         record_offset)

    # Move to the end of the record so the next entry can be read.
    next_record_offset = (
        record_offset + record_struct.record_header.entry_length)
    file_object.seek(next_record_offset, os.SEEK_SET)

    event_data = KeychainApplicationRecordEventData()
    event_data.account_name = account_name
    event_data.comments = comments
    event_data.entry_name = entry_name
    event_data.ssgp_hash = ssgp_hash
    event_data.text_description = text_description

    if creation_time:
      event = time_events.DateTimeValuesEvent(
          creation_time, definitions.TIME_DESCRIPTION_CREATION)
      parser_mediator.ProduceEventWithEventData(event, event_data)

    if last_modification_time:
      event = time_events.DateTimeValuesEvent(
          last_modification_time, definitions.TIME_DESCRIPTION_MODIFICATION)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def _ReadEntryHeader(
      self, parser_mediator, file_object, record, record_offset):
    """Read the common record attributes.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.
      record (construct.Struct): record header structure.
      record_offset (int): offset of the start of the record.

    Returns:
      A tuple containing:
        ssgp_hash: Hash of the encrypted data (passwd, cert, note).
        creation_time (dfdatetime.TimeElements): entry creation time or None.
        last_modification_time (dfdatetime.TimeElements): entry last
            modification time or None.
        text_description: A brief description of the entry.
        comments: comments of the entry.
        entry_name: Name of the entry.
        account_name: Name of the account.
    """
    # TODO: reduce number of seeks and/or offset calculations needed
    # for parsing.

    # Info: The hash header always start with the string ssgp follow by
    # the hash. Furthermore The fields are always a multiple of four.
    # Then if it is not multiple the value is padded by 0x00.
    ssgp_hash = binascii.hexlify(file_object.read(record.ssgp_length)[4:])

    creation_time = None

    # NOTE: the stored offsets appear to be 1-based, hence the -1 correction.
    structure_offset = record_offset + record.creation_time - 1
    file_object.seek(structure_offset, os.SEEK_SET)

    try:
      time_structure = self.TIME.parse_stream(file_object)
    except construct.FieldError as exception:
      time_structure = None
      parser_mediator.ProduceExtractionError(
          'unable to parse creation time with error: {0!s}'.format(exception))

    if time_structure:
      time_elements_tuple = (
          time_structure.year, time_structure.month, time_structure.day,
          time_structure.hour, time_structure.minute, time_structure.second)

      creation_time = dfdatetime_time_elements.TimeElements()
      try:
        creation_time.CopyFromStringTuple(
            time_elements_tuple=time_elements_tuple)
      except ValueError:
        creation_time = None
        parser_mediator.ProduceExtractionError(
            'invalid creation time value: {0!s}'.format(time_elements_tuple))

    last_modification_time = None

    structure_offset = record_offset + record.last_modification_time - 1
    file_object.seek(structure_offset, os.SEEK_SET)

    try:
      time_structure = self.TIME.parse_stream(file_object)
    except construct.FieldError as exception:
      time_structure = None
      parser_mediator.ProduceExtractionError(
          'unable to parse last modification time with error: {0!s}'.format(
              exception))

    if time_structure:
      time_elements_tuple = (
          time_structure.year, time_structure.month, time_structure.day,
          time_structure.hour, time_structure.minute, time_structure.second)

      last_modification_time = dfdatetime_time_elements.TimeElements()
      try:
        last_modification_time.CopyFromStringTuple(
            time_elements_tuple=time_elements_tuple)
      except ValueError:
        last_modification_time = None
        parser_mediator.ProduceExtractionError(
            'invalid last modification time value: {0!s}'.format(
                time_elements_tuple))

    text_description = 'N/A'
    if record.text_description:
      structure_offset = record_offset + record.text_description - 1
      file_object.seek(structure_offset, os.SEEK_SET)

      try:
        text_description = self.TEXT.parse_stream(file_object)
      except construct.FieldError as exception:
        parser_mediator.ProduceExtractionError(
            'unable to parse text description with error: {0!s}'.format(
                exception))

    comments = 'N/A'
    if record.comments:
      structure_offset = record_offset + record.comments - 1
      file_object.seek(structure_offset, os.SEEK_SET)

      try:
        comments = self.TEXT.parse_stream(file_object)
      except construct.FieldError as exception:
        parser_mediator.ProduceExtractionError(
            'unable to parse comments with error: {0!s}'.format(exception))

    structure_offset = record_offset + record.entry_name - 1
    file_object.seek(structure_offset, os.SEEK_SET)

    try:
      entry_name = self.TEXT.parse_stream(file_object)
    except construct.FieldError as exception:
      entry_name = 'N/A'
      parser_mediator.ProduceExtractionError(
          'unable to parse entry name with error: {0!s}'.format(exception))

    structure_offset = record_offset + record.account_name - 1
    file_object.seek(structure_offset, os.SEEK_SET)

    try:
      account_name = self.TEXT.parse_stream(file_object)
    except construct.FieldError as exception:
      account_name = 'N/A'
      parser_mediator.ProduceExtractionError(
          'unable to parse account name with error: {0!s}'.format(exception))

    return (
        ssgp_hash, creation_time, last_modification_time,
        text_description, comments, entry_name, account_name)

  def _ReadEntryInternet(self, parser_mediator, file_object):
    """Extracts the information from an Internet password entry.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.
    """
    record_offset = file_object.tell()
    try:
      record_header_struct = self.RECORD_HEADER_INET.parse_stream(file_object)
    except (IOError, construct.FieldError):
      parser_mediator.ProduceExtractionError((
          'unable to parse record header structure at offset: '
          '0x{0:08x}').format(record_offset))
      return

    (ssgp_hash, creation_time, last_modification_time, text_description,
     comments, entry_name, account_name) = self._ReadEntryHeader(
         parser_mediator, file_object, record_header_struct.record_header,
         record_offset)

    if not record_header_struct.where:
      where = 'N/A'
      protocol = 'N/A'
      type_protocol = 'N/A'

    else:
      offset = record_offset + record_header_struct.where - 1
      file_object.seek(offset, os.SEEK_SET)
      where = self.TEXT.parse_stream(file_object)

      offset = record_offset + record_header_struct.protocol - 1
      file_object.seek(offset, os.SEEK_SET)
      protocol = self.TYPE_TEXT.parse_stream(file_object)

      offset = record_offset + record_header_struct.type - 1
      file_object.seek(offset, os.SEEK_SET)
      type_protocol = self.TEXT.parse_stream(file_object)
      type_protocol = self._PROTOCOL_TRANSLATION_DICT.get(
          type_protocol, type_protocol)

      if record_header_struct.url:
        offset = record_offset + record_header_struct.url - 1
        file_object.seek(offset, os.SEEK_SET)
        url = self.TEXT.parse_stream(file_object)
        where = '{0:s}{1:s}'.format(where, url)

    # Move to the end of the record so the next entry can be read.
    next_record_offset = (
        record_offset + record_header_struct.record_header.entry_length)
    file_object.seek(next_record_offset, os.SEEK_SET)

    event_data = KeychainInternetRecordEventData()
    event_data.account_name = account_name
    event_data.comments = comments
    event_data.entry_name = entry_name
    event_data.protocol = protocol
    event_data.ssgp_hash = ssgp_hash
    event_data.text_description = text_description
    event_data.type_protocol = type_protocol
    event_data.where = where

    if creation_time:
      event = time_events.DateTimeValuesEvent(
          creation_time, definitions.TIME_DESCRIPTION_CREATION)
      parser_mediator.ProduceEventWithEventData(event, event_data)

    if last_modification_time:
      event = time_events.DateTimeValuesEvent(
          last_modification_time, definitions.TIME_DESCRIPTION_MODIFICATION)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def _ReadTableOffsets(self, parser_mediator, file_object):
    """Reads the table offsets.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Returns:
      list[int]: table offsets.
    """
    # INFO: The HEADER KEYCHAIN:
    # [DBHEADER] + [DBSCHEMA] + [OFFSET TABLE A] + ... + [OFFSET TABLE Z]
    # Where the table offset is relative to the first byte of the DB Schema,
    # then we must add to this offset the size of the [DBHEADER].

    # Read the database schema and extract the offset for all the tables.
    # They are ordered by file position from the top to the bottom of the file.
    table_offsets = []

    try:
      db_schema_struct = self.KEYCHAIN_DB_SCHEMA.parse_stream(file_object)
    except (IOError, construct.FieldError):
      parser_mediator.ProduceExtractionError(
          'unable to parse database schema structure')
      return []

    for index in range(db_schema_struct.number_of_tables):
      try:
        table_offset = self.TABLE_OFFSET.parse_stream(file_object)
      except (IOError, construct.FieldError):
        parser_mediator.ProduceExtractionError(
            'unable to parse table offsets: {0:d}'.format(index))
        # Return the offsets read so far; a bare return would yield None
        # and break the caller, which iterates over the result.
        return table_offsets

      table_offsets.append(table_offset + self.KEYCHAIN_DB_HEADER.sizeof())

    return table_offsets

  @classmethod
  def GetFormatSpecification(cls):
    """Retrieves the format specification.

    Returns:
      FormatSpecification: format specification.
    """
    format_specification = specification.FormatSpecification(cls.NAME)
    format_specification.AddNewSignature(
        cls.KEYCHAIN_SIGNATURE, offset=0)
    return format_specification

  def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses a MacOS keychain file-like object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    try:
      db_header = self.KEYCHAIN_DB_HEADER.parse_stream(file_object)
    except (IOError, construct.FieldError):
      raise errors.UnableToParseFile('Unable to parse file header.')

    if db_header.signature != self.KEYCHAIN_SIGNATURE:
      raise errors.UnableToParseFile('Not a MacOS keychain file.')

    if (db_header.major_version != self.KEYCHAIN_MAJOR_VERSION or
        db_header.minor_version != self.KEYCHAIN_MINOR_VERSION):
      # The version fields are UBInt16 integers, so they must be formatted
      # with {:d}; {:s} raises ValueError on int and crashed this path.
      parser_mediator.ProduceExtractionError(
          'unsupported format version: {0:d}.{1:d}'.format(
              db_header.major_version, db_header.minor_version))
      return

    # TODO: document format and determine if -1 offset correction is needed.
    table_offsets = self._ReadTableOffsets(parser_mediator, file_object)
    for table_offset in table_offsets:
      # Skipping X bytes, unknown data at this point.
      file_object.seek(table_offset, os.SEEK_SET)

      try:
        table = self.TABLE_HEADER.parse_stream(file_object)
      except (IOError, construct.FieldError):
        parser_mediator.ProduceExtractionError(
            'unable to parse table structure at offset: 0x{0:08x}'.format(
                table_offset))
        continue

      # Table_offset: absolute byte in the file where the table starts.
      # table.first_record: first record in the table, relative to the
      # first byte of the table.
      file_object.seek(table_offset + table.first_record, os.SEEK_SET)

      if table.record_type == self.RECORD_TYPE_INTERNET:
        for _ in range(table.number_of_records):
          self._ReadEntryInternet(parser_mediator, file_object)

      elif table.record_type == self.RECORD_TYPE_APPLICATION:
        for _ in range(table.number_of_records):
          self._ReadEntryApplication(parser_mediator, file_object)
class CupsIppParser(interface.FileObjectParser):
  """Parser for CUPS IPP control files.

  Extracts print job metadata (application, user, printer, URI) and the
  creation, processing and completion timestamps from a CUPS IPP file.
  """

  NAME = 'cups_ipp'
  DESCRIPTION = 'Parser for CUPS IPP files.'

  # INFO:
  # For each file, we have only one document with three different timestamps:
  # Created, process and finished.
  # Format:
  # [HEADER: MAGIC + KNOWN_TYPE][GROUP A]...[GROUP Z][GROUP_END: 0x03]
  # GROUP: [GROUP ID][PAIR A]...[PAIR Z] where [PAIR: NAME + VALUE]
  #   GROUP ID: [1byte ID]
  #   PAIR: [TagID][\x00][Name][Value])
  #     TagID: 1 byte integer with the type of "Value".
  #     Name: [Length][Text][\00]
  #       Name can be empty when the name has more than one value.
  #       Example: family name "lopez mata" with more than one surname.
  #       Type_Text + [0x06, family, 0x00] + [0x05, lopez, 0x00] +
  #       Type_Text + [0x00, 0x00] + [0x04, mata, 0x00]
  #     Value: can be integer, boolean, or text provided by TagID.
  #       If boolean, Value: [\x01][0x00(False)] or [\x01(True)]
  #       If integer, Value: [\x04][Integer]
  #       If text, Value: [Length text][Text][\00]

  # Magic number that identify the CUPS IPP supported version.
  IPP_MAJOR_VERSION = 2
  IPP_MINOR_VERSION = 0

  # Supported Operation ID.
  IPP_OP_ID = 5

  # CUPS IPP File header.
  CUPS_IPP_HEADER = construct.Struct(
      'cups_ipp_header_struct',
      construct.UBInt8('major_version'),
      construct.UBInt8('minor_version'),
      construct.UBInt16('operation_id'),
      construct.UBInt32('request_id'))

  # Group ID that indicates the end of the IPP Control file.
  GROUP_END = 3

  # Identification Groups.
  GROUP_LIST = [1, 2, 4, 5, 6, 7]

  # Type ID, per cups source file ipp-support.c.
  TYPE_GENERAL_INTEGER = 0x20
  TYPE_INTEGER = 0x21
  TYPE_BOOL = 0x22
  TYPE_ENUMERATION = 0x23
  TYPE_DATETIME = 0x31

  # Type of values that can be extracted.
  INTEGER_8 = construct.UBInt8('integer')
  INTEGER_32 = construct.UBInt32('integer')
  TEXT = construct.PascalString(
      'text', length_field=construct.UBInt8('length'))
  BOOLEAN = construct.Struct(
      'boolean_value', construct.Padding(1), INTEGER_8)
  INTEGER = construct.Struct(
      'integer_value', construct.Padding(1), INTEGER_32)

  # This is an RFC2579 datetime.
  DATETIME = construct.Struct(
      'datetime',
      construct.Padding(1),
      construct.UBInt16('year'),
      construct.UBInt8('month'),
      construct.UBInt8('day'),
      construct.UBInt8('hour'),
      construct.UBInt8('minutes'),
      construct.UBInt8('seconds'),
      construct.UBInt8('deciseconds'),
      construct.String('direction_from_utc', length=1, encoding='ascii'),
      construct.UBInt8('hours_from_utc'),
      construct.UBInt8('minutes_from_utc'),
  )

  # Name of the pair.
  PAIR_NAME = construct.Struct(
      'pair_name', TEXT, construct.Padding(1))

  # Specific CUPS IPP to generic name.
  _NAME_PAIR_TRANSLATION = {
      'com.apple.print.JobInfo.PMApplicationName': 'application',
      'com.apple.print.JobInfo.PMJobOwner': 'owner',
      'DestinationPrinterID': 'printer_id',
      'document-format': 'doc_type',
      'job-name': 'job_name',
      'job-originating-host-name': 'computer_name',
      'job-originating-user-name': 'user',
      'job-uuid': 'job_id',
      'printer-uri': 'uri'}

  # Attribute names carrying RFC2579 date-time values, mapped to the
  # event timestamp description they produce.
  _DATE_TIME_VALUES = {
      'date-time-at-creation': definitions.TIME_DESCRIPTION_CREATION,
      'date-time-at-processing': definitions.TIME_DESCRIPTION_START,
      'date-time-at-completed': definitions.TIME_DESCRIPTION_END}

  # Attribute names carrying POSIX (seconds since epoch) timestamps.
  _POSIX_TIME_VALUES = {
      'time-at-creation': definitions.TIME_DESCRIPTION_CREATION,
      'time-at-processing': definitions.TIME_DESCRIPTION_START,
      'time-at-completed': definitions.TIME_DESCRIPTION_END}

  # All attribute names that hold time values; these are removed from
  # data_dict and handled separately in ParseFileObject.
  _DATE_TIME_VALUE_NAMES = list(_DATE_TIME_VALUES.keys())
  _DATE_TIME_VALUE_NAMES.extend(list(_POSIX_TIME_VALUES.keys()))

  def _GetStringValue(self, data_dict, name, default_value=None):
    """Retrieves a specific string value from the data dict.

    Args:
      data_dict (dict[str, list[str]): values per name.
      name (str): name of the value to retrieve.
      default_value (Optional[object]): value returned when the name is
          not present or has no values.

    Returns:
      str: value represented as a string, with comma-containing values
          quoted, or default_value when the name is missing.
    """
    values = data_dict.get(name, None)
    if not values:
      return default_value

    # Quote values containing a comma so the joined result stays parseable.
    for index, value in enumerate(values):
      if ',' in value:
        values[index] = '"{0:s}"'.format(value)

    return ', '.join(values)

  def _ReadPair(self, parser_mediator, file_object):
    """Reads an attribute name and value pair from a CUPS IPP event.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): file-like object.

    Returns:
      tuple: contains:
        str: name or None.
        str: value or None.
    """
    # Pair = Type ID + Name + Value.
    try:
      # Can be:
      #   Group ID + IDtag = Group ID (1byte) + Tag ID (1byte) + '0x00'.
      #   IDtag = Tag ID (1byte) + '0x00'.
      type_id = self.INTEGER_8.parse_stream(file_object)
      if type_id == self.GROUP_END:
        # (None, None) signals end of file / unreadable pair to the caller.
        return None, None

      elif type_id in self.GROUP_LIST:
        # If it is a group ID we must read the next byte that contains
        # the first TagID.
        type_id = self.INTEGER_8.parse_stream(file_object)

      # 0x00 separator character.
      self.INTEGER_8.parse_stream(file_object)

    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceExtractionError(
          'unable to parse pair identifier with error: {0!s}'.format(
              exception))
      return None, None

    # Name = Length name + name + 0x00
    try:
      pair_name = self.PAIR_NAME.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceExtractionError(
          'unable to parse pair name with error: {0!s}'.format(
              exception))
      return None, None

    try:
      name = pair_name.text.decode('utf-8')
    except UnicodeDecodeError as exception:
      parser_mediator.ProduceExtractionError(
          'unable to decode pair name with error: {0!s}'.format(
              exception))
      return None, None

    # Value: can be integer, boolean or text select by Type ID.
    if type_id in (
        self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER, self.TYPE_ENUMERATION):
      value_structure = self.INTEGER
    elif type_id == self.TYPE_BOOL:
      value_structure = self.BOOLEAN
    elif type_id == self.TYPE_DATETIME:
      value_structure = self.DATETIME
    else:
      # Any unrecognized type ID is treated as text.
      value_structure = self.TEXT

    try:
      value = value_structure.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceExtractionError(
          'unable to parse value with error: {0!s}'.format(exception))
      return None, None

    # Convert the raw parsed structure into a Python value.
    if type_id in (
        self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER, self.TYPE_ENUMERATION):
      value = value.integer

    elif type_id == self.TYPE_BOOL:
      value = bool(value.integer)

    elif type_id == self.TYPE_DATETIME:
      rfc2579_date_time_tuple = (
          value.year, value.month, value.day,
          value.hour, value.minutes, value.seconds, value.deciseconds,
          value.direction_from_utc, value.hours_from_utc,
          value.minutes_from_utc)
      value = dfdatetime_rfc2579_date_time.RFC2579DateTime(
          rfc2579_date_time_tuple=rfc2579_date_time_tuple)

    else:
      try:
        value = value.decode('utf-8')
      except UnicodeDecodeError as exception:
        parser_mediator.ProduceExtractionError(
            'unable to decode value with error: {0!s}'.format(
                exception))
        return None, None

    return name, value

  def _ReadPairs(self, parser_mediator, file_object):
    """Reads the attribute name and value pairs from a CUPS IPP event.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): file-like object.

    Returns:
      dict[str, list[str]]: values per name.
    """
    data_dict = {}

    name, value = self._ReadPair(parser_mediator, file_object)
    # A (None, None) pair marks either the GROUP_END tag or a parse failure.
    while name or value:
      # Translate the known "name" CUPS IPP to a generic name value.
      pretty_name = self._NAME_PAIR_TRANSLATION.get(name, name)
      data_dict.setdefault(pretty_name, []).append(value)
      name, value = self._ReadPair(parser_mediator, file_object)

    return data_dict

  def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses a CUPS IPP file-like object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    try:
      header = self.CUPS_IPP_HEADER.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      raise errors.UnableToParseFile(
          'Unable to parse CUPS IPP Header with error: {0!s}'.format(
              exception))

    if (header.major_version != self.IPP_MAJOR_VERSION or
        header.minor_version != self.IPP_MINOR_VERSION):
      raise errors.UnableToParseFile(
          '[{0:s}] Unsupported version number.'.format(self.NAME))

    if header.operation_id != self.IPP_OP_ID:
      # Warn if the operation ID differs from the standard one. We should be
      # able to parse the file nonetheless.
      logger.debug(
          '[{0:s}] Unsupported operation identifier in file: {1:s}.'.format(
              self.NAME, parser_mediator.GetDisplayName()))

    data_dict = self._ReadPairs(parser_mediator, file_object)

    # Split the time attributes out of data_dict; they become events rather
    # than event data attributes.
    time_dict = {}

    for name in self._DATE_TIME_VALUE_NAMES:
      value = data_dict.get(name, None)
      if value is not None:
        time_dict[name] = value
        del data_dict[name]

    event_data = CupsIppEventData()
    event_data.application = self._GetStringValue(data_dict, 'application')
    event_data.computer_name = self._GetStringValue(
        data_dict, 'computer_name')
    event_data.copies = data_dict.get('copies', [0])[0]
    event_data.data_dict = data_dict
    event_data.doc_type = self._GetStringValue(data_dict, 'doc_type')
    event_data.job_id = self._GetStringValue(data_dict, 'job_id')
    event_data.job_name = self._GetStringValue(data_dict, 'job_name')
    event_data.user = self._GetStringValue(data_dict, 'user')
    event_data.owner = self._GetStringValue(data_dict, 'owner')
    event_data.printer_id = self._GetStringValue(data_dict, 'printer_id')
    event_data.uri = self._GetStringValue(data_dict, 'uri')

    for name, usage in iter(self._DATE_TIME_VALUES.items()):
      time_values = time_dict.get(name, [])
      for date_time in time_values:
        event = time_events.DateTimeValuesEvent(date_time, usage)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    for name, usage in iter(self._POSIX_TIME_VALUES.items()):
      time_values = time_dict.get(name, [])
      for time_value in time_values:
        date_time = dfdatetime_posix_time.PosixTime(timestamp=time_value)
        event = time_events.DateTimeValuesEvent(date_time, usage)
        parser_mediator.ProduceEventWithEventData(event, event_data)
class FirefoxCacheParser(parser.BaseParser):
  """Extract cached records from Firefox.

  Scans _CACHE_00[1-3]_ block files (or the alternative per-entry metadata
  files) for valid record headers and yields one event per cache record.
  """

  NAME = 'firefox_cache'

  # Number of bytes allocated to a cache record metadata.
  RECORD_HEADER_SIZE = 36

  # Initial size of Firefox >= 4 cache files.
  INITIAL_CACHE_FILE_SIZE = 1024 * 1024 * 4

  # Smallest possible block size in Firefox cache files.
  MIN_BLOCK_SIZE = 256

  # On-disk record header layout (big-endian).
  RECORD_HEADER_STRUCT = construct.Struct(
      'record_header',
      construct.UBInt16('major'),
      construct.UBInt16('minor'),
      construct.UBInt32('location'),
      construct.UBInt32('fetch_count'),
      construct.UBInt32('last_fetched'),
      construct.UBInt32('last_modified'),
      construct.UBInt32('expire_time'),
      construct.UBInt32('data_size'),
      construct.UBInt32('request_size'),
      construct.UBInt32('info_size'))

  # Alternative filename: five hex characters + 'm' + two digits,
  # e.g. "01ABCm02"; 'm' marks metadata files ('d' files hold data only).
  ALTERNATIVE_CACHE_NAME = (
      pyparsing.Word(pyparsing.hexnums, exact=5) +
      pyparsing.Word("m", exact=1) +
      pyparsing.Word(pyparsing.nums, exact=2))

  FIREFOX_CACHE_CONFIG = collections.namedtuple(
      u'firefox_cache_config', u'block_size first_record_offset')

  REQUEST_METHODS = [
      u'GET', 'HEAD', 'POST', 'PUT', 'DELETE',
      u'TRACE', 'OPTIONS', 'CONNECT', 'PATCH']

  def __GetFirefoxConfig(self, file_entry):
    """Determine cache file block size. Raises exception if not found.

    Scans the start of the file for the first valid record and infers the
    block size (256/1024/4096) from that record's total size.
    """
    if file_entry.name[0:9] != '_CACHE_00':
      try:
        # Match alternative filename. Five hex characters + 'm' + two digit
        # number, e.g. "01ABCm02". 'm' is for metadata. Cache files with 'd'
        # instead contain data only.
        self.ALTERNATIVE_CACHE_NAME.parseString(file_entry.name)
      except pyparsing.ParseException:
        raise errors.UnableToParseFile(u'Not a Firefox cache file.')

    file_object = file_entry.GetFileObject()

    # There ought to be a valid record within the first 4MB. We use this
    # limit to prevent reading large invalid files.
    to_read = min(file_object.get_size(), self.INITIAL_CACHE_FILE_SIZE)

    while file_object.get_offset() < to_read:
      offset = file_object.get_offset()

      try:
        # We have not yet determined the block size, so we use the smallest
        # possible size.
        record = self.__NextRecord(file_entry.name, file_object,
                                   self.MIN_BLOCK_SIZE)

        record_size = (self.RECORD_HEADER_SIZE + record.request_size +
                       record.info_size)

        # Map the record size back to the fixed block size of the file.
        if record_size >= 4096:
          # _CACHE_003_
          block_size = 4096
        elif record_size >= 1024:
          # _CACHE_002_
          block_size = 1024
        else:
          # _CACHE_001_
          block_size = 256

        return self.FIREFOX_CACHE_CONFIG(block_size, offset)

      except IOError:
        logging.debug(u'{0:s}:{1:d}: Invalid record.'.format(
            file_entry.name, offset))

    raise errors.UnableToParseFile(u'Could not find a valid cache record. '
                                   u'Not a Firefox cache file.')

  def __Accept(self, candidate, block_size):
    """Determine whether the candidate is a valid cache record."""
    record_size = (self.RECORD_HEADER_SIZE + candidate.request_size +
                   candidate.info_size)

    # Sanity checks: non-empty request, fetched at least once, format major
    # version 1, and a record small enough to be plausible (< 256 blocks).
    return (candidate.request_size > 0 and candidate.fetch_count > 0 and
            candidate.major == 1 and record_size // block_size < 256)

  def __NextRecord(self, filename, file_object, block_size):
    """Provide the next cache record.

    Raises:
      IOError: when the stream cannot be parsed or the candidate header is
          not a valid record (the reader is advanced to the next block).
    """
    offset = file_object.get_offset()

    try:
      candidate = self.RECORD_HEADER_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError):
      raise IOError(u'Unable to parse stream.')

    if not self.__Accept(candidate, block_size):
      # Move reader to next candidate block.
      file_object.seek(block_size - self.RECORD_HEADER_SIZE, os.SEEK_CUR)
      raise IOError(u'Not a valid Firefox cache record.')

    # The last byte in a request is null.
    url = file_object.read(candidate.request_size)[:-1]

    # HTTP response header, even elements are keys, odd elements values.
    headers = file_object.read(candidate.info_size)

    request_method, _, _ = (
        headers.partition('request-method\x00')[2].partition('\x00'))

    _, _, response_head = headers.partition('response-head\x00')

    response_code, _, _ = response_head.partition("\r\n")

    if request_method not in self.REQUEST_METHODS:
      logging.debug(
          u'{0:s}:{1:d}: Unknown HTTP method "{2:s}". Response "{3:s}"'.format(
              filename, offset, request_method, headers))

    if response_code[0:4] != 'HTTP':
      logging.debug(
          u'{0:s}:{1:d}: Could not determine HTTP response code. '
          u'Response headers: "{2:s}".'.format(filename, offset, headers))

    # A request can span multiple blocks, so we use modulo.
    _, remainder = divmod(file_object.get_offset() - offset, block_size)

    # Move reader to next candidate block. Include the null-byte skipped above.
    file_object.seek(block_size - remainder, os.SEEK_CUR)

    return FirefoxCacheEvent(candidate, request_method, url, response_code)

  def Parse(self, file_entry):
    """Extract records from a Firefox cache file.

    Yields:
      FirefoxCacheEvent: one per valid cache record.
    """
    firefox_config = self.__GetFirefoxConfig(file_entry)

    file_object = file_entry.GetFileObject()
    file_object.seek(firefox_config.first_record_offset)

    while file_object.get_offset() < file_object.get_size():
      try:
        yield self.__NextRecord(file_entry.name, file_object,
                                firefox_config.block_size)
      except IOError:
        # NOTE(review): the logged offset subtracts MIN_BLOCK_SIZE even when
        # firefox_config.block_size differs — looks like it may report the
        # wrong position for 1024/4096-byte block files; confirm intent.
        logging.debug(u'{0:s}:{1:d}: Invalid cache record.'.format(
            file_entry.name, file_object.get_offset() - self.MIN_BLOCK_SIZE))
class ASLParser(interface.FileObjectParser):
  """Parser for ASL (Apple System Log) binary database files."""

  _INITIAL_FILE_OFFSET = None

  NAME = u'asl_log'
  DESCRIPTION = u'Parser for ASL log files.'

  # File signature expected at offset 0.
  _ASL_MAGIC = b'ASL DB\x00\x00\x00\x00\x00\x00'

  # ASL file header.
  # magic: magic number that identifies ASL files.
  # version: version of the file.
  # offset: offset of the first record in the file.
  # timestamp: time when the first entry was written.
  #     Contains the number of seconds since January 1, 1970 00:00:00 UTC.
  # last_offset: offset of the last record in the file.
  _ASL_HEADER_STRUCT = construct.Struct(
      u'asl_header_struct',
      construct.String(u'magic', 12),
      construct.UBInt32(u'version'),
      construct.UBInt64(u'offset'),
      construct.UBInt64(u'timestamp'),
      construct.UBInt32(u'cache_size'),
      construct.UBInt64(u'last_offset'),
      construct.Padding(36))

  # The record structure is:
  # [HEAP][STRUCTURE][4xExtraField][2xExtraField]*[PreviousEntry]

  # Record static structure.
  # tam_entry: the number of bytes from this file position until the end
  #     of the record, not counting itself.
  # next_offset: offset of the next record. 0x00 means the last record.
  # asl_message_id: numeric identification of the event.
  # timestamp: the entry creation date and time.
  #     Contains the number of seconds since January 1, 1970 00:00:00 UTC.
  # nanosec: nanoseconds to add to the timestamp.
  # level: level of priority.
  # pid: identifier of the process that asked to save the record.
  # uid: identifier of the user that launched the process.
  # gid: identifier of the group that launched the process.
  # read_uid: identifier of a user. Only applied if not -1 (all FF).
  #     Only root and this user can read the entry.
  # read_gid: same as read_uid, but for the group.
  _ASL_RECORD_STRUCT = construct.Struct(
      u'asl_record_struct',
      construct.Padding(2),
      construct.UBInt32(u'tam_entry'),
      construct.UBInt64(u'next_offset'),
      construct.UBInt64(u'asl_message_id'),
      construct.UBInt64(u'timestamp'),
      construct.UBInt32(u'nanosec'),
      construct.UBInt16(u'level'),
      construct.UBInt16(u'flags'),
      construct.UBInt32(u'pid'),
      construct.UBInt32(u'uid'),
      construct.UBInt32(u'gid'),
      construct.UBInt32(u'read_uid'),
      construct.UBInt32(u'read_gid'),
      construct.UBInt64(u'ref_pid'))

  # Cached size in bytes of the static record structure.
  _ASL_RECORD_STRUCT_SIZE = _ASL_RECORD_STRUCT.sizeof()

  # 8-byte fields, they can be:
  # - String: [Nibble = 1000 (8)][Nibble = Length][7 Bytes = String].
  # - Integer: an integer with the byte position in the file that points
  #     to an ASL_RECORD_DYN_VALUE struct. If the value of the integer
  #     is equal to 0, it means that it has no data (skip).

  # If the field is a String, we use this structure to decode each
  # integer byte into the corresponding character (ASCII char).
  _ASL_OCTET_STRING = construct.ExprAdapter(
      construct.Octet(u'string'),
      encoder=lambda obj, ctx: ord(obj),
      decoder=lambda obj, ctx: chr(obj))

  # Field string structure. If the first bit is 1, it means that it
  # is a String (1000) = 8, then the next nibble has the number of
  # characters. The last 7 bytes are the string bytes.
  _ASL_STRING = construct.BitStruct(
      u'string',
      construct.Flag(u'type'),
      construct.Bits(u'filler', 3),
      construct.If(lambda ctx: ctx.type, construct.Nibble(u'string_length')),
      construct.If(lambda ctx: ctx.type,
                   construct.Array(7, _ASL_OCTET_STRING)))

  # 8-byte pointer to a byte position in the file.
  _ASL_POINTER = construct.UBInt64(u'pointer')

  # Dynamic data structure pointed to by a pointer that contains a String:
  # [2 bytes padding][4 bytes size of String][String].
  _ASL_RECORD_DYN_VALUE = construct.Struct(
      u'asl_record_dyn_value',
      construct.Padding(2),
      construct.UBInt32(u'size'),
      construct.Bytes(u'value', lambda ctx: ctx.size))

  def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses an ASL file-like object.

    Walks the linked list of records starting at the header's first-record
    offset and produces one event per record via ReadASLEvent.

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).
      file_object: a file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    file_object.seek(0, os.SEEK_SET)

    try:
      header = self._ASL_HEADER_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      raise errors.UnableToParseFile(
          u'Unable to parse ASL Header with error: {0:s}.'.format(exception))

    if header.magic != self._ASL_MAGIC:
      raise errors.UnableToParseFile(u'Not an ASL Header, unable to parse.')

    offset = header.offset
    if not offset:
      # An offset of 0 means the file contains no records.
      return

    header_last_offset = header.last_offset

    previous_offset = offset
    event_object, offset = self.ReadASLEvent(
        parser_mediator, file_object, offset)
    while event_object:
      # Sanity check, the last read element must be the same as
      # indicated by the header.
      if offset == 0 and previous_offset != header_last_offset:
        parser_mediator.ProduceParseError(
            u'Unable to parse header. Last element header does not match '
            u'header offset.')
      previous_offset = offset
      event_object, offset = self.ReadASLEvent(
          parser_mediator, file_object, offset)

  def ReadASLEvent(self, parser_mediator, file_object, offset):
    """Reads an ASL record at a specific offset.

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).
      file_object: a file-like object that points to an ASL file.
      offset: an integer containing the offset of the ASL record.

    Returns:
      A tuple of an event object extracted from the ASL record, and the
      offset to the next ASL record in the file. (None, None) on any
      parse or read error.
    """
    # The heap of the entry is saved to try to avoid seek (performance issue).
    # It has the real start position of the entry.
    dynamic_data_offset = file_object.tell()

    try:
      dynamic_data = file_object.read(offset - dynamic_data_offset)
    except IOError as exception:
      parser_mediator.ProduceParseError(
          u'unable to read ASL record dynamic data with error: {0:s}'.format(
              exception))
      return None, None

    if not offset:
      return None, None

    try:
      record_struct = self._ASL_RECORD_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceParseError(
          u'unable to parse ASL record with error: {0:s}'.format(exception))
      return None, None

    # Variable tam_fields is the real length of the dynamic fields.
    # We have this: [Record_Struct] + [Dynamic_Fields] + [Pointer_Entry_Before]
    # In Record_Struct we have a field called tam_entry, which has the number
    # of bytes until the end of the entry from the position of that field.
    # The tam_entry is between the 2nd and the 6th byte in the [Record_Struct].
    # tam_entry = ([Record_Struct]-6)+[Dynamic_Fields]+[Pointer_Entry_Before]
    # Also, we do not need [Pointer_Entry_Before] and so we subtract the size
    # of [Pointer_Entry_Before], which is 8 bytes (8):
    # tam_entry = ([Record_Struct]-6)+[Dynamic_Fields]+[Pointer_Entry_Before]
    # [Dynamic_Fields] = tam_entry - [Record_Struct] + 6 - 8
    # [Dynamic_Fields] = tam_entry - [Record_Struct] - 2
    tam_fields = record_struct.tam_entry - self._ASL_RECORD_STRUCT_SIZE - 2

    # Dynamic part of the entry that contains a minimum of four fields of
    # 8 bytes plus 2 x [8 bytes] fields for each extra ASL_Field.
    # The first four fields are always the Host, Sender, Facility and Message.
    # After the first four fields, the entry might have extra ASL_Fields.
    # For each extra ASL_Field, it has a pair of 8-byte fields where the first
    # 8 bytes contains the name of the extra ASL_Field and the second 8 bytes
    # contains the text of the extra field.
    # Each of these 8-byte fields can be saved using one of these three
    # different types:
    # - Null value ('0000000000000000'): nothing to do.
    # - String: it is a string if first bit = 1 or first nibble = 8 (1000).
    #     The second nibble has the length of the string.
    #     The next 7 bytes have the text characters of the string,
    #     padding the end with null characters: '0x00'.
    #     Example: [8468 6964 6400 0000]
    #              [8] String, [4] length, value: [68 69 64 64] = hidd.
    # - Pointer: static position in the file to a special struct
    #     implemented as an ASL_RECORD_DYN_VALUE.
    #     Example: [0000 0000 0000 0077]
    #     It points to the file position 0x077 that has an
    #     ASL_RECORD_DYN_VALUE structure.
    values = []
    while tam_fields > 0:
      try:
        field_data = file_object.read(8)
      except IOError as exception:
        parser_mediator.ProduceParseError(
            u'unable to read ASL field with error: {0:s}'.format(exception))
        return None, None

      # Try to read the field data as a string.
      try:
        asl_string_struct = self._ASL_STRING.parse(field_data)
        string_data = b''.join(
            asl_string_struct.string[0:asl_string_struct.string_length])
        values.append(string_data)

        # Go to parse the next extra field.
        tam_fields -= 8
        continue

      # NOTE(review): for non-string fields the If subconstructs parse to
      # None and the slice above would presumably raise TypeError rather
      # than ValueError — confirm this handler actually triggers for
      # pointer fields.
      except ValueError:
        pass

      # If the field is not a string it must be a pointer.
      try:
        pointer_value = self._ASL_POINTER.parse(field_data)
      except ValueError as exception:
        parser_mediator.ProduceParseError(
            u'unable to parse ASL field with error: {0:s}'.format(exception))
        return None, None

      if not pointer_value:
        # Next extra field: 8 bytes more.
        tam_fields -= 8
        continue

      # The next IF ELSE is only for performance issues, avoiding seek.
      # If the pointer points to a lower position than where the actual entry
      # starts, it means that it points to a previous entry.
      pos = pointer_value - dynamic_data_offset

      # Greater or equal 0 means that the data is in the actual entry.
      if pos >= 0:
        try:
          dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse(
              dynamic_data[pos:])
          dyn_value = dyn_value_struct.value.partition(b'\x00')[0]
          values.append(dyn_value)
        except (IOError, construct.FieldError) as exception:
          parser_mediator.ProduceParseError((
              u'unable to parse ASL record dynamic value with error: '
              u'{0:s}').format(exception))
          return None, None

      else:
        # Only if it is a pointer that points to the
        # heap from another entry do we use the seek method.
        main_position = file_object.tell()

        # If the pointer is in a previous entry.
        if main_position > pointer_value:
          file_object.seek(pointer_value - main_position, os.SEEK_CUR)
          try:
            dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse_stream(
                file_object)
            dyn_value = dyn_value_struct.value.partition(b'\x00')[0]
            values.append(dyn_value)
          except (IOError, construct.FieldError):
            parser_mediator.ProduceParseError((
                u'the pointer at {0:d} (0x{0:08x}) points to invalid '
                u'information.').format(
                    main_position - self._ASL_POINTER.sizeof()))

          # Come back to the position in the entry.
          _ = file_object.read(main_position - file_object.tell())

        else:
          # Pointer after the current read position: skip forward by
          # reading, parse, then seek back.
          _ = file_object.read(pointer_value - main_position)

          dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse_stream(
              file_object)
          dyn_value = dyn_value_struct.value.partition(b'\x00')[0]
          values.append(dyn_value)

          # Come back to the position in the entry.
          file_object.seek(main_position - file_object.tell(), os.SEEK_CUR)

      # Next extra field: 8 bytes more.
      tam_fields -= 8

    # Read the last 8 bytes of the record that point to the previous entry.
    _ = file_object.read(8)

    # Parsed section, we translate the read data to an appropriate format.
    micro_seconds, _ = divmod(record_struct.nanosec, 1000)

    # Parsing the dynamic values (text or pointers to positions with text).
    # The first four are always the host, sender, facility, and message.
    number_of_values = len(values)
    if number_of_values < 4:
      parser_mediator.ProduceParseError(
          u'less than four values read from an ASL event.')

    computer_name = u'N/A'
    sender = u'N/A'
    facility = u'N/A'
    message = u'N/A'

    if number_of_values >= 1:
      computer_name = values[0].decode(u'utf-8')

    if number_of_values >= 2:
      sender = values[1].decode(u'utf-8')

    if number_of_values >= 3:
      facility = values[2].decode(u'utf-8')

    if number_of_values >= 4:
      message = values[3].decode(u'utf-8')

    # If the entry has extra fields, they work as pairs:
    # the first is the name of the field and the second the value.
    extra_information = u''
    if number_of_values > 4 and number_of_values % 2 == 0:
      # Taking all the extra attributes and merging them together,
      # eg: a = [1, 2, 3, 4] will look like "1: 2, 3: 4".
      # NOTE(review): slicing the result of map() only works on Python 2,
      # where map returns a list — confirm target interpreter.
      try:
        extra_values = map(py2to3.UNICODE_TYPE, values[4:])
        extra_information = u', '.join(
            map(u': '.join, zip(extra_values[0::2], extra_values[1::2])))
      except UnicodeDecodeError as exception:
        parser_mediator.ProduceParseError(
            u'Unable to decode all ASL values in the extra information '
            u'fields.')

    event_object = ASLEvent(
        record_struct.timestamp, offset, record_struct.asl_message_id,
        record_struct.level, record_struct.pid, record_struct.uid,
        record_struct.gid, record_struct.read_uid, record_struct.read_gid,
        computer_name, sender, facility, message, extra_information,
        micro_seconds=micro_seconds)
    parser_mediator.ProduceEvent(event_object)

    return (event_object, record_struct.next_offset)
class JavaIDXParser(interface.BaseParser):
  """Parse Java IDX files for download events.

  There are five structures defined. 6.02 files had one generic section
  that retained all data. From 6.03, the file went to a multi-section
  format where later sections were optional and had variable-lengths.
  6.03, 6.04, and 6.05 files all have their main data section (#2)
  begin at offset 128. The short structure is because 6.05 files
  deviate after the 8th byte. So, grab the first 8 bytes to ensure it's
  valid, get the file version, then continue on with the correct
  structures.
  """

  NAME = 'java_idx'
  DESCRIPTION = u'Parser for Java IDX files.'

  # Short probe header shared by all versions: busy/incomplete flags and
  # the file format version (for example 602, 603, 604 or 605).
  IDX_SHORT_STRUCT = construct.Struct(
      'magic',
      construct.UBInt8('busy'),
      construct.UBInt8('incomplete'),
      construct.UBInt32('idx_version'))

  # Single generic data section used by 6.02 files.
  IDX_602_STRUCT = construct.Struct(
      'IDX_602_Full',
      construct.UBInt16('null_space'),
      construct.UBInt8('shortcut'),
      construct.UBInt32('content_length'),
      construct.UBInt64('last_modified_date'),
      construct.UBInt64('expiration_date'),
      construct.PascalString(
          'version_string', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'url', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'namespace', length_field=construct.UBInt16('length')),
      construct.UBInt32('FieldCount'))

  # First data section of 6.03/6.04/6.05 files: dates plus the lengths of
  # the optional later sections.
  IDX_605_SECTION_ONE_STRUCT = construct.Struct(
      'IDX_605_Section1',
      construct.UBInt8('shortcut'),
      construct.UBInt32('content_length'),
      construct.UBInt64('last_modified_date'),
      construct.UBInt64('expiration_date'),
      construct.UBInt64('validation_date'),
      construct.UBInt8('signed'),
      construct.UBInt32('sec2len'),
      construct.UBInt32('sec3len'),
      construct.UBInt32('sec4len'))

  # Second data section (static offset 128): URL, IP address and the
  # number of HTTP header fields that follow.
  IDX_605_SECTION_TWO_STRUCT = construct.Struct(
      'IDX_605_Section2',
      construct.PascalString(
          'version', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'url', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'namespec', length_field=construct.UBInt16('length')),
      construct.PascalString(
          'ip_address', length_field=construct.UBInt16('length')),
      construct.UBInt32('FieldCount'))

  # Java uses Pascal-style strings, but with a 2-byte length field.
  JAVA_READUTF_STRING = construct.Struct(
      'Java.ReadUTF',
      construct.PascalString(
          'string', length_field=construct.UBInt16('length')))

  def Parse(self, parser_context, file_entry):
    """Extract data from a Java cache IDX file.

    This is the main parsing engine for the parser. It determines if
    the selected file is a proper IDX file. It then checks the file
    version to determine the correct structure to apply to extract
    data.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).

    Raises:
      UnableToParseFile: when the file is not a valid or supported IDX
          file, or the URL/IP address cannot be determined.
    """
    file_object = file_entry.GetFileObject()
    try:
      magic = self.IDX_SHORT_STRUCT.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      raise errors.UnableToParseFile(
          u'Unable to parse Java IDX file with error: {0:s}.'.format(
              exception))

    # Fields magic.busy and magic.incomplete are normally 0x00. They
    # are set to 0x01 if the file is currently being downloaded. Logic
    # checks for > 1 to avoid a race condition and still reject any
    # file with other data.
    # Field magic.idx_version is the file version, of which only
    # certain versions are supported.
    if magic.busy > 1 or magic.incomplete > 1:
      raise errors.UnableToParseFile(u'Not a valid Java IDX file')

    if not magic.idx_version in [602, 603, 604, 605]:
      raise errors.UnableToParseFile(u'Not a valid Java IDX file')

    # Obtain the relevant values from the file. The last modified date
    # denotes when the file was last modified on the HOST. For example,
    # when the file was uploaded to a web server.
    if magic.idx_version == 602:
      section_one = self.IDX_602_STRUCT.parse_stream(file_object)
      last_modified_date = section_one.last_modified_date
      url = section_one.url
      ip_address = 'Unknown'
      http_header_count = section_one.FieldCount
    elif magic.idx_version in [603, 604, 605]:

      # IDX 6.03 and 6.04 have two unused bytes before the structure.
      if magic.idx_version in [603, 604]:
        file_object.read(2)

      # IDX 6.03, 6.04, and 6.05 files use the same structures for the
      # remaining data.
      section_one = self.IDX_605_SECTION_ONE_STRUCT.parse_stream(file_object)
      last_modified_date = section_one.last_modified_date
      if file_object.get_size() > 128:
        file_object.seek(128)  # Static offset for section 2.
        section_two = self.IDX_605_SECTION_TWO_STRUCT.parse_stream(file_object)
        url = section_two.url
        ip_address = section_two.ip_address
        http_header_count = section_two.FieldCount
      else:
        url = 'Unknown'
        ip_address = 'Unknown'
        http_header_count = 0

    # File offset is now just prior to HTTP headers. Make sure there
    # are headers, and then parse them to retrieve the download date.
    download_date = None
    for field in range(0, http_header_count):
      # NOTE(review): the loop variable is immediately shadowed by the
      # parsed field structure; the range only counts iterations.
      field = self.JAVA_READUTF_STRING.parse_stream(file_object)
      value = self.JAVA_READUTF_STRING.parse_stream(file_object)
      if field.string == 'date':
        # Time string "should" be in UTC or have an associated time zone
        # information in the string itself. If that is not the case then
        # there is no reliable method for plaso to determine the proper
        # timezone, so the assumption is that it is UTC.
        download_date = timelib.Timestamp.FromTimeString(
            value.string, gmt_as_timezone=False)

    if not url or not ip_address:
      raise errors.UnableToParseFile(
          u'Unexpected Error: URL or IP address not found in file.')

    last_modified_timestamp = timelib.Timestamp.FromJavaTime(
        last_modified_date)
    # TODO: Move the timestamp description fields into eventdata.
    event_object = JavaIDXEvent(
        last_modified_timestamp, 'File Hosted Date', magic.idx_version, url,
        ip_address)
    parser_context.ProduceEvent(
        event_object, parser_name=self.NAME, file_entry=file_entry)

    if section_one:
      expiration_date = section_one.get('expiration_date', None)
      if expiration_date:
        expiration_timestamp = timelib.Timestamp.FromJavaTime(expiration_date)
        event_object = JavaIDXEvent(
            expiration_timestamp, 'File Expiration Date', magic.idx_version,
            url, ip_address)
        parser_context.ProduceEvent(
            event_object, parser_name=self.NAME, file_entry=file_entry)

    if download_date:
      event_object = JavaIDXEvent(
          download_date, eventdata.EventTimestamp.FILE_DOWNLOADED,
          magic.idx_version, url, ip_address)
      parser_context.ProduceEvent(
          event_object, parser_name=self.NAME, file_entry=file_entry)
#!/usr/bin/python import binascii import construct import datetime import sys from binplist import binplist HFS_to_Epoch = 2082844800 s_alias = construct.Struct('plist_alias', construct.Padding(4), construct.UBInt16('length'), construct.Padding(6), construct.UBInt32('timestamp1'), construct.Padding(18), construct.UBInt32('timestamp2'), construct.Padding(20)) s_type = construct.UBInt16('type') s_volume = construct.Struct( 'volume', construct.UBInt16('volume1_length'), construct.UBInt16('characters1'), construct.String('volume1', lambda ctx: ctx.characters1 * 2), construct.Padding(2), construct.UBInt16('volume2_length'), construct.UBInt16('characters2'), construct.String('volume2', lambda ctx: ctx.characters2 * 2)) s_mount_point = construct.PascalString( 'mount_point', length_field=construct.UBInt16('length'))