Python UBInt64 Examples

Programming Language: Python

Namespace/Package Name: construct

Method/Function: UBInt64

Examples at hotexamples.com: 11

Python UBInt64 - 11 examples found. These are the top rated real world Python examples of construct.UBInt64 extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

class BSMParser(interface.FileObjectParser):
  """Parser for BSM files."""

  NAME = 'bsm_log'
  DESCRIPTION = 'Parser for BSM log files.'

  # BSM supported version (0x0b = 11).
  AUDIT_HEADER_VERSION = 11

  # Magic Trail Header.
  BSM_TOKEN_TRAILER_MAGIC = b'b105'

  # IP Version constants.
  AU_IPv4 = 4
  AU_IPv6 = 16

  IPV4_STRUCT = construct.UBInt32('ipv4')

  IPV6_STRUCT = construct.Struct(
      'ipv6',
      construct.UBInt64('high'),
      construct.UBInt64('low'))

  # Tested structures.
  # INFO: I have ommited the ID in the structures declaration.
  #       I used the BSM_TYPE first to read the ID, and then, the structure.
  # Tokens always start with an ID value that identifies their token
  # type and subsequent structure.
  _BSM_TOKEN = construct.UBInt8('token_id')

  # Data type structures.
  BSM_TOKEN_DATA_CHAR = construct.String('value', 1)
  BSM_TOKEN_DATA_SHORT = construct.UBInt16('value')
  BSM_TOKEN_DATA_INTEGER = construct.UBInt32('value')

  # Common structure used by other structures.
  # audit_uid: integer, uid that generates the entry.
  # effective_uid: integer, the permission user used.
  # effective_gid: integer, the permission group used.
  # real_uid: integer, user id of the user that execute the process.
  # real_gid: integer, group id of the group that execute the process.
  # pid: integer, identification number of the process.
  # session_id: unknown, need research.
  BSM_TOKEN_SUBJECT_SHORT = construct.Struct(
      'subject_data',
      construct.UBInt32('audit_uid'),
      construct.UBInt32('effective_uid'),
      construct.UBInt32('effective_gid'),
      construct.UBInt32('real_uid'),
      construct.UBInt32('real_gid'),
      construct.UBInt32('pid'),
      construct.UBInt32('session_id'))

  # Common structure used by other structures.
  # Identify the kind of inet (IPv4 or IPv6)
  # TODO: instead of 16, AU_IPv6 must be used.
  BSM_IP_TYPE_SHORT = construct.Struct(
      'bsm_ip_type_short',
      construct.UBInt32('net_type'),
      construct.Switch(
          'ip_addr',
          _BSMTokenGetNetType,
          {16: IPV6_STRUCT},
          default=IPV4_STRUCT))

  # Initial fields structure used by header structures.
  # length: integer, the length of the entry, equal to trailer (doc: length).
  # version: integer, version of BSM (AUDIT_HEADER_VERSION).
  # event_type: integer, the type of event (/etc/security/audit_event).
  # modifier: integer, unknown, need research (It is always 0).
  BSM_HEADER = construct.Struct(
      'bsm_header',
      construct.UBInt32('length'),
      construct.UBInt8('version'),
      construct.UBInt16('event_type'),
      construct.UBInt16('modifier'))

  # First token of one entry.
  # timestamp: unsigned integer, number of seconds since
  #            January 1, 1970 00:00:00 UTC.
  # microseconds: unsigned integer, number of micro seconds.
  BSM_HEADER32 = construct.Struct(
      'bsm_header32',
      BSM_HEADER,
      construct.UBInt32('timestamp'),
      construct.UBInt32('microseconds'))

  BSM_HEADER64 = construct.Struct(
      'bsm_header64',
      BSM_HEADER,
      construct.UBInt64('timestamp'),
      construct.UBInt64('microseconds'))

  BSM_HEADER32_EX = construct.Struct(
      'bsm_header32_ex',
      BSM_HEADER,
      BSM_IP_TYPE_SHORT,
      construct.UBInt32('timestamp'),
      construct.UBInt32('microseconds'))

  # Token TEXT, provides extra information.
  BSM_TOKEN_TEXT = construct.Struct(
      'bsm_token_text',
      construct.UBInt16('length'),
      construct.Array(_BSMTokenGetLength, construct.UBInt8('text')))

  # Path of the executable.
  BSM_TOKEN_PATH = BSM_TOKEN_TEXT

  # Identified the end of the record (follow by TRAILER).
  # status: integer that identifies the status of the exit (BSM_ERRORS).
  # return: returned value from the operation.
  BSM_TOKEN_RETURN32 = construct.Struct(
      'bsm_token_return32',
      construct.UBInt8('status'),
      construct.UBInt32('return_value'))

  BSM_TOKEN_RETURN64 = construct.Struct(
      'bsm_token_return64',
      construct.UBInt8('status'),
      construct.UBInt64('return_value'))

  # Identified the number of bytes that was written.
  # magic: 2 bytes that identifies the TRAILER (BSM_TOKEN_TRAILER_MAGIC).
  # length: integer that has the number of bytes from the entry size.
  BSM_TOKEN_TRAILER = construct.Struct(
      'bsm_token_trailer',
      construct.UBInt16('magic'),
      construct.UBInt32('record_length'))

  # A 32-bits argument.
  # num_arg: the number of the argument.
  # name_arg: the argument's name.
  # text: the string value of the argument.
  BSM_TOKEN_ARGUMENT32 = construct.Struct(
      'bsm_token_argument32',
      construct.UBInt8('num_arg'),
      construct.UBInt32('name_arg'),
      construct.UBInt16('length'),
      construct.Array(_BSMTokenGetLength, construct.UBInt8('text')))

  # A 64-bits argument.
  # num_arg: integer, the number of the argument.
  # name_arg: text, the argument's name.
  # text: the string value of the argument.
  BSM_TOKEN_ARGUMENT64 = construct.Struct(
      'bsm_token_argument64',
      construct.UBInt8('num_arg'),
      construct.UBInt64('name_arg'),
      construct.UBInt16('length'),
      construct.Array(_BSMTokenGetLength, construct.UBInt8('text')))

  # Identify an user.
  # terminal_id: unknown, research needed.
  # terminal_addr: unknown, research needed.
  BSM_TOKEN_SUBJECT32 = construct.Struct(
      'bsm_token_subject32',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      IPV4_STRUCT)

  # Identify an user using a extended Token.
  # terminal_port: unknown, need research.
  # net_type: unknown, need research.
  BSM_TOKEN_SUBJECT32_EX = construct.Struct(
      'bsm_token_subject32_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_opaque // AUT_OPAQUE
  BSM_TOKEN_OPAQUE = BSM_TOKEN_TEXT

  # au_to_seq // AUT_SEQ
  BSM_TOKEN_SEQUENCE = BSM_TOKEN_DATA_INTEGER

  # Program execution with options.
  # For each argument we are going to have a string+ "\x00".
  # Example: [00 00 00 02][41 42 43 00 42 42 00]
  #          2 Arguments, Arg1: [414243] Arg2: [4242].
  BSM_TOKEN_EXEC_ARGUMENTS = construct.UBInt32('number_arguments')

  BSM_TOKEN_EXEC_ARGUMENT = construct.Struct(
      'bsm_token_exec_argument',
      construct.RepeatUntil(
          _BSMTokenIsEndOfString, construct.StaticField("text", 1)))

  # au_to_in_addr // AUT_IN_ADDR:
  BSM_TOKEN_ADDR = IPV4_STRUCT

  # au_to_in_addr_ext // AUT_IN_ADDR_EX:
  BSM_TOKEN_ADDR_EXT = construct.Struct(
      'bsm_token_addr_ext',
      construct.UBInt32('net_type'),
      IPV6_STRUCT)

  # au_to_ip // AUT_IP:
  # TODO: parse this header in the correct way.
  BSM_TOKEN_IP = construct.String('binary_ipv4_add', 20)

  # au_to_ipc // AUT_IPC:
  BSM_TOKEN_IPC = construct.Struct(
      'bsm_token_ipc',
      construct.UBInt8('object_type'),
      construct.UBInt32('object_id'))

  # au_to_ipc_perm // au_to_ipc_perm
  BSM_TOKEN_IPC_PERM = construct.Struct(
      'bsm_token_ipc_perm',
      construct.UBInt32('user_id'),
      construct.UBInt32('group_id'),
      construct.UBInt32('creator_user_id'),
      construct.UBInt32('creator_group_id'),
      construct.UBInt32('access_mode'),
      construct.UBInt32('slot_seq'),
      construct.UBInt32('key'))

  # au_to_iport // AUT_IPORT:
  BSM_TOKEN_PORT = construct.UBInt16('port_number')

  # au_to_file // AUT_OTHER_FILE32:
  BSM_TOKEN_FILE = construct.Struct(
      'bsm_token_file',
      construct.UBInt32('timestamp'),
      construct.UBInt32('microseconds'),
      construct.UBInt16('length'),
      construct.Array(_BSMTokenGetLength, construct.UBInt8('text')))

  # au_to_subject64 // AUT_SUBJECT64:
  BSM_TOKEN_SUBJECT64 = construct.Struct(
      'bsm_token_subject64',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64('terminal_port'),
      IPV4_STRUCT)

  # au_to_subject64_ex // AU_IPv4:
  BSM_TOKEN_SUBJECT64_EX = construct.Struct(
      'bsm_token_subject64_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      construct.UBInt32('terminal_type'),
      BSM_IP_TYPE_SHORT)

  # au_to_process32 // AUT_PROCESS32:
  BSM_TOKEN_PROCESS32 = construct.Struct(
      'bsm_token_process32',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      IPV4_STRUCT)

  # au_to_process64 // AUT_PROCESS32:
  BSM_TOKEN_PROCESS64 = construct.Struct(
      'bsm_token_process64',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64('terminal_port'),
      IPV4_STRUCT)

  # au_to_process32_ex // AUT_PROCESS32_EX:
  BSM_TOKEN_PROCESS32_EX = construct.Struct(
      'bsm_token_process32_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_process64_ex // AUT_PROCESS64_EX:
  BSM_TOKEN_PROCESS64_EX = construct.Struct(
      'bsm_token_process64_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64('terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_sock_inet32 // AUT_SOCKINET32:
  BSM_TOKEN_AUT_SOCKINET32 = construct.Struct(
      'bsm_token_aut_sockinet32',
      construct.UBInt16('net_type'),
      construct.UBInt16('port_number'),
      IPV4_STRUCT)

  # Info: checked against the source code of XNU, but not against
  #       real BSM file.
  BSM_TOKEN_AUT_SOCKINET128 = construct.Struct(
      'bsm_token_aut_sockinet128',
      construct.UBInt16('net_type'),
      construct.UBInt16('port_number'),
      IPV6_STRUCT)

  INET6_ADDR_TYPE = construct.Struct(
      'addr_type',
      construct.UBInt16('ip_type'),
      construct.UBInt16('source_port'),
      construct.UBInt64('saddr_high'),
      construct.UBInt64('saddr_low'),
      construct.UBInt16('destination_port'),
      construct.UBInt64('daddr_high'),
      construct.UBInt64('daddr_low'))

  INET4_ADDR_TYPE = construct.Struct(
      'addr_type',
      construct.UBInt16('ip_type'),
      construct.UBInt16('source_port'),
      construct.UBInt32('source_address'),
      construct.UBInt16('destination_port'),
      construct.UBInt32('destination_address'))

  # au_to_socket_ex // AUT_SOCKET_EX
  # TODO: Change the 26 for unixbsm.BSM_PROTOCOLS.INET6.
  BSM_TOKEN_AUT_SOCKINET32_EX = construct.Struct(
      'bsm_token_aut_sockinet32_ex',
      construct.UBInt16('socket_domain'),
      construct.UBInt16('socket_type'),
      construct.Switch(
          'structure_addr_port',
          _BSMTokenGetSocketDomain,
          {26: INET6_ADDR_TYPE},
          default=INET4_ADDR_TYPE))

  # au_to_sock_unix // AUT_SOCKUNIX
  BSM_TOKEN_SOCKET_UNIX = construct.Struct(
      'bsm_token_au_to_sock_unix',
      construct.UBInt16('family'),
      construct.RepeatUntil(
          _BSMTokenIsEndOfString,
          construct.StaticField("path", 1)))

  # au_to_data // au_to_data
  # how to print: bsmtoken.BSM_TOKEN_DATA_PRINT.
  # type: bsmtoken.BSM_TOKEN_DATA_TYPE.
  # unit_count: number of type values.
  # BSM_TOKEN_DATA has a end field = type * unit_count
  BSM_TOKEN_DATA = construct.Struct(
      'bsm_token_data',
      construct.UBInt8('how_to_print'),
      construct.UBInt8('data_type'),
      construct.UBInt8('unit_count'))

  # au_to_attr32 // AUT_ATTR32
  BSM_TOKEN_ATTR32 = construct.Struct(
      'bsm_token_attr32',
      construct.UBInt32('file_mode'),
      construct.UBInt32('uid'),
      construct.UBInt32('gid'),
      construct.UBInt32('file_system_id'),
      construct.UBInt64('file_system_node_id'),
      construct.UBInt32('device'))

  # au_to_attr64 // AUT_ATTR64
  BSM_TOKEN_ATTR64 = construct.Struct(
      'bsm_token_attr64',
      construct.UBInt32('file_mode'),
      construct.UBInt32('uid'),
      construct.UBInt32('gid'),
      construct.UBInt32('file_system_id'),
      construct.UBInt64('file_system_node_id'),
      construct.UBInt64('device'))

  # au_to_exit // AUT_EXIT
  BSM_TOKEN_EXIT = construct.Struct(
      'bsm_token_exit',
      construct.UBInt32('status'),
      construct.UBInt32('return_value'))

  # au_to_newgroups // AUT_NEWGROUPS
  # INFO: we must read BSM_TOKEN_DATA_INTEGER for each group.
  BSM_TOKEN_GROUPS = construct.UBInt16('group_number')

  # au_to_exec_env == au_to_exec_args
  BSM_TOKEN_EXEC_ENV = BSM_TOKEN_EXEC_ARGUMENTS

  # au_to_zonename //AUT_ZONENAME
  BSM_TOKEN_ZONENAME = BSM_TOKEN_TEXT

  # Token ID.
  # List of valid Token_ID.
  # Token_ID -> (NAME_STRUCTURE, STRUCTURE)
  # Only the checked structures are been added to the valid structures lists.
  _BSM_TOKEN_TYPES = {
      17: ('BSM_TOKEN_FILE', BSM_TOKEN_FILE),
      19: ('BSM_TOKEN_TRAILER', BSM_TOKEN_TRAILER),
      20: ('BSM_HEADER32', BSM_HEADER32),
      21: ('BSM_HEADER64', BSM_HEADER64),
      33: ('BSM_TOKEN_DATA', BSM_TOKEN_DATA),
      34: ('BSM_TOKEN_IPC', BSM_TOKEN_IPC),
      35: ('BSM_TOKEN_PATH', BSM_TOKEN_PATH),
      36: ('BSM_TOKEN_SUBJECT32', BSM_TOKEN_SUBJECT32),
      38: ('BSM_TOKEN_PROCESS32', BSM_TOKEN_PROCESS32),
      39: ('BSM_TOKEN_RETURN32', BSM_TOKEN_RETURN32),
      40: ('BSM_TOKEN_TEXT', BSM_TOKEN_TEXT),
      41: ('BSM_TOKEN_OPAQUE', BSM_TOKEN_OPAQUE),
      42: ('BSM_TOKEN_ADDR', BSM_TOKEN_ADDR),
      43: ('BSM_TOKEN_IP', BSM_TOKEN_IP),
      44: ('BSM_TOKEN_PORT', BSM_TOKEN_PORT),
      45: ('BSM_TOKEN_ARGUMENT32', BSM_TOKEN_ARGUMENT32),
      47: ('BSM_TOKEN_SEQUENCE', BSM_TOKEN_SEQUENCE),
      96: ('BSM_TOKEN_ZONENAME', BSM_TOKEN_ZONENAME),
      113: ('BSM_TOKEN_ARGUMENT64', BSM_TOKEN_ARGUMENT64),
      114: ('BSM_TOKEN_RETURN64', BSM_TOKEN_RETURN64),
      116: ('BSM_HEADER32_EX', BSM_HEADER32_EX),
      119: ('BSM_TOKEN_PROCESS64', BSM_TOKEN_PROCESS64),
      122: ('BSM_TOKEN_SUBJECT32_EX', BSM_TOKEN_SUBJECT32_EX),
      127: ('BSM_TOKEN_AUT_SOCKINET32_EX', BSM_TOKEN_AUT_SOCKINET32_EX),
      128: ('BSM_TOKEN_AUT_SOCKINET32', BSM_TOKEN_AUT_SOCKINET32)}

  # Untested structures.
  # When not tested structure is found, we try to parse using also
  # these structures.
  BSM_TYPE_LIST_NOT_TESTED = {
      49: ('BSM_TOKEN_ATTR', BSM_TOKEN_ATTR32),
      50: ('BSM_TOKEN_IPC_PERM', BSM_TOKEN_IPC_PERM),
      52: ('BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS),
      59: ('BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS),
      60: ('BSM_TOKEN_EXEC_ARGUMENTS', BSM_TOKEN_EXEC_ARGUMENTS),
      61: ('BSM_TOKEN_EXEC_ENV', BSM_TOKEN_EXEC_ENV),
      62: ('BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32),
      82: ('BSM_TOKEN_EXIT', BSM_TOKEN_EXIT),
      115: ('BSM_TOKEN_ATTR64', BSM_TOKEN_ATTR64),
      117: ('BSM_TOKEN_SUBJECT64', BSM_TOKEN_SUBJECT64),
      123: ('BSM_TOKEN_PROCESS32_EX', BSM_TOKEN_PROCESS32_EX),
      124: ('BSM_TOKEN_PROCESS64_EX', BSM_TOKEN_PROCESS64_EX),
      125: ('BSM_TOKEN_SUBJECT64_EX', BSM_TOKEN_SUBJECT64_EX),
      126: ('BSM_TOKEN_ADDR_EXT', BSM_TOKEN_ADDR_EXT),
      129: ('BSM_TOKEN_AUT_SOCKINET128', BSM_TOKEN_AUT_SOCKINET128),
      130: ('BSM_TOKEN_SOCKET_UNIX', BSM_TOKEN_SOCKET_UNIX)}

  MESSAGE_CAN_NOT_SAVE = (
      'Plaso: some tokens from this entry can not be saved. Entry at 0x{0:X} '
      'with unknown token id "0x{1:X}".')

  # BSM token types:
  # https://github.com/openbsm/openbsm/blob/master/sys/bsm/audit_record.h
  _BSM_TOKEN_TYPE_ARGUMENT32 = 45
  _BSM_TOKEN_TYPE_ARGUMENT64 = 113
  _BSM_TOKEN_TYPE_ATTR = 49
  _BSM_TOKEN_TYPE_ATTR32 = 62
  _BSM_TOKEN_TYPE_ATTR64 = 115
  _BSM_TOKEN_TYPE_EXEC_ARGUMENTS = 60
  _BSM_TOKEN_TYPE_EXEC_ENV = 61
  _BSM_TOKEN_TYPE_EXIT = 82
  _BSM_TOKEN_TYPE_HEADER32 = 20
  _BSM_TOKEN_TYPE_HEADER32_EX = 116
  _BSM_TOKEN_TYPE_HEADER64 = 21
  _BSM_TOKEN_TYPE_PATH = 35
  _BSM_TOKEN_TYPE_PROCESS32 = 38
  _BSM_TOKEN_TYPE_PROCESS32_EX = 123
  _BSM_TOKEN_TYPE_PROCESS64 = 119
  _BSM_TOKEN_TYPE_PROCESS64_EX = 124
  _BSM_TOKEN_TYPE_RETURN32 = 39
  _BSM_TOKEN_TYPE_RETURN64 = 114
  _BSM_TOKEN_TYPE_SUBJECT32 = 36
  _BSM_TOKEN_TYPE_SUBJECT32_EX = 122
  _BSM_TOKEN_TYPE_SUBJECT64 = 117
  _BSM_TOKEN_TYPE_SUBJECT64_EX = 125
  _BSM_TOKEN_TYPE_TEXT = 40
  _BSM_TOKEN_TYPE_ZONENAME = 96

  _BSM_ARGUMENT_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_ARGUMENT32,
      _BSM_TOKEN_TYPE_ARGUMENT64)

  _BSM_ATTR_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_ATTR,
      _BSM_TOKEN_TYPE_ATTR32,
      _BSM_TOKEN_TYPE_ATTR64)

  _BSM_EXEV_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_EXEC_ARGUMENTS,
      _BSM_TOKEN_TYPE_EXEC_ENV)

  _BSM_HEADER_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_HEADER32,
      _BSM_TOKEN_TYPE_HEADER32_EX,
      _BSM_TOKEN_TYPE_HEADER64)

  _BSM_PROCESS_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_PROCESS32,
      _BSM_TOKEN_TYPE_PROCESS64)

  _BSM_PROCESS_EX_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_PROCESS32_EX,
      _BSM_TOKEN_TYPE_PROCESS64_EX)

  _BSM_RETURN_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_EXIT,
      _BSM_TOKEN_TYPE_RETURN32,
      _BSM_TOKEN_TYPE_RETURN64)

  _BSM_SUBJECT_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_SUBJECT32,
      _BSM_TOKEN_TYPE_SUBJECT64)

  _BSM_SUBJECT_EX_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_SUBJECT32_EX,
      _BSM_TOKEN_TYPE_SUBJECT64_EX)

  _BSM_UTF8_BYTE_ARRAY_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_PATH,
      _BSM_TOKEN_TYPE_TEXT,
      _BSM_TOKEN_TYPE_ZONENAME)

  def __init__(self):
    """Initializes a parser object."""
    super(BSMParser, self).__init__()
    # Create the dictionary with all token IDs: tested and untested.
    self._bsm_type_list_all = self._BSM_TOKEN_TYPES.copy()
    self._bsm_type_list_all.update(self.BSM_TYPE_LIST_NOT_TESTED)

  def _CopyByteArrayToBase16String(self, byte_array):
    """Copies a byte array into a base-16 encoded Unicode string.

    Args:
      byte_array (bytes): A byte array.

    Returns:
      str: a base-16 encoded Unicode string.
    """
    return ''.join(['{0:02x}'.format(byte) for byte in byte_array])

  def _CopyUtf8ByteArrayToString(self, byte_array):
    """Copies a UTF-8 encoded byte array into a Unicode string.

    Args:
      byte_array (bytes): A byte array containing an UTF-8 encoded string.

    Returns:
      str: A Unicode string.
    """
    byte_stream = b''.join(map(chr, byte_array))

    try:
      string = byte_stream.decode('utf-8')
    except UnicodeDecodeError:
      logging.warning('Unable to decode UTF-8 formatted byte array.')
      string = byte_stream.decode('utf-8', errors='ignore')

    string, _, _ = string.partition(b'\x00')
    return string

  def _IPv4Format(self, address):
    """Formats an IPv4 address as a human readable string.

    Args:
      address (int): IPv4 address.

    Returns:
      str: human readable string of IPv4 address in 4 octet representation:
          "1.2.3.4".
    """
    ipv4_string = self.IPV4_STRUCT.build(address)
    return socket.inet_ntoa(ipv4_string)

  def _IPv6Format(self, high, low):
    """Formats an IPv6 address as a human readable string.

    Args:
      high (int): upper 64-bit part of the IPv6 address.
      low (int): lower 64-bit part of the IPv6 address.

    Returns:
      str: human readable string of IPv6 address.
    """
    ipv6_string = self.IPV6_STRUCT.build(
        construct.Container(high=high, low=low))
    # socket.inet_ntop not supported in Windows.
    if hasattr(socket, 'inet_ntop'):
      return socket.inet_ntop(socket.AF_INET6, ipv6_string)

    # TODO: this approach returns double "::", illegal IPv6 addr.
    str_address = binascii.hexlify(ipv6_string)
    address = []
    blank = False
    for pos in range(0, len(str_address), 4):
      if str_address[pos:pos + 4] == '0000':
        if not blank:
          address.append('')
          blank = True
      else:
        blank = False
        address.append(str_address[pos:pos + 4].lstrip('0'))
    return ':'.join(address)

  def _ParseBSMEvent(self, parser_mediator, file_object):
    """Parses a BSM entry (BSMEvent) from the file-like object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Returns:
      bool: True if the BSM entry was parsed.
    """
    record_start_offset = file_object.tell()

    try:
      token_type = self._BSM_TOKEN.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceExtractionError((
          'unable to parse BSM token type at offset: 0x{0:08x} with error: '
          '{1:s}.').format(record_start_offset, exception))
      return False

    if token_type not in self._BSM_HEADER_TOKEN_TYPES:
      parser_mediator.ProduceExtractionError(
          'unsupported token type: {0:d} at offset: 0x{1:08x}.'.format(
              token_type, record_start_offset))
      # TODO: if it is a Mac OS X, search for the trailer magic value
      #       as a end of the entry can be a possibility to continue.
      return False

    _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None))

    try:
      token = record_structure.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceExtractionError((
          'unable to parse BSM record at offset: 0x{0:08x} with error: '
          '{1:s}.').format(record_start_offset, exception))
      return False

    event_type = bsmtoken.BSM_AUDIT_EVENT.get(
        token.bsm_header.event_type, 'UNKNOWN')
    event_type = '{0:s} ({1:d})'.format(
        event_type, token.bsm_header.event_type)

    timestamp = (token.timestamp * 1000000) + token.microseconds
    date_time = dfdatetime_posix_time.PosixTimeInMicroseconds(
        timestamp=timestamp)

    record_length = token.bsm_header.length
    record_end_offset = record_start_offset + record_length

    # A dict of tokens that has the entry.
    extra_tokens = {}

    # Read until we reach the end of the record.
    while file_object.tell() < record_end_offset:
      # Check if it is a known token.
      try:
        token_type = self._BSM_TOKEN.parse_stream(file_object)
      except (IOError, construct.FieldError):
        logging.warning(
            'Unable to parse the Token ID at position: {0:d}'.format(
                file_object.tell()))
        return False

      _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None))

      if not record_structure:
        pending = record_end_offset - file_object.tell()
        new_extra_tokens = self.TryWithUntestedStructures(
            file_object, token_type, pending)
        extra_tokens.update(new_extra_tokens)
      else:
        token = record_structure.parse_stream(file_object)
        new_extra_tokens = self.FormatToken(token_type, token, file_object)
        extra_tokens.update(new_extra_tokens)

    if file_object.tell() > record_end_offset:
      logging.warning(
          'Token ID {0:d} not expected at position 0x{1:08x}.'
          'Jumping for the next entry.'.format(
              token_type, file_object.tell()))
      try:
        file_object.seek(
            record_end_offset - file_object.tell(), os.SEEK_CUR)
      except (IOError, construct.FieldError) as exception:
        logging.warning(
            'Unable to jump to next entry with error: {0:s}'.format(exception))
        return False

    # BSM can be in more than one OS: BSD, Solaris and Mac OS X.
    if parser_mediator.platform != 'MacOSX':
      event_data = BSMEventData()
    else:
      event_data = MacBSMEventData()

      # In Mac OS X the last two tokens are the return status and the trailer.
      return_value = extra_tokens.get('BSM_TOKEN_RETURN32')
      if not return_value:
        return_value = extra_tokens.get('BSM_TOKEN_RETURN64')
      if not return_value:
        return_value = 'UNKNOWN'

      event_data.return_value = return_value

    event_data.event_type = event_type
    event_data.extra_tokens = extra_tokens
    event_data.offset = record_start_offset
    event_data.record_length = record_length

    # TODO: check why trailer was passed to event in original while
    # event was expecting record length.
    # if extra_tokens:
    #   trailer = extra_tokens.get('BSM_TOKEN_TRAILER', 'unknown')

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_CREATION)
    parser_mediator.ProduceEventWithEventData(event, event_data)

    return True

  def _RawToUTF8(self, byte_stream):
    """Copies a UTF-8 byte stream into a Unicode string.

    Args:
      byte_stream (bytes): byte stream containing an UTF-8 encoded string.

    Returns:
      str: A Unicode string.
    """
    try:
      string = byte_stream.decode('utf-8')
    except UnicodeDecodeError:
      logging.warning(
          'Decode UTF8 failed, the message string may be cut short.')
      string = byte_stream.decode('utf-8', errors='ignore')
    return string.partition(b'\x00')[0]

  def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses a BSM file-like object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    try:
      is_bsm = self.VerifyFile(parser_mediator, file_object)
    except (IOError, construct.FieldError) as exception:
      raise errors.UnableToParseFile(
          'Unable to parse BSM file with error: {0:s}'.format(exception))

    if not is_bsm:
      raise errors.UnableToParseFile('Not a BSM File, unable to parse.')

    file_object.seek(0, os.SEEK_SET)

    while self._ParseBSMEvent(parser_mediator, file_object):
      pass

  def VerifyFile(self, parser_mediator, file_object):
    """Check if the file is a BSM file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Returns:
      bool: True if this is a valid BSM file, False otherwise.
    """
    # First part of the entry is always a Header.
    try:
      token_type = self._BSM_TOKEN.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False

    if token_type not in self._BSM_HEADER_TOKEN_TYPES:
      return False

    _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None))

    try:
      header = record_structure.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False

    if header.bsm_header.version != self.AUDIT_HEADER_VERSION:
      return False

    try:
      token_identifier = self._BSM_TOKEN.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False

    # If is Mac OS X BSM file, next entry is a  text token indicating
    # if it is a normal start or it is a recovery track.
    if parser_mediator.platform == 'MacOSX':
      token_type, record_structure = self._BSM_TOKEN_TYPES.get(
          token_identifier, ('', None))

      if not record_structure:
        return False

      if token_type != 'BSM_TOKEN_TEXT':
        logging.warning('It is not a valid first entry for Mac OS X BSM.')
        return False

      try:
        token = record_structure.parse_stream(file_object)
      except (IOError, construct.FieldError):
        return

      text = self._CopyUtf8ByteArrayToString(token.text)
      if (text != 'launchctl::Audit startup' and
          text != 'launchctl::Audit recovery'):
        logging.warning('It is not a valid first entry for Mac OS X BSM.')
        return False

    return True

  def TryWithUntestedStructures(self, file_object, token_id, pending):
    """Try to parse the pending part of the entry using untested structures.

    Args:
      file_object: BSM file.
      token_id: integer with the id that comes from the unknown token.
      pending: pending length of the entry.

    Returns:
      A list of extra tokens data that can be parsed using non-tested
      structures. A message indicating that a structure cannot be parsed
      is added for unparsed structures.
    """
    # Data from the unknown structure.
    start_position = file_object.tell()
    start_token_id = token_id
    extra_tokens = {}

    # Read all the "pending" bytes.
    try:
      if token_id in self._bsm_type_list_all:
        token = self._bsm_type_list_all[token_id][1].parse_stream(file_object)
        new_extra_tokens = self.FormatToken(token_id, token, file_object)
        extra_tokens.update(new_extra_tokens)
        while file_object.tell() < (start_position + pending):
          # Check if it is a known token.
          try:
            token_id = self._BSM_TOKEN.parse_stream(file_object)
          except (IOError, construct.FieldError):
            logging.warning(
                'Unable to parse the Token ID at position: {0:d}'.format(
                    file_object.tell()))
            return
          if token_id not in self._bsm_type_list_all:
            break
          token = self._bsm_type_list_all[token_id][1].parse_stream(file_object)
          new_extra_tokens = self.FormatToken(token_id, token, file_object)
          extra_tokens.update(new_extra_tokens)
    except (IOError, construct.FieldError):
      token_id = 255

    next_entry = (start_position + pending)
    if file_object.tell() != next_entry:
      # Unknown Structure.
      logging.warning('Unknown Token at "0x{0:X}", ID: {1} (0x{2:X})'.format(
          start_position - 1, token_id, token_id))
      # TODO: another way to save this information must be found.
      extra_tokens.update(
          {'message': self.MESSAGE_CAN_NOT_SAVE.format(
              start_position - 1, start_token_id)})
      # Move to next entry.
      file_object.seek(next_entry - file_object.tell(), os.SEEK_CUR)
      # It returns null list because it doesn't know witch structure was
      # the incorrect structure that makes that it can arrive to the spected
      # end of the entry.
      return {}
    return extra_tokens

  def FormatToken(self, token_id, token, file_object):
    """Parse the Token depending of the type of the structure.

    Args:
      token_id (int): identification of the token_type.
      token (structure): token struct to parse.
      file_object: BSM file.

    Returns:
      (dict): parsed Token values.

    Keys for returned dictionary are token name like BSM_TOKEN_SUBJECT32.
    Values of this dictionary are key-value pairs like terminal_ip:127.0.0.1.
    """
    if token_id not in self._bsm_type_list_all:
      return {}

    bsm_type, _ = self._bsm_type_list_all.get(token_id, ['', ''])

    if token_id in self._BSM_UTF8_BYTE_ARRAY_TOKEN_TYPES:
      try:
        string = self._CopyUtf8ByteArrayToString(token.text)
      except TypeError:
        string = 'Unknown'
      return {bsm_type: string}

    elif token_id in self._BSM_RETURN_TOKEN_TYPES:
      return {bsm_type: {
          'error': bsmtoken.BSM_ERRORS.get(token.status, 'Unknown'),
          'token_status': token.status,
          'call_status': token.return_value
      }}

    elif token_id in self._BSM_SUBJECT_TOKEN_TYPES:
      return {bsm_type: {
          'aid': token.subject_data.audit_uid,
          'euid': token.subject_data.effective_uid,
          'egid': token.subject_data.effective_gid,
          'uid': token.subject_data.real_uid,
          'gid': token.subject_data.real_gid,
          'pid': token.subject_data.pid,
          'session_id': token.subject_data.session_id,
          'terminal_port': token.terminal_port,
          'terminal_ip': self._IPv4Format(token.ipv4)
      }}

    elif token_id in self._BSM_SUBJECT_EX_TOKEN_TYPES:
      if token.bsm_ip_type_short.net_type == self.AU_IPv6:
        ip = self._IPv6Format(
            token.bsm_ip_type_short.ip_addr.high,
            token.bsm_ip_type_short.ip_addr.low)
      elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
        ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
      else:
        ip = 'unknown'
      return {bsm_type: {
          'aid': token.subject_data.audit_uid,
          'euid': token.subject_data.effective_uid,
          'egid': token.subject_data.effective_gid,
          'uid': token.subject_data.real_uid,
          'gid': token.subject_data.real_gid,
          'pid': token.subject_data.pid,
          'session_id': token.subject_data.session_id,
          'terminal_port': token.terminal_port,
          'terminal_ip': ip
      }}

    elif token_id in self._BSM_ARGUMENT_TOKEN_TYPES:
      string = self._CopyUtf8ByteArrayToString(token.text)
      return {bsm_type: {
          'string': string,
          'num_arg': token.num_arg,
          'is': token.name_arg}}

    elif token_id in self._BSM_EXEV_TOKEN_TYPES:
      arguments = []
      for _ in range(0, token):
        sub_token = self.BSM_TOKEN_EXEC_ARGUMENT.parse_stream(file_object)
        string = self._CopyUtf8ByteArrayToString(sub_token.text)
        arguments.append(string)
      return {bsm_type: ' '.join(arguments)}

    elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET32':
      return {bsm_type: {
          'protocols':
          bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'),
          'net_type': token.net_type,
          'port': token.port_number,
          'address': self._IPv4Format(token.ipv4)
      }}

    elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET128':
      return {bsm_type: {
          'protocols':
          bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'),
          'net_type': token.net_type,
          'port': token.port_number,
          'address': self._IPv6Format(token.ipv6.high, token.ipv6.low)
      }}

    elif bsm_type == 'BSM_TOKEN_ADDR':
      return {bsm_type: self._IPv4Format(token)}

    elif bsm_type == 'BSM_TOKEN_IP':
      return {'IPv4_Header': '0x{0:s}]'.format(token.encode('hex'))}

    elif bsm_type == 'BSM_TOKEN_ADDR_EXT':
      return {bsm_type: {
          'protocols':
          bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'),
          'net_type': token.net_type,
          'address': self._IPv6Format(token.ipv6.high, token.ipv6.low)
      }}

    elif bsm_type == 'BSM_TOKEN_PORT':
      return {bsm_type: token}

    elif bsm_type == 'BSM_TOKEN_TRAILER':
      return {bsm_type: token.record_length}

    elif bsm_type == 'BSM_TOKEN_FILE':
      # TODO: if this timestamp is usefull, it must be extracted as a separate
      #       event object.
      timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
          token.timestamp, token.microseconds)
      date_time = timelib.Timestamp.CopyToDatetime(timestamp, pytz.UTC)
      date_time_string = date_time.strftime('%Y-%m-%d %H:%M:%S')

      string = self._CopyUtf8ByteArrayToString(token.text)
      return {bsm_type: {'string': string, 'timestamp': date_time_string}}

    elif bsm_type == 'BSM_TOKEN_IPC':
      return {bsm_type: {
          'object_type': token.object_type,
          'object_id': token.object_id
      }}

    elif token_id in self._BSM_PROCESS_TOKEN_TYPES:
      return {bsm_type: {
          'aid': token.subject_data.audit_uid,
          'euid': token.subject_data.effective_uid,
          'egid': token.subject_data.effective_gid,
          'uid': token.subject_data.real_uid,
          'gid': token.subject_data.real_gid,
          'pid': token.subject_data.pid,
          'session_id': token.subject_data.session_id,
          'terminal_port': token.terminal_port,
          'terminal_ip': self._IPv4Format(token.ipv4)
      }}

    elif token_id in self._BSM_PROCESS_EX_TOKEN_TYPES:
      if token.bsm_ip_type_short.net_type == self.AU_IPv6:
        ip = self._IPv6Format(
            token.bsm_ip_type_short.ip_addr.high,
            token.bsm_ip_type_short.ip_addr.low)
      elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
        ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
      else:
        ip = 'unknown'
      return {bsm_type: {
          'aid': token.subject_data.audit_uid,
          'euid': token.subject_data.effective_uid,
          'egid': token.subject_data.effective_gid,
          'uid': token.subject_data.real_uid,
          'gid': token.subject_data.real_gid,
          'pid': token.subject_data.pid,
          'session_id': token.subject_data.session_id,
          'terminal_port': token.terminal_port,
          'terminal_ip': ip
      }}

    elif bsm_type == 'BSM_TOKEN_DATA':
      data = []
      data_type = bsmtoken.BSM_TOKEN_DATA_TYPE.get(token.data_type, '')

      if data_type == 'AUR_CHAR':
        for _ in range(token.unit_count):
          data.append(self.BSM_TOKEN_DATA_CHAR.parse_stream(file_object))

      elif data_type == 'AUR_SHORT':
        for _ in range(token.unit_count):
          data.append(self.BSM_TOKEN_DATA_SHORT.parse_stream(file_object))

      elif data_type == 'AUR_INT32':
        for _ in range(token.unit_count):
          data.append(self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object))

      else:
        data.append('Unknown type data')

      # TODO: the data when it is string ends with ".", HW a space is return
      #       after uses the UTF-8 conversion.
      return {bsm_type: {
          'format': bsmtoken.BSM_TOKEN_DATA_PRINT[token.how_to_print],
          'data':
          '{0}'.format(self._RawToUTF8(''.join(map(str, data))))
      }}

    elif token_id in self._BSM_ATTR_TOKEN_TYPES:
      return {bsm_type: {
          'mode': token.file_mode,
          'uid': token.uid,
          'gid': token.gid,
          'system_id': token.file_system_id,
          'node_id': token.file_system_node_id,
          'device': token.device}}

    elif bsm_type == 'BSM_TOKEN_GROUPS':
      arguments = []
      for _ in range(token):
        arguments.append(
            self._RawToUTF8(
                self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object)))
      return {bsm_type: ','.join(arguments)}

    elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET32_EX':
      if bsmtoken.BSM_PROTOCOLS.get(token.socket_domain, '') == 'INET6':
        saddr = self._IPv6Format(
            token.structure_addr_port.saddr_high,
            token.structure_addr_port.saddr_low)
        daddr = self._IPv6Format(
            token.structure_addr_port.daddr_high,
            token.structure_addr_port.daddr_low)
      else:
        saddr = self._IPv4Format(token.structure_addr_port.source_address)
        daddr = self._IPv4Format(token.structure_addr_port.destination_address)

      return {bsm_type:{
          'from': saddr,
          'from_port': token.structure_addr_port.source_port,
          'to': daddr,
          'to_port': token.structure_addr_port.destination_port}}

    elif bsm_type == 'BSM_TOKEN_IPC_PERM':
      return {bsm_type: {
          'user_id': token.user_id,
          'group_id': token.group_id,
          'creator_user_id': token.creator_user_id,
          'creator_group_id': token.creator_group_id,
          'access': token.access_mode}}

    elif bsm_type == 'BSM_TOKEN_SOCKET_UNIX':
      string = self._CopyUtf8ByteArrayToString(token.path)
      return {bsm_type: {'family': token.family, 'path': string}}

    elif bsm_type == 'BSM_TOKEN_OPAQUE':
      string = self._CopyByteArrayToBase16String(token.text)
      return {bsm_type: string}

    elif bsm_type == 'BSM_TOKEN_SEQUENCE':
      return {bsm_type: token}

Example #2

Show file

File: java_idx.py Project: cvandeplas/plaso

class JavaIDXParser(interface.BaseParser):
    """Parse Java IDX files for download events.

  There are five structures defined. 6.02 files had one generic section
  that retained all data. From 6.03, the file went to a multi-section
  format where later sections were optional and had variable-lengths.
  6.03, 6.04, and 6.05 files all have their main data section (#2)
  begin at offset 128. The short structure is because 6.05 files
  deviate after the 8th byte. So, grab the first 8 bytes to ensure it's
  valid, get the file version, then continue on with the correct
  structures.
  """

    NAME = 'java_idx'
    DESCRIPTION = u'Parser for Java IDX files.'

    IDX_SHORT_STRUCT = construct.Struct('magic', construct.UBInt8('busy'),
                                        construct.UBInt8('incomplete'),
                                        construct.UBInt32('idx_version'))

    IDX_602_STRUCT = construct.Struct(
        'IDX_602_Full', construct.UBInt16('null_space'),
        construct.UBInt8('shortcut'), construct.UBInt32('content_length'),
        construct.UBInt64('last_modified_date'),
        construct.UBInt64('expiration_date'),
        construct.PascalString('version_string',
                               length_field=construct.UBInt16('length')),
        construct.PascalString('url',
                               length_field=construct.UBInt16('length')),
        construct.PascalString('namespace',
                               length_field=construct.UBInt16('length')),
        construct.UBInt32('FieldCount'))

    IDX_605_SECTION_ONE_STRUCT = construct.Struct(
        'IDX_605_Section1', construct.UBInt8('shortcut'),
        construct.UBInt32('content_length'),
        construct.UBInt64('last_modified_date'),
        construct.UBInt64('expiration_date'),
        construct.UBInt64('validation_date'), construct.UBInt8('signed'),
        construct.UBInt32('sec2len'), construct.UBInt32('sec3len'),
        construct.UBInt32('sec4len'))

    IDX_605_SECTION_TWO_STRUCT = construct.Struct(
        'IDX_605_Section2',
        construct.PascalString('version',
                               length_field=construct.UBInt16('length')),
        construct.PascalString('url',
                               length_field=construct.UBInt16('length')),
        construct.PascalString('namespec',
                               length_field=construct.UBInt16('length')),
        construct.PascalString('ip_address',
                               length_field=construct.UBInt16('length')),
        construct.UBInt32('FieldCount'))

    # Java uses Pascal-style strings, but with a 2-byte length field.
    JAVA_READUTF_STRING = construct.Struct(
        'Java.ReadUTF',
        construct.PascalString('string',
                               length_field=construct.UBInt16('length')))

    def Parse(self, parser_context, file_entry):
        """Extract data from a Java cache IDX file.

    This is the main parsing engine for the parser. It determines if
    the selected file is a proper IDX file. It then checks the file
    version to determine the correct structure to apply to extract
    data.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
    """
        file_object = file_entry.GetFileObject()
        try:
            magic = self.IDX_SHORT_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                u'Unable to parse Java IDX file with error: {0:s}.'.format(
                    exception))

        # Fields magic.busy and magic.incomplete are normally 0x00. They
        # are set to 0x01 if the file is currently being downloaded. Logic
        # checks for > 1 to avoid a race condition and still reject any
        # file with other data.
        # Field magic.idx_version is the file version, of which only
        # certain versions are supported.
        if magic.busy > 1 or magic.incomplete > 1:
            raise errors.UnableToParseFile(u'Not a valid Java IDX file')

        if not magic.idx_version in [602, 603, 604, 605]:
            raise errors.UnableToParseFile(u'Not a valid Java IDX file')

        # Obtain the relevant values from the file. The last modified date
        # denotes when the file was last modified on the HOST. For example,
        # when the file was uploaded to a web server.
        if magic.idx_version == 602:
            section_one = self.IDX_602_STRUCT.parse_stream(file_object)
            last_modified_date = section_one.last_modified_date
            url = section_one.url
            ip_address = 'Unknown'
            http_header_count = section_one.FieldCount
        elif magic.idx_version in [603, 604, 605]:

            # IDX 6.03 and 6.04 have two unused bytes before the structure.
            if magic.idx_version in [603, 604]:
                file_object.read(2)

            # IDX 6.03, 6.04, and 6.05 files use the same structures for the
            # remaining data.
            section_one = self.IDX_605_SECTION_ONE_STRUCT.parse_stream(
                file_object)
            last_modified_date = section_one.last_modified_date
            if file_object.get_size() > 128:
                file_object.seek(128)  # Static offset for section 2.
                section_two = self.IDX_605_SECTION_TWO_STRUCT.parse_stream(
                    file_object)
                url = section_two.url
                ip_address = section_two.ip_address
                http_header_count = section_two.FieldCount
            else:
                url = 'Unknown'
                ip_address = 'Unknown'
                http_header_count = 0

        # File offset is now just prior to HTTP headers. Make sure there
        # are headers, and then parse them to retrieve the download date.
        download_date = None
        for field in range(0, http_header_count):
            field = self.JAVA_READUTF_STRING.parse_stream(file_object)
            value = self.JAVA_READUTF_STRING.parse_stream(file_object)
            if field.string == 'date':
                # Time string "should" be in UTC or have an associated time zone
                # information in the string itself. If that is not the case then
                # there is no reliable method for plaso to determine the proper
                # timezone, so the assumption is that it is UTC.
                download_date = timelib.Timestamp.FromTimeString(
                    value.string, gmt_as_timezone=False)

        if not url or not ip_address:
            raise errors.UnableToParseFile(
                u'Unexpected Error: URL or IP address not found in file.')

        last_modified_timestamp = timelib.Timestamp.FromJavaTime(
            last_modified_date)
        # TODO: Move the timestamp description fields into eventdata.
        event_object = JavaIDXEvent(last_modified_timestamp,
                                    'File Hosted Date', magic.idx_version, url,
                                    ip_address)
        parser_context.ProduceEvent(event_object,
                                    parser_name=self.NAME,
                                    file_entry=file_entry)

        if section_one:
            expiration_date = section_one.get('expiration_date', None)
            if expiration_date:
                expiration_timestamp = timelib.Timestamp.FromJavaTime(
                    expiration_date)
                event_object = JavaIDXEvent(expiration_timestamp,
                                            'File Expiration Date',
                                            magic.idx_version, url, ip_address)
                parser_context.ProduceEvent(event_object,
                                            parser_name=self.NAME,
                                            file_entry=file_entry)

        if download_date:
            event_object = JavaIDXEvent(
                download_date, eventdata.EventTimestamp.FILE_DOWNLOADED,
                magic.idx_version, url, ip_address)
            parser_context.ProduceEvent(event_object,
                                        parser_name=self.NAME,
                                        file_entry=file_entry)

Example #3

Show file

File: interface.py Project: NicoLuuu/plaso

class ESEDBPlugin(plugins.BasePlugin):
  """The ESE database plugin interface."""

  NAME = 'esedb'

  BINARY_DATA_COLUMN_TYPES = frozenset([
      pyesedb.column_types.BINARY_DATA,
      pyesedb.column_types.LARGE_BINARY_DATA])

  FLOATING_POINT_COLUMN_TYPES = frozenset([
      pyesedb.column_types.FLOAT_32BIT,
      pyesedb.column_types.DOUBLE_64BIT])

  INTEGER_COLUMN_TYPES = frozenset([
      pyesedb.column_types.CURRENCY,
      pyesedb.column_types.DATE_TIME,
      pyesedb.column_types.INTEGER_8BIT_UNSIGNED,
      pyesedb.column_types.INTEGER_16BIT_SIGNED,
      pyesedb.column_types.INTEGER_16BIT_UNSIGNED,
      pyesedb.column_types.INTEGER_32BIT_SIGNED,
      pyesedb.column_types.INTEGER_32BIT_UNSIGNED,
      pyesedb.column_types.INTEGER_64BIT_SIGNED])

  STRING_COLUMN_TYPES = frozenset([
      pyesedb.column_types.TEXT,
      pyesedb.column_types.LARGE_TEXT])

  _UINT64_BIG_ENDIAN = construct.UBInt64('value')
  _UINT64_LITTLE_ENDIAN = construct.ULInt64('value')

  # Dictionary containing a callback method per table name.
  # E.g. 'SystemIndex_0A': 'ParseSystemIndex_0A'
  REQUIRED_TABLES = {}
  OPTIONAL_TABLES = {}

  def __init__(self):
    """Initializes the ESE database plugin."""
    super(ESEDBPlugin, self).__init__()
    self._tables = {}
    self._tables.update(self.REQUIRED_TABLES)
    self._tables.update(self.OPTIONAL_TABLES)

  @property
  def required_tables(self):
    """set[str]: required table names."""
    return frozenset(self.REQUIRED_TABLES.keys())

  def _ConvertValueBinaryDataToStringAscii(self, value):
    """Converts a binary data value into a string.

    Args:
      value (bytes): binary data value containing an ASCII string or None.

    Returns:
      str: string representation of binary data value or None.
    """
    if value:
      return value.decode('ascii')

    return None

  def _ConvertValueBinaryDataToStringBase16(self, value):
    """Converts a binary data value into a base-16 (hexadecimal) string.

    Args:
      value (bytes): binary data value or None.

    Returns:
      str: string representation of binary data value or None.
    """
    if value:
      return value.encode('hex')

    return None

  def _ConvertValueBinaryDataToUBInt64(self, value):
    """Converts a binary data value into an integer.

    Args:
      value (bytes): binary data value containing an unsigned 64-bit big-endian
          integer.

    Returns:
      int: integer representation of binary data value or None.
    """
    if value:
      return self._UINT64_BIG_ENDIAN.parse(value)

    return None

  def _ConvertValueBinaryDataToULInt64(self, value):
    """Converts a binary data value into an integer.

    Args:
      value (int): binary data value containing an unsigned 64-bit little-endian
          integer.

    Returns:
      int: integer representation of binary data value or None.
    """
    if value:
      return self._UINT64_LITTLE_ENDIAN.parse(value)

    return None

  def _GetRecordValue(self, record, value_entry):
    """Retrieves a specific value from the record.

    Args:
      record (pyesedb.record): ESE record.
      value_entry (int): value entry.

    Returns:
      object: value.

    Raises:
      ValueError: if the value is not supported.
    """
    column_type = record.get_column_type(value_entry)
    long_value = None

    if record.is_long_value(value_entry):
      long_value = record.get_value_data_as_long_value(value_entry)

    if record.is_multi_value(value_entry):
      # TODO: implement
      raise ValueError('Multi value support not implemented yet.')

    if column_type == pyesedb.column_types.NULL:
      return None

    elif column_type == pyesedb.column_types.BOOLEAN:
      # TODO: implement
      raise ValueError('Boolean value support not implemented yet.')

    elif column_type in self.INTEGER_COLUMN_TYPES:
      if long_value:
        raise ValueError('Long integer value not supported.')
      return record.get_value_data_as_integer(value_entry)

    elif column_type in self.FLOATING_POINT_COLUMN_TYPES:
      if long_value:
        raise ValueError('Long floating point value not supported.')
      return record.get_value_data_as_floating_point(value_entry)

    elif column_type in self.STRING_COLUMN_TYPES:
      if long_value:
        return long_value.get_data_as_string()
      return record.get_value_data_as_string(value_entry)

    elif column_type == pyesedb.column_types.GUID:
      # TODO: implement
      raise ValueError('GUID value support not implemented yet.')

    if long_value:
      return long_value.get_data()
    return record.get_value_data(value_entry)

  def _GetRecordValues(
      self, parser_mediator, table_name, record, value_mappings=None):
    """Retrieves the values from the record.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      table_name (str): name of the table.
      record (pyesedb.record): ESE record.
      value_mappings (Optional[dict[str, str]): value mappings, which map
          the column name to a callback method.

    Returns:
      dict[str,object]: values per column name.
    """
    record_values = {}

    for value_entry in range(0, record.number_of_values):
      if parser_mediator.abort:
        break

      column_name = record.get_column_name(value_entry)
      if column_name in record_values:
        logger.warning(
            '[{0:s}] duplicate column: {1:s} in table: {2:s}'.format(
                self.NAME, column_name, table_name))
        continue

      value_callback = None
      if value_mappings and column_name in value_mappings:
        value_callback_method = value_mappings.get(column_name)
        if value_callback_method:
          value_callback = getattr(self, value_callback_method, None)
          if value_callback is None:
            logger.warning((
                '[{0:s}] missing value callback method: {1:s} for column: '
                '{2:s} in table: {3:s}').format(
                    self.NAME, value_callback_method, column_name, table_name))

      if value_callback:
        try:
          value_data = record.get_value_data(value_entry)
          value = value_callback(value_data)

        except Exception as exception:  # pylint: disable=broad-except
          logger.error(exception)
          value = None
          parser_mediator.ProduceExtractionError((
              'unable to parse value: {0:s} with callback: {1:s} with error: '
              '{2!s}').format(column_name, value_callback_method, exception))

      else:
        try:
          value = self._GetRecordValue(record, value_entry)
        except ValueError as exception:
          value = None
          parser_mediator.ProduceExtractionError(
              'unable to parse value: {0:s} with error: {1!s}'.format(
                  column_name, exception))

      record_values[column_name] = value

    return record_values

  # pylint 1.9.3 wants a docstring for kwargs, but this is not useful to add.
  # pylint: disable=missing-param-doc
  def GetEntries(self, parser_mediator, cache=None, database=None, **kwargs):
    """Extracts event objects from the database.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      cache (Optional[ESEDBCache]): cache.
      database (Optional[pyesedb.file]): ESE database.

    Raises:
      ValueError: If the database attribute is not valid.
    """
    if database is None:
      raise ValueError('Invalid database.')

    for table_name, callback_method in iter(self._tables.items()):
      if parser_mediator.abort:
        break

      if not callback_method:
        # Table names without a callback method are allowed to improve
        # the detection of a database based on its table names.
        continue

      callback = getattr(self, callback_method, None)
      if callback is None:
        logger.warning(
            '[{0:s}] missing callback method: {1:s} for table: {2:s}'.format(
                self.NAME, callback_method, table_name))
        continue

      esedb_table = database.get_table_by_name(table_name)
      if not esedb_table:
        logger.warning('[{0:s}] missing table: {1:s}'.format(
            self.NAME, table_name))
        continue

      # The database is passed in case the database contains table names
      # that are assigned dynamically and cannot be defined by
      # the table name-callback mechanism.
      callback(
          parser_mediator, cache=cache, database=database, table=esedb_table,
          **kwargs)

  # pylint 1.9.3 wants a docstring for kwargs, but this is not useful to add.
  # pylint: disable=missing-param-doc,arguments-differ
  def Process(self, parser_mediator, cache=None, database=None, **kwargs):
    """Determines if this is the appropriate plugin for the database.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      cache (Optional[ESEDBCache]): cache.
      database (Optional[pyesedb.file]): ESE database.

    Raises:
      ValueError: If the database attribute is not valid.
    """
    if database is None:
      raise ValueError('Invalid database.')

    # This will raise if unhandled keyword arguments are passed.
    super(ESEDBPlugin, self).Process(parser_mediator)

    self.GetEntries(
        parser_mediator, cache=cache, database=database, **kwargs)

Example #4

Show file

File: interface.py Project: vertigo0001/plaso

class EseDbPlugin(plugins.BasePlugin):
    """The ESE database plugin interface."""

    NAME = u'esedb'

    BINARY_DATA_COLUMN_TYPES = frozenset([
        pyesedb.column_types.BINARY_DATA,
        pyesedb.column_types.LARGE_BINARY_DATA
    ])

    FLOATING_POINT_COLUMN_TYPES = frozenset(
        [pyesedb.column_types.FLOAT_32BIT, pyesedb.column_types.DOUBLE_64BIT])

    INTEGER_COLUMN_TYPES = frozenset([
        pyesedb.column_types.CURRENCY, pyesedb.column_types.DATE_TIME,
        pyesedb.column_types.INTEGER_8BIT_UNSIGNED,
        pyesedb.column_types.INTEGER_16BIT_SIGNED,
        pyesedb.column_types.INTEGER_16BIT_UNSIGNED,
        pyesedb.column_types.INTEGER_32BIT_SIGNED,
        pyesedb.column_types.INTEGER_32BIT_UNSIGNED,
        pyesedb.column_types.INTEGER_64BIT_SIGNED
    ])

    STRING_COLUMN_TYPES = frozenset(
        [pyesedb.column_types.TEXT, pyesedb.column_types.LARGE_TEXT])

    _UINT64_BIG_ENDIAN = construct.UBInt64(u'value')
    _UINT64_LITTLE_ENDIAN = construct.ULInt64(u'value')

    # Dictionary containing a callback method per table name.
    # E.g. 'SystemIndex_0A': 'ParseSystemIndex_0A'
    REQUIRED_TABLES = {}
    OPTIONAL_TABLES = {}

    def __init__(self):
        """Initializes the ESE database plugin."""
        super(EseDbPlugin, self).__init__()
        self._required_tables = frozenset(self.REQUIRED_TABLES.keys())
        self._tables = {}
        self._tables.update(self.REQUIRED_TABLES)
        self._tables.update(self.OPTIONAL_TABLES)

    def _ConvertValueBinaryDataToStringAscii(self, value):
        """Converts a binary data value into a string.

    Args:
      value: The binary data value containing an ASCII string or None.

    Returns:
      A string or None if value is None.
    """
        if value:
            return value.decode(u'ascii')

    def _ConvertValueBinaryDataToStringBase16(self, value):
        """Converts a binary data value into a base-16 (hexadecimal) string.

    Args:
      value: The binary data value or None.

    Returns:
      A string or None if value is None.
    """
        if value:
            return value.encode(u'hex')

    def _ConvertValueBinaryDataToUBInt64(self, value):
        """Converts a binary data value into an integer.

    Args:
      value: The binary data value containing an unsigned 64-bit big-endian
             integer.

    Returns:
      An integer or None if value is None.
    """
        if value:
            return self._UINT64_BIG_ENDIAN.parse(value)

    def _ConvertValueBinaryDataToULInt64(self, value):
        """Converts a binary data value into an integer.

    Args:
      value: The binary data value containing an unsigned 64-bit little-endian
             integer.

    Returns:
      An integer or None if value is None.
    """
        if value:
            return self._UINT64_LITTLE_ENDIAN.parse(value)

    def _GetRecordValue(self, record, value_entry):
        """Retrieves a specific value from the record.

    Args:
      record: The ESE record object (instance of pyesedb.record).
      value_entry: The value entry.

    Returns:
      An object containing the value.

    Raises:
      ValueError: if the value is not supported.
    """
        column_type = record.get_column_type(value_entry)
        long_value = None

        if record.is_long_value(value_entry):
            long_value = record.get_value_data_as_long_value(value_entry)

        if record.is_multi_value(value_entry):
            # TODO: implement
            raise ValueError(u'Multi value support not implemented yet.')

        if column_type == pyesedb.column_types.NULL:
            return

        elif column_type == pyesedb.column_types.BOOLEAN:
            # TODO: implement
            raise ValueError(u'Boolean value support not implemented yet.')

        elif column_type in self.INTEGER_COLUMN_TYPES:
            if long_value:
                raise ValueError(u'Long integer value not supported.')
            return record.get_value_data_as_integer(value_entry)

        elif column_type in self.FLOATING_POINT_COLUMN_TYPES:
            if long_value:
                raise ValueError(u'Long floating point value not supported.')
            return record.get_value_data_as_floating_point(value_entry)

        elif column_type in self.STRING_COLUMN_TYPES:
            if long_value:
                return long_value.get_data_as_string()
            return record.get_value_data_as_string(value_entry)

        elif column_type == pyesedb.column_types.GUID:
            # TODO: implement
            raise ValueError(u'GUID value support not implemented yet.')

        if long_value:
            return long_value.get_data()
        return record.get_value_data(value_entry)

    def _GetRecordValues(self, table_name, record, value_mappings=None):
        """Retrieves the values from the record.

    Args:
      table_name: The name of the table.
      record: The ESE record object (instance of pyesedb.record).
      value_mappings: Optional dict of value mappings, which map the column
                      name to a callback method. The default is None.

    Returns:
      An dict containing the values.
    """
        record_values = {}

        for value_entry in range(0, record.number_of_values):
            column_name = record.get_column_name(value_entry)
            if column_name in record_values:
                logging.warning(
                    u'[{0:s}] duplicate column: {1:s} in table: {2:s}'.format(
                        self.NAME, column_name, table_name))
                continue

            value_callback = None
            if value_mappings and column_name in value_mappings:
                value_callback_method = value_mappings.get(column_name)
                if value_callback_method:
                    value_callback = getattr(self, value_callback_method, None)
                    if value_callback is None:
                        logging.warning((
                            u'[{0:s}] missing value callback method: {1:s} for column: '
                            u'{2:s} in table: {3:s}').format(
                                self.NAME, value_callback_method, column_name,
                                table_name))

            try:
                value = self._GetRecordValue(record, value_entry)
            except ValueError as exception:
                logging.warning(exception)

            if value_callback:
                value = value_callback(value)

            record_values[column_name] = value

        return record_values

    def _GetTableNames(self, database):
        """Retrieves the table names in a database.

    Args:
      database: The ESE database object (instance of pyesedb.file).

    Returns:
      A list of the table names.
    """
        table_names = []
        for esedb_table in database.tables:
            table_names.append(esedb_table.name)

        return table_names

    def GetEntries(self, parser_mediator, database=None, cache=None, **kwargs):
        """Extracts event objects from the database.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      database: Optional ESE database object (instance of pyesedb.file).
                The default is None.
      cache: Optional cache object (instance of EseDbCache). The default is
             None.

    Raises:
      ValueError: If the database attribute is not valid.
    """
        if database is None:
            raise ValueError(u'Invalid database.')

        for table_name, callback_method in self._tables.iteritems():
            if not callback_method:
                # Table names without a callback method are allowed to improve
                # the detection of a database based on its table names.
                continue

            callback = getattr(self, callback_method, None)
            if callback is None:
                logging.warning(
                    u'[{0:s}] missing callback method: {1:s} for table: {2:s}'.
                    format(self.NAME, callback_method, table_name))
                continue

            esedb_table = database.get_table_by_name(table_name)
            if not esedb_table:
                logging.warning(u'[{0:s}] missing table: {1:s}'.format(
                    self.NAME, table_name))
                continue

            # The database is passed in case the database contains table names
            # that are assigned dynamically and cannot be defined by
            # the table name-callback mechanism.
            callback(parser_mediator,
                     database=database,
                     table=esedb_table,
                     cache=cache,
                     **kwargs)

    def Process(self, parser_mediator, database=None, cache=None, **kwargs):
        """Determines if this is the appropriate plugin for the database.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      database: Optional ESE database object (instance of pyesedb.file).
                The default is None.
      cache: Optional cache object (instance of EseDbCache). The default is
             None.

    Raises:
      errors.WrongPlugin: If the database does not contain all the tables
                          defined in the required_tables set.
      ValueError: If the database attribute is not valid.
    """
        if database is None:
            raise ValueError(u'Invalid database.')

        table_names = frozenset(self._GetTableNames(database))
        if self._required_tables.difference(table_names):
            raise errors.WrongPlugin(
                u'[{0:s}] required tables not found.'.format(self.NAME))

        # This will raise if unhandled keyword arguments are passed.
        super(EseDbPlugin, self).Process(parser_mediator)

        self.GetEntries(parser_mediator,
                        database=database,
                        cache=cache,
                        **kwargs)

Example #5

Show file

    1: 'ALERT',
    2: 'CRITICAL',
    3: 'ERROR',
    4: 'WARNING',
    5: 'NOTICE',
    6: 'INFO',
    7: 'DEBUG'
}

# ASL Required Structures.

# ASL header file structure
ASL_HEADER_STRUCT = construct.Struct('asl_header_struct',
                                     construct.String('magic', 12),
                                     construct.UBInt32('version'),
                                     construct.UBInt64('offset'),
                                     construct.UBInt64('timestamp'),
                                     construct.UBInt32('cache_size'),
                                     construct.UBInt64('last_offset'),
                                     construct.Padding(36))

# Record = [Heap][Record_Struct][Values]
# Heap = [Group of Dyn_Value]*
# Values = [ADDR_TXT][ADDR_TXT][ADDR_TXT][ADDR_TXT](2x[ADDR_TXT])*
#            (Host)   (Sender) (Facility) (message)

# Record Struct
ASL_RECORD_STRUCT = construct.Struct('asl_record_struct', construct.Padding(2),
                                     construct.UBInt32('tam_entry'),
                                     construct.UBInt64('next_offset'),
                                     construct.UBInt64('ASLMessageID'),

Example #6

Show file

File: asl.py Project: cvandeplas/plaso

class AslParser(interface.BaseParser):
    """Parser for ASL log files."""

    NAME = 'asl_log'
    DESCRIPTION = u'Parser for ASL log files.'

    ASL_MAGIC = 'ASL DB\x00\x00\x00\x00\x00\x00'

    # If not right assigned, the value is "-1".
    ASL_NO_RIGHTS = 'ffffffff'

    # Priority level (criticity)
    ASL_MESSAGE_PRIORITY = {
        0: 'EMERGENCY',
        1: 'ALERT',
        2: 'CRITICAL',
        3: 'ERROR',
        4: 'WARNING',
        5: 'NOTICE',
        6: 'INFO',
        7: 'DEBUG'
    }

    # ASL File header.
    # magic: magic number that identify ASL files.
    # version: version of the file.
    # offset: first record in the file.
    # timestamp: epoch time when the first entry was written.
    # last_offset: last record in the file.
    ASL_HEADER_STRUCT = construct.Struct('asl_header_struct',
                                         construct.String('magic', 12),
                                         construct.UBInt32('version'),
                                         construct.UBInt64('offset'),
                                         construct.UBInt64('timestamp'),
                                         construct.UBInt32('cache_size'),
                                         construct.UBInt64('last_offset'),
                                         construct.Padding(36))

    # The record structure is:
    # [HEAP][STRUCTURE][4xExtraField][2xExtraField]*[PreviousEntry]
    # Record static structure.
    # tam_entry: it contains the number of bytes from this file position
    #            until the end of the record, without counts itself.
    # next_offset: next record. If is equal to 0x00, it is the last record.
    # asl_message_id: integer that has the numeric identification of the event.
    # timestamp: Epoch integer that has the time when the entry was created.
    # nanosecond: nanosecond to add to the timestamp.
    # level: level of priority.
    # pid: process identification that ask to save the record.
    # uid: user identification that has lunched the process.
    # gid: group identification that has lunched the process.
    # read_uid: identification id of a user. Only applied if is not -1 (all FF).
    #           Only root and this user can read the entry.
    # read_gid: the same than read_uid, but for the group.
    ASL_RECORD_STRUCT = construct.Struct('asl_record_struct',
                                         construct.Padding(2),
                                         construct.UBInt32('tam_entry'),
                                         construct.UBInt64('next_offset'),
                                         construct.UBInt64('asl_message_id'),
                                         construct.UBInt64('timestamp'),
                                         construct.UBInt32('nanosec'),
                                         construct.UBInt16('level'),
                                         construct.UBInt16('flags'),
                                         construct.UBInt32('pid'),
                                         construct.UBInt32('uid'),
                                         construct.UBInt32('gid'),
                                         construct.UBInt32('read_uid'),
                                         construct.UBInt32('read_gid'),
                                         construct.UBInt64('ref_pid'))

    ASL_RECORD_STRUCT_SIZE = ASL_RECORD_STRUCT.sizeof()

    # 8-byte fields, they can be:
    # - String: [Nibble = 1000 (8)][Nibble = Length][7 Bytes = String].
    # - Integer: integer that has the byte position in the file that points
    #            to an ASL_RECORD_DYN_VALUE struct. If the value of the integer
    #            is equal to 0, it means that it has not data (skip).

    # If the field is a String, we use this structure to decode each
    # integer byte in the corresponding character (ASCII Char).
    ASL_OCTET_STRING = construct.ExprAdapter(construct.Octet('string'),
                                             encoder=lambda obj, ctx: ord(obj),
                                             decoder=lambda obj, ctx: chr(obj))

    # Field string structure. If the first bit is 1, it means that it
    # is a String (1000) = 8, then the next nibble has the number of
    # characters. The last 7 bytes are the number of bytes.
    ASL_STRING = construct.BitStruct(
        'string', construct.Flag('type'), construct.Bits('filler', 3),
        construct.If(lambda ctx: ctx.type, construct.Nibble('string_length')),
        construct.If(lambda ctx: ctx.type,
                     construct.Array(7, ASL_OCTET_STRING)))

    # 8-byte pointer to a byte position in the file.
    ASL_POINTER = construct.UBInt64('pointer')

    # Dynamic data structure pointed by a pointer that contains a String:
    # [2 bytes padding][4 bytes lenght of String][String].
    ASL_RECORD_DYN_VALUE = construct.Struct(
        'asl_record_dyn_value', construct.Padding(2),
        construct.PascalString('value',
                               length_field=construct.UBInt32('length')))

    def Parse(self, parser_context, file_entry):
        """Extract entries from an ASL file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
    """
        file_object = file_entry.GetFileObject()
        file_object.seek(0, os.SEEK_SET)

        try:
            header = self.ASL_HEADER_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            file_object.close()
            raise errors.UnableToParseFile(
                u'Unable to parse ASL Header with error: {0:s}.'.format(
                    exception))

        if header.magic != self.ASL_MAGIC:
            file_object.close()
            raise errors.UnableToParseFile(
                u'Not an ASL Header, unable to parse.')

        # Get the first and the last entry.
        offset = header.offset
        old_offset = header.offset
        last_offset_header = header.last_offset

        # If the ASL file has entries.
        if offset:
            event_object, offset = self.ReadAslEvent(file_object, offset)
            while event_object:
                parser_context.ProduceEvent(event_object,
                                            parser_name=self.NAME,
                                            file_entry=file_entry)

                # TODO: an anomaly object must be emitted once that is implemented.
                # Sanity check, the last read element must be the same as
                # indicated by the header.
                if offset == 0 and old_offset != last_offset_header:
                    logging.warning(u'Parsing ended before the header ends.')
                old_offset = offset
                event_object, offset = self.ReadAslEvent(file_object, offset)

        file_object.close()

    def ReadAslEvent(self, file_object, offset):
        """Returns an AslEvent from a single ASL entry.

    Args:
      file_object: a file-like object that points to an ASL file.
      offset: offset where the static part of the entry starts.

    Returns:
      An event object constructed from a single ASL record.
    """
        # The heap of the entry is saved to try to avoid seek (performance issue).
        # It has the real start position of the entry.
        dynamic_start = file_object.tell()
        dynamic_part = file_object.read(offset - file_object.tell())

        if not offset:
            return None, None

        try:
            record_header = self.ASL_RECORD_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            logging.warning(
                u'Unable to parse ASL event with error: {0:s}'.format(
                    exception))
            return None, None

        # Variable tam_fields = is the real length of the dynamic fields.
        # We have this: [Record_Struct] + [Dynamic_Fields] + [Pointer_Entry_Before]
        # In Record_Struct we have a field called tam_entry, where it has the number
        # of bytes until the end of the entry from the position that the field is.
        # The tam_entry is between the 2th and the 6th byte in the [Record_Struct].
        # tam_entry = ([Record_Struct]-6)+[Dynamic_Fields]+[Pointer_Entry_Before]
        # Also, we do not need [Point_Entry_Before] and then we delete the size of
        # [Point_Entry_Before] that it is 8 bytes (8):
        # tam_entry = ([Record_Struct]-6)+[Dynamic_Fields]+[Pointer_Entry_Before]
        # [Dynamic_Fields] = tam_entry - [Record_Struct] + 6 - 8
        # [Dynamic_Fields] = tam_entry - [Record_Struct] - 2
        tam_fields = record_header.tam_entry - self.ASL_RECORD_STRUCT_SIZE - 2

        # Dynamic part of the entry that contains minimal four fields of 8 bytes
        # plus 2x[8bytes] fields for each extra ASL_Field.
        # The four first fields are always the Host, Sender, Facility and Message.
        # After the four first fields, the entry might have extra ASL_Fields.
        # For each extra ASL_field, it has a pair of 8-byte fields where the first
        # 8 bytes contains the name of the extra ASL_field and the second 8 bytes
        # contains the text of the exta field.
        # All of this 8-byte field can be saved using one of these three differents
        # types:
        # - Null value ('0000000000000000'): nothing to do.
        # - String: It is string if first bit = 1 or first nibble = 8 (1000).
        #           Second nibble has the length of string.
        #           The next 7 bytes have the text characters of the string
        #           padding the end with null characters: '0x00'.
        #           Example: [8468 6964 6400 0000]
        #                    [8] String, [4] length, value: [68 69 64 64] = hidd.
        # - Pointer: static position in the file to a special struct
        #            implemented as an ASL_RECORD_DYN_VALUE.
        #            Example: [0000 0000 0000 0077]
        #            It points to the file position 0x077 that has a
        #            ASL_RECORD_DYN_VALUE structure.
        values = []
        while tam_fields > 0:
            try:
                raw_field = file_object.read(8)
            except (IOError, construct.FieldError) as exception:
                logging.warning(
                    u'Unable to parse ASL event with error: {0:d}'.format(
                        exception))
                return None, None
            try:
                # Try to read as a String.
                field = self.ASL_STRING.parse(raw_field)
                values.append(''.join(field.string[0:field.string_length]))
                # Go to parse the next extra field.
                tam_fields -= 8
                continue
            except ValueError:
                pass
            # If it is not a string, it must be a pointer.
            try:
                field = self.ASL_POINTER.parse(raw_field)
            except ValueError as exception:
                logging.warning(
                    u'Unable to parse ASL event with error: {0:s}'.format(
                        exception))
                return None, None
            if field != 0:
                # The next IF ELSE is only for performance issues, avoiding seek.
                # If the pointer points a lower position than where the actual entry
                # starts, it means that it points to a previuos entry.
                pos = field - dynamic_start
                # Bigger or equal 0 means that the data is in the actual entry.
                if pos >= 0:
                    try:
                        values.append((self.ASL_RECORD_DYN_VALUE.parse(
                            dynamic_part[pos:])).value.partition('\x00')[0])
                    except (IOError, construct.FieldError) as exception:
                        logging.warning(
                            u'Unable to parse ASL event with error: {0:s}'.
                            format(exception))
                        return None, None
                else:
                    # Only if it is a pointer that points to the
                    # heap from another entry we use the seek method.
                    main_position = file_object.tell()
                    # If the pointer is in a previous entry.
                    if main_position > field:
                        file_object.seek(field - main_position, os.SEEK_CUR)
                        try:
                            values.append(
                                (self.ASL_RECORD_DYN_VALUE.parse_stream(
                                    file_object)).value.partition('\x00')[0])
                        except (IOError, construct.FieldError):
                            logging.warning((
                                u'The pointer at {0:d} (0x{0:x}) points to invalid '
                                u'information.'
                            ).format(main_position -
                                     self.ASL_POINTER.sizeof()))
                        # Come back to the position in the entry.
                        _ = file_object.read(main_position -
                                             file_object.tell())
                    else:
                        _ = file_object.read(field - main_position)
                        values.append((self.ASL_RECORD_DYN_VALUE.parse_stream(
                            file_object)).value.partition('\x00')[0])
                        # Come back to the position in the entry.
                        file_object.seek(main_position - file_object.tell(),
                                         os.SEEK_CUR)
            # Next extra field: 8 bytes more.
            tam_fields -= 8

        # Read the last 8 bytes of the record that points to the previous entry.
        _ = file_object.read(8)

        # Parsed section, we translate the read data to an appropriate format.
        microsecond = record_header.nanosec // 1000
        timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
            record_header.timestamp, microsecond)
        record_position = offset
        message_id = record_header.asl_message_id
        level = u'{0} ({1})'.format(
            self.ASL_MESSAGE_PRIORITY[record_header.level],
            record_header.level)
        # If the value is -1 (FFFFFFFF), it can be read by everyone.
        if record_header.read_uid != int(self.ASL_NO_RIGHTS, 16):
            read_uid = record_header.read_uid
        else:
            read_uid = 'ALL'
        if record_header.read_gid != int(self.ASL_NO_RIGHTS, 16):
            read_gid = record_header.read_gid
        else:
            read_gid = 'ALL'

        # Parsing the dynamic values (text or pointers to position with text).
        # The first four are always the host, sender, facility, and message.
        computer_name = values[0]
        sender = values[1]
        facility = values[2]
        message = values[3]

        # If the entry has an extra fields, they works as a pairs:
        # The first is the name of the field and the second the value.
        extra_information = ''
        if len(values) > 4:
            values = values[4:]
            for index in xrange(0, len(values) // 2):
                extra_information += (u'[{0}: {1}]'.format(
                    values[index * 2], values[(index * 2) + 1]))

        # Return the event and the offset for the next entry.
        return AslEvent(timestamp, record_position, message_id, level,
                        record_header, read_uid, read_gid, computer_name,
                        sender, facility, message,
                        extra_information), record_header.next_offset

Example #7

Show file

class ASLParser(interface.FileObjectParser):
    """Parser for ASL log files."""

    _INITIAL_FILE_OFFSET = None

    NAME = u'asl_log'
    DESCRIPTION = u'Parser for ASL log files.'

    _ASL_MAGIC = b'ASL DB\x00\x00\x00\x00\x00\x00'

    # ASL File header.
    # magic: magic number that identify ASL files.
    # version: version of the file.
    # offset: first record in the file.
    # timestamp: time when the first entry was written.
    #     Contains the number of seconds since January 1, 1970 00:00:00 UTC.
    # last_offset: last record in the file.
    _ASL_HEADER_STRUCT = construct.Struct(u'asl_header_struct',
                                          construct.String(u'magic', 12),
                                          construct.UBInt32(u'version'),
                                          construct.UBInt64(u'offset'),
                                          construct.UBInt64(u'timestamp'),
                                          construct.UBInt32(u'cache_size'),
                                          construct.UBInt64(u'last_offset'),
                                          construct.Padding(36))

    # The record structure is:
    # [HEAP][STRUCTURE][4xExtraField][2xExtraField]*[PreviousEntry]
    # Record static structure.
    # tam_entry: it contains the number of bytes from this file position
    #            until the end of the record, without counts itself.
    # next_offset: next record. If is equal to 0x00, it is the last record.
    # asl_message_id: integer that has the numeric identification of the event.
    # timestamp: the entry creation date and time.
    #     Contains the number of seconds since January 1, 1970 00:00:00 UTC.
    # nanosecond: nanosecond to add to the timestamp.
    # level: level of priority.
    # pid: process identification that ask to save the record.
    # uid: user identification that has lunched the process.
    # gid: group identification that has lunched the process.
    # read_uid: identification id of a user. Only applied if is not -1 (all FF).
    #           Only root and this user can read the entry.
    # read_gid: the same than read_uid, but for the group.
    _ASL_RECORD_STRUCT = construct.Struct(u'asl_record_struct',
                                          construct.Padding(2),
                                          construct.UBInt32(u'tam_entry'),
                                          construct.UBInt64(u'next_offset'),
                                          construct.UBInt64(u'asl_message_id'),
                                          construct.UBInt64(u'timestamp'),
                                          construct.UBInt32(u'nanosec'),
                                          construct.UBInt16(u'level'),
                                          construct.UBInt16(u'flags'),
                                          construct.UBInt32(u'pid'),
                                          construct.UBInt32(u'uid'),
                                          construct.UBInt32(u'gid'),
                                          construct.UBInt32(u'read_uid'),
                                          construct.UBInt32(u'read_gid'),
                                          construct.UBInt64(u'ref_pid'))

    _ASL_RECORD_STRUCT_SIZE = _ASL_RECORD_STRUCT.sizeof()

    # 8-byte fields, they can be:
    # - String: [Nibble = 1000 (8)][Nibble = Length][7 Bytes = String].
    # - Integer: integer that has the byte position in the file that points
    #            to an ASL_RECORD_DYN_VALUE struct. If the value of the integer
    #            is equal to 0, it means that it has not data (skip).

    # If the field is a String, we use this structure to decode each
    # integer byte in the corresponding character (ASCII Char).
    _ASL_OCTET_STRING = construct.ExprAdapter(
        construct.Octet(u'string'),
        encoder=lambda obj, ctx: ord(obj),
        decoder=lambda obj, ctx: chr(obj))

    # Field string structure. If the first bit is 1, it means that it
    # is a String (1000) = 8, then the next nibble has the number of
    # characters. The last 7 bytes are the number of bytes.
    _ASL_STRING = construct.BitStruct(
        u'string', construct.Flag(u'type'), construct.Bits(u'filler', 3),
        construct.If(lambda ctx: ctx.type, construct.Nibble(u'string_length')),
        construct.If(lambda ctx: ctx.type,
                     construct.Array(7, _ASL_OCTET_STRING)))

    # 8-byte pointer to a byte position in the file.
    _ASL_POINTER = construct.UBInt64(u'pointer')

    # Dynamic data structure pointed by a pointer that contains a String:
    # [2 bytes padding][4 bytes size of String][String].
    _ASL_RECORD_DYN_VALUE = construct.Struct(
        u'asl_record_dyn_value', construct.Padding(2),
        construct.UBInt32(u'size'),
        construct.Bytes(u'value', lambda ctx: ctx.size))

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses an ALS file-like object.

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).
      file_object: a file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        file_object.seek(0, os.SEEK_SET)

        try:
            header = self._ASL_HEADER_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                u'Unable to parse ASL Header with error: {0:s}.'.format(
                    exception))

        if header.magic != self._ASL_MAGIC:
            raise errors.UnableToParseFile(
                u'Not an ASL Header, unable to parse.')

        offset = header.offset
        if not offset:
            return

        header_last_offset = header.last_offset

        previous_offset = offset
        event_object, offset = self.ReadASLEvent(parser_mediator, file_object,
                                                 offset)
        while event_object:
            # Sanity check, the last read element must be the same as
            # indicated by the header.
            if offset == 0 and previous_offset != header_last_offset:
                parser_mediator.ProduceParseError(
                    u'Unable to parse header. Last element header does not match '
                    u'header offset.')
            previous_offset = offset
            event_object, offset = self.ReadASLEvent(parser_mediator,
                                                     file_object, offset)

    def ReadASLEvent(self, parser_mediator, file_object, offset):
        """Reads an ASL record at a specific offset.

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).
      file_object: a file-like object that points to an ASL file.
      offset: an integer containing the offset of the ASL record.

    Returns:
      A tuple of an event object extracted from the ASL record,
      and the offset to the next ASL record in the file.
    """
        # The heap of the entry is saved to try to avoid seek (performance issue).
        # It has the real start position of the entry.
        dynamic_data_offset = file_object.tell()

        try:
            dynamic_data = file_object.read(offset - dynamic_data_offset)
        except IOError as exception:
            parser_mediator.ProduceParseError(
                u'unable to read ASL record dynamic data with error: {0:s}'.
                format(exception))
            return None, None

        if not offset:
            return None, None

        try:
            record_struct = self._ASL_RECORD_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            parser_mediator.ProduceParseError(
                u'unable to parse ASL record with error: {0:s}'.format(
                    exception))
            return None, None

        # Variable tam_fields = is the real length of the dynamic fields.
        # We have this: [Record_Struct] + [Dynamic_Fields] + [Pointer_Entry_Before]
        # In Record_Struct we have a field called tam_entry, where it has the number
        # of bytes until the end of the entry from the position that the field is.
        # The tam_entry is between the 2th and the 6th byte in the [Record_Struct].
        # tam_entry = ([Record_Struct]-6)+[Dynamic_Fields]+[Pointer_Entry_Before]
        # Also, we do not need [Point_Entry_Before] and then we delete the size of
        # [Point_Entry_Before] that it is 8 bytes (8):
        # tam_entry = ([Record_Struct]-6)+[Dynamic_Fields]+[Pointer_Entry_Before]
        # [Dynamic_Fields] = tam_entry - [Record_Struct] + 6 - 8
        # [Dynamic_Fields] = tam_entry - [Record_Struct] - 2
        tam_fields = record_struct.tam_entry - self._ASL_RECORD_STRUCT_SIZE - 2

        # Dynamic part of the entry that contains minimal four fields of 8 bytes
        # plus 2 x [8 bytes] fields for each extra ASL_Field.
        # The four first fields are always the Host, Sender, Facility and Message.
        # After the four first fields, the entry might have extra ASL_Fields.
        # For each extra ASL_field, it has a pair of 8-byte fields where the first
        # 8 bytes contains the name of the extra ASL_field and the second 8 bytes
        # contains the text of the extra field.
        # All of this 8-byte field can be saved using one of these three different
        # types:
        # - Null value ('0000000000000000'): nothing to do.
        # - String: It is string if first bit = 1 or first nibble = 8 (1000).
        #           Second nibble has the length of string.
        #           The next 7 bytes have the text characters of the string
        #           padding the end with null characters: '0x00'.
        #           Example: [8468 6964 6400 0000]
        #                    [8] String, [4] length, value: [68 69 64 64] = hidd.
        # - Pointer: static position in the file to a special struct
        #            implemented as an ASL_RECORD_DYN_VALUE.
        #            Example: [0000 0000 0000 0077]
        #            It points to the file position 0x077 that has a
        #            ASL_RECORD_DYN_VALUE structure.
        values = []
        while tam_fields > 0:
            try:
                field_data = file_object.read(8)
            except IOError as exception:
                parser_mediator.ProduceParseError(
                    u'unable to read ASL field with error: {0:s}'.format(
                        exception))
                return None, None

            # Try to read the field data as a string.
            try:
                asl_string_struct = self._ASL_STRING.parse(field_data)
                string_data = b''.join(
                    asl_string_struct.string[0:asl_string_struct.
                                             string_length])
                values.append(string_data)

                # Go to parse the next extra field.
                tam_fields -= 8
                continue

            except ValueError:
                pass

            # If the field is not a string it must be a pointer.
            try:
                pointer_value = self._ASL_POINTER.parse(field_data)
            except ValueError as exception:
                parser_mediator.ProduceParseError(
                    u'unable to parse ASL field with error: {0:s}'.format(
                        exception))
                return None, None

            if not pointer_value:
                # Next extra field: 8 bytes more.
                tam_fields -= 8
                continue

            # The next IF ELSE is only for performance issues, avoiding seek.
            # If the pointer points a lower position than where the actual entry
            # starts, it means that it points to a previous entry.
            pos = pointer_value - dynamic_data_offset

            # Greater or equal 0 means that the data is in the actual entry.
            if pos >= 0:
                try:
                    dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse(
                        dynamic_data[pos:])
                    dyn_value = dyn_value_struct.value.partition(b'\x00')[0]
                    values.append(dyn_value)

                except (IOError, construct.FieldError) as exception:
                    parser_mediator.ProduceParseError((
                        u'unable to parse ASL record dynamic value with error: '
                        u'{0:s}').format(exception))
                    return None, None

            else:
                # Only if it is a pointer that points to the
                # heap from another entry we use the seek method.
                main_position = file_object.tell()

                # If the pointer is in a previous entry.
                if main_position > pointer_value:
                    file_object.seek(pointer_value - main_position,
                                     os.SEEK_CUR)

                    try:
                        dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse_stream(
                            file_object)
                        dyn_value = dyn_value_struct.value.partition(
                            b'\x00')[0]
                        values.append(dyn_value)

                    except (IOError, construct.FieldError):
                        parser_mediator.ProduceParseError((
                            u'the pointer at {0:d} (0x{0:08x}) points to invalid '
                            u'information.'
                        ).format(main_position - self._ASL_POINTER.sizeof()))

                    # Come back to the position in the entry.
                    _ = file_object.read(main_position - file_object.tell())

                else:
                    _ = file_object.read(pointer_value - main_position)

                    dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse_stream(
                        file_object)
                    dyn_value = dyn_value_struct.value.partition(b'\x00')[0]
                    values.append(dyn_value)

                    # Come back to the position in the entry.
                    file_object.seek(main_position - file_object.tell(),
                                     os.SEEK_CUR)

            # Next extra field: 8 bytes more.
            tam_fields -= 8

        # Read the last 8 bytes of the record that points to the previous entry.
        _ = file_object.read(8)

        # Parsed section, we translate the read data to an appropriate format.
        micro_seconds, _ = divmod(record_struct.nanosec, 1000)

        # Parsing the dynamic values (text or pointers to position with text).
        # The first four are always the host, sender, facility, and message.
        number_of_values = len(values)
        if number_of_values < 4:
            parser_mediator.ProduceParseError(
                u'less than four values read from an ASL event.')

        computer_name = u'N/A'
        sender = u'N/A'
        facility = u'N/A'
        message = u'N/A'

        if number_of_values >= 1:
            computer_name = values[0].decode(u'utf-8')

        if number_of_values >= 2:
            sender = values[1].decode(u'utf-8')

        if number_of_values >= 3:
            facility = values[2].decode(u'utf-8')

        if number_of_values >= 4:
            message = values[3].decode(u'utf-8')

        # If the entry has an extra fields, they works as a pairs:
        # The first is the name of the field and the second the value.
        extra_information = u''
        if number_of_values > 4 and number_of_values % 2 == 0:
            # Taking all the extra attributes and merging them together,
            # eg: a = [1, 2, 3, 4] will look like "1: 2, 3: 4".
            try:
                extra_values = map(py2to3.UNICODE_TYPE, values[4:])
                extra_information = u', '.join(
                    map(u': '.join, zip(extra_values[0::2],
                                        extra_values[1::2])))
            except UnicodeDecodeError as exception:
                parser_mediator.ProduceParseError(
                    u'Unable to decode all ASL values in the extra information fields.'
                )

        event_object = ASLEvent(record_struct.timestamp,
                                offset,
                                record_struct.asl_message_id,
                                record_struct.level,
                                record_struct.pid,
                                record_struct.uid,
                                record_struct.gid,
                                record_struct.read_uid,
                                record_struct.read_gid,
                                computer_name,
                                sender,
                                facility,
                                message,
                                extra_information,
                                micro_seconds=micro_seconds)
        parser_mediator.ProduceEvent(event_object)

        return (event_object, record_struct.next_offset)

Example #8

Show file

            assert False
        self.offset = min(self.offset, self.size())
        self.offset = max(self.offset, 0)

    def tell(self):
        return self.offset


FileVaultV2Header = construct.Struct(
    "FileVaultV2Header", construct.Magic("encrcdsa"),
    construct.UBInt32("version"), construct.UBInt32("encIVSize"),
    construct.UBInt32("_unk1"), construct.UBInt32("_unk2"),
    construct.UBInt32("keyBits"), construct.UBInt32("_unk4"),
    construct.UBInt32("_unk5"), construct.Array(4,
                                                construct.UBInt32("UDIFID")),
    construct.UBInt32("blockSize"), construct.UBInt64("dataSize"),
    construct.UBInt64("dataOffset"), construct.Padding(0x260),
    construct.UBInt32("kdfAlgorithm"), construct.UBInt32("kdfPRNGAlgorithm"),
    construct.UBInt32("kdfIterationCount"), construct.UBInt32("kdfSaltLen"),
    construct.String("kdfSalt", 0x20), construct.UBInt32("blobEncIVSize"),
    construct.String("blobEncIV", 0x20), construct.UBInt32("blobEncKeyBits"),
    construct.UBInt32("blobEncAlgorithm"), construct.UBInt32("blobEncPadding"),
    construct.UBInt32("blobEncMode"),
    construct.UBInt32("encryptedKeyblobSize"),
    construct.String("encryptedKeyblob", 0x30))


class FileVaultFile(BaseFile):
    def __init__(self, f, key):
        BaseFile.__init__(self)

Example #9

Show file

class BsmParser(interface.FileObjectParser):
    """Parser for BSM files."""

    _INITIAL_FILE_OFFSET = None

    NAME = u'bsm_log'
    DESCRIPTION = u'Parser for BSM log files.'

    # BSM supported version (0x0b = 11).
    AUDIT_HEADER_VERSION = 11

    # Magic Trail Header.
    BSM_TOKEN_TRAILER_MAGIC = b'b105'

    # IP Version constants.
    AU_IPv4 = 4
    AU_IPv6 = 16

    IPV4_STRUCT = construct.UBInt32(u'ipv4')

    IPV6_STRUCT = construct.Struct(u'ipv6', construct.UBInt64(u'high'),
                                   construct.UBInt64(u'low'))

    # Tested structures.
    # INFO: I have ommited the ID in the structures declaration.
    #       I used the BSM_TYPE first to read the ID, and then, the structure.
    # Tokens always start with an ID value that identifies their token
    # type and subsequent structure.
    BSM_TYPE = construct.UBInt8(u'token_id')

    # Data type structures.
    BSM_TOKEN_DATA_CHAR = construct.String(u'value', 1)
    BSM_TOKEN_DATA_SHORT = construct.UBInt16(u'value')
    BSM_TOKEN_DATA_INTEGER = construct.UBInt32(u'value')

    # Common structure used by other structures.
    # audit_uid: integer, uid that generates the entry.
    # effective_uid: integer, the permission user used.
    # effective_gid: integer, the permission group used.
    # real_uid: integer, user id of the user that execute the process.
    # real_gid: integer, group id of the group that execute the process.
    # pid: integer, identification number of the process.
    # session_id: unknown, need research.
    BSM_TOKEN_SUBJECT_SHORT = construct.Struct(
        u'subject_data', construct.UBInt32(u'audit_uid'),
        construct.UBInt32(u'effective_uid'),
        construct.UBInt32(u'effective_gid'), construct.UBInt32(u'real_uid'),
        construct.UBInt32(u'real_gid'), construct.UBInt32(u'pid'),
        construct.UBInt32(u'session_id'))

    # Common structure used by other structures.
    # Identify the kind of inet (IPv4 or IPv6)
    # TODO: instead of 16, AU_IPv6 must be used.
    BSM_IP_TYPE_SHORT = construct.Struct(
        u'bsm_ip_type_short', construct.UBInt32(u'net_type'),
        construct.Switch(u'ip_addr',
                         _BsmTokenGetNetType, {16: IPV6_STRUCT},
                         default=IPV4_STRUCT))

    # Initial fields structure used by header structures.
    # length: integer, the length of the entry, equal to trailer (doc: length).
    # version: integer, version of BSM (AUDIT_HEADER_VERSION).
    # event_type: integer, the type of event (/etc/security/audit_event).
    # modifier: integer, unknown, need research (It is always 0).
    BSM_HEADER = construct.Struct(u'bsm_header', construct.UBInt32(u'length'),
                                  construct.UBInt8(u'version'),
                                  construct.UBInt16(u'event_type'),
                                  construct.UBInt16(u'modifier'))

    # First token of one entry.
    # timestamp: unsigned integer, number of seconds since
    #            January 1, 1970 00:00:00 UTC.
    # microsecond: unsigned integer, number of micro seconds.
    BSM_HEADER32 = construct.Struct(u'bsm_header32', BSM_HEADER,
                                    construct.UBInt32(u'timestamp'),
                                    construct.UBInt32(u'microsecond'))

    BSM_HEADER64 = construct.Struct(u'bsm_header64', BSM_HEADER,
                                    construct.UBInt64(u'timestamp'),
                                    construct.UBInt64(u'microsecond'))

    BSM_HEADER32_EX = construct.Struct(u'bsm_header32_ex', BSM_HEADER,
                                       BSM_IP_TYPE_SHORT,
                                       construct.UBInt32(u'timestamp'),
                                       construct.UBInt32(u'microsecond'))

    # Token TEXT, provides extra information.
    BSM_TOKEN_TEXT = construct.Struct(
        u'bsm_token_text', construct.UBInt16(u'length'),
        construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

    # Path of the executable.
    BSM_TOKEN_PATH = BSM_TOKEN_TEXT

    # Identified the end of the record (follow by TRAILER).
    # status: integer that identifies the status of the exit (BSM_ERRORS).
    # return: returned value from the operation.
    BSM_TOKEN_RETURN32 = construct.Struct(u'bsm_token_return32',
                                          construct.UBInt8(u'status'),
                                          construct.UBInt32(u'return_value'))

    BSM_TOKEN_RETURN64 = construct.Struct(u'bsm_token_return64',
                                          construct.UBInt8(u'status'),
                                          construct.UBInt64(u'return_value'))

    # Identified the number of bytes that was written.
    # magic: 2 bytes that identifies the TRAILER (BSM_TOKEN_TRAILER_MAGIC).
    # length: integer that has the number of bytes from the entry size.
    BSM_TOKEN_TRAILER = construct.Struct(u'bsm_token_trailer',
                                         construct.UBInt16(u'magic'),
                                         construct.UBInt32(u'record_length'))

    # A 32-bits argument.
    # num_arg: the number of the argument.
    # name_arg: the argument's name.
    # text: the string value of the argument.
    BSM_TOKEN_ARGUMENT32 = construct.Struct(
        u'bsm_token_argument32', construct.UBInt8(u'num_arg'),
        construct.UBInt32(u'name_arg'), construct.UBInt16(u'length'),
        construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

    # A 64-bits argument.
    # num_arg: integer, the number of the argument.
    # name_arg: text, the argument's name.
    # text: the string value of the argument.
    BSM_TOKEN_ARGUMENT64 = construct.Struct(
        u'bsm_token_argument64', construct.UBInt8(u'num_arg'),
        construct.UBInt64(u'name_arg'), construct.UBInt16(u'length'),
        construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

    # Identify an user.
    # terminal_id: unknown, research needed.
    # terminal_addr: unknown, research needed.
    BSM_TOKEN_SUBJECT32 = construct.Struct(u'bsm_token_subject32',
                                           BSM_TOKEN_SUBJECT_SHORT,
                                           construct.UBInt32(u'terminal_port'),
                                           IPV4_STRUCT)

    # Identify an user using a extended Token.
    # terminal_port: unknown, need research.
    # net_type: unknown, need research.
    BSM_TOKEN_SUBJECT32_EX = construct.Struct(
        u'bsm_token_subject32_ex', BSM_TOKEN_SUBJECT_SHORT,
        construct.UBInt32(u'terminal_port'), BSM_IP_TYPE_SHORT)

    # au_to_opaque // AUT_OPAQUE
    BSM_TOKEN_OPAQUE = BSM_TOKEN_TEXT

    # au_to_seq // AUT_SEQ
    BSM_TOKEN_SEQUENCE = BSM_TOKEN_DATA_INTEGER

    # Program execution with options.
    # For each argument we are going to have a string+ "\x00".
    # Example: [00 00 00 02][41 42 43 00 42 42 00]
    #          2 Arguments, Arg1: [414243] Arg2: [4242].
    BSM_TOKEN_EXEC_ARGUMENTS = construct.UBInt32(u'number_arguments')

    BSM_TOKEN_EXEC_ARGUMENT = construct.Struct(
        u'bsm_token_exec_argument',
        construct.RepeatUntil(_BsmTokenIsEndOfString,
                              construct.StaticField("text", 1)))

    # au_to_in_addr // AUT_IN_ADDR:
    BSM_TOKEN_ADDR = IPV4_STRUCT

    # au_to_in_addr_ext // AUT_IN_ADDR_EX:
    BSM_TOKEN_ADDR_EXT = construct.Struct(u'bsm_token_addr_ext',
                                          construct.UBInt32(u'net_type'),
                                          IPV6_STRUCT)

    # au_to_ip // AUT_IP:
    # TODO: parse this header in the correct way.
    BSM_TOKEN_IP = construct.String(u'binary_ipv4_add', 20)

    # au_to_ipc // AUT_IPC:
    BSM_TOKEN_IPC = construct.Struct(u'bsm_token_ipc',
                                     construct.UBInt8(u'object_type'),
                                     construct.UBInt32(u'object_id'))

    # au_to_ipc_perm // au_to_ipc_perm
    BSM_TOKEN_IPC_PERM = construct.Struct(
        u'bsm_token_ipc_perm', construct.UBInt32(u'user_id'),
        construct.UBInt32(u'group_id'), construct.UBInt32(u'creator_user_id'),
        construct.UBInt32(u'creator_group_id'),
        construct.UBInt32(u'access_mode'), construct.UBInt32(u'slot_seq'),
        construct.UBInt32(u'key'))

    # au_to_iport // AUT_IPORT:
    BSM_TOKEN_PORT = construct.UBInt16(u'port_number')

    # au_to_file // AUT_OTHER_FILE32:
    BSM_TOKEN_FILE = construct.Struct(
        u'bsm_token_file', construct.UBInt32(u'timestamp'),
        construct.UBInt32(u'microsecond'), construct.UBInt16(u'length'),
        construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

    # au_to_subject64 // AUT_SUBJECT64:
    BSM_TOKEN_SUBJECT64 = construct.Struct(u'bsm_token_subject64',
                                           BSM_TOKEN_SUBJECT_SHORT,
                                           construct.UBInt64(u'terminal_port'),
                                           IPV4_STRUCT)

    # au_to_subject64_ex // AU_IPv4:
    BSM_TOKEN_SUBJECT64_EX = construct.Struct(
        u'bsm_token_subject64_ex', BSM_TOKEN_SUBJECT_SHORT,
        construct.UBInt32(u'terminal_port'),
        construct.UBInt32(u'terminal_type'), BSM_IP_TYPE_SHORT)

    # au_to_process32 // AUT_PROCESS32:
    BSM_TOKEN_PROCESS32 = construct.Struct(u'bsm_token_process32',
                                           BSM_TOKEN_SUBJECT_SHORT,
                                           construct.UBInt32(u'terminal_port'),
                                           IPV4_STRUCT)

    # au_to_process64 // AUT_PROCESS32:
    BSM_TOKEN_PROCESS64 = construct.Struct(u'bsm_token_process64',
                                           BSM_TOKEN_SUBJECT_SHORT,
                                           construct.UBInt64(u'terminal_port'),
                                           IPV4_STRUCT)

    # au_to_process32_ex // AUT_PROCESS32_EX:
    BSM_TOKEN_PROCESS32_EX = construct.Struct(
        u'bsm_token_process32_ex', BSM_TOKEN_SUBJECT_SHORT,
        construct.UBInt32(u'terminal_port'), BSM_IP_TYPE_SHORT)

    # au_to_process64_ex // AUT_PROCESS64_EX:
    BSM_TOKEN_PROCESS64_EX = construct.Struct(
        u'bsm_token_process64_ex', BSM_TOKEN_SUBJECT_SHORT,
        construct.UBInt64(u'terminal_port'), BSM_IP_TYPE_SHORT)

    # au_to_sock_inet32 // AUT_SOCKINET32:
    BSM_TOKEN_AUT_SOCKINET32 = construct.Struct(
        u'bsm_token_aut_sockinet32', construct.UBInt16(u'net_type'),
        construct.UBInt16(u'port_number'), IPV4_STRUCT)

    # Info: checked against the source code of XNU, but not against
    #       real BSM file.
    BSM_TOKEN_AUT_SOCKINET128 = construct.Struct(
        u'bsm_token_aut_sockinet128', construct.UBInt16(u'net_type'),
        construct.UBInt16(u'port_number'), IPV6_STRUCT)

    INET6_ADDR_TYPE = construct.Struct(u'addr_type',
                                       construct.UBInt16(u'ip_type'),
                                       construct.UBInt16(u'source_port'),
                                       construct.UBInt64(u'saddr_high'),
                                       construct.UBInt64(u'saddr_low'),
                                       construct.UBInt16(u'destination_port'),
                                       construct.UBInt64(u'daddr_high'),
                                       construct.UBInt64(u'daddr_low'))

    INET4_ADDR_TYPE = construct.Struct(
        u'addr_type', construct.UBInt16(u'ip_type'),
        construct.UBInt16(u'source_port'),
        construct.UBInt32(u'source_address'),
        construct.UBInt16(u'destination_port'),
        construct.UBInt32(u'destination_address'))

    # au_to_socket_ex // AUT_SOCKET_EX
    # TODO: Change the 26 for unixbsm.BSM_PROTOCOLS.INET6.
    BSM_TOKEN_AUT_SOCKINET32_EX = construct.Struct(
        u'bsm_token_aut_sockinet32_ex', construct.UBInt16(u'socket_domain'),
        construct.UBInt16(u'socket_type'),
        construct.Switch(u'structure_addr_port',
                         _BsmTokenGetSocketDomain, {26: INET6_ADDR_TYPE},
                         default=INET4_ADDR_TYPE))

    # au_to_sock_unix // AUT_SOCKUNIX
    BSM_TOKEN_SOCKET_UNIX = construct.Struct(
        u'bsm_token_au_to_sock_unix', construct.UBInt16(u'family'),
        construct.RepeatUntil(_BsmTokenIsEndOfString,
                              construct.StaticField("path", 1)))

    # au_to_data // au_to_data
    # how to print: bsmtoken.BSM_TOKEN_DATA_PRINT.
    # type: bsmtoken.BSM_TOKEN_DATA_TYPE.
    # unit_count: number of type values.
    # BSM_TOKEN_DATA has a end field = type * unit_count
    BSM_TOKEN_DATA = construct.Struct(u'bsm_token_data',
                                      construct.UBInt8(u'how_to_print'),
                                      construct.UBInt8(u'data_type'),
                                      construct.UBInt8(u'unit_count'))

    # au_to_attr32 // AUT_ATTR32
    BSM_TOKEN_ATTR32 = construct.Struct(
        u'bsm_token_attr32', construct.UBInt32(u'file_mode'),
        construct.UBInt32(u'uid'), construct.UBInt32(u'gid'),
        construct.UBInt32(u'file_system_id'),
        construct.UBInt64(u'file_system_node_id'),
        construct.UBInt32(u'device'))

    # au_to_attr64 // AUT_ATTR64
    BSM_TOKEN_ATTR64 = construct.Struct(
        u'bsm_token_attr64', construct.UBInt32(u'file_mode'),
        construct.UBInt32(u'uid'), construct.UBInt32(u'gid'),
        construct.UBInt32(u'file_system_id'),
        construct.UBInt64(u'file_system_node_id'),
        construct.UBInt64(u'device'))

    # au_to_exit // AUT_EXIT
    BSM_TOKEN_EXIT = construct.Struct(u'bsm_token_exit',
                                      construct.UBInt32(u'status'),
                                      construct.UBInt32(u'return_value'))

    # au_to_newgroups // AUT_NEWGROUPS
    # INFO: we must read BSM_TOKEN_DATA_INTEGER for each group.
    BSM_TOKEN_GROUPS = construct.UBInt16(u'group_number')

    # au_to_exec_env == au_to_exec_args
    BSM_TOKEN_EXEC_ENV = BSM_TOKEN_EXEC_ARGUMENTS

    # au_to_zonename //AUT_ZONENAME
    BSM_TOKEN_ZONENAME = BSM_TOKEN_TEXT

    # Token ID.
    # List of valid Token_ID.
    # Token_ID -> [NAME_STRUCTURE, STRUCTURE]
    # Only the checked structures are been added to the valid structures lists.
    BSM_TYPE_LIST = {
        17: [u'BSM_TOKEN_FILE', BSM_TOKEN_FILE],
        19: [u'BSM_TOKEN_TRAILER', BSM_TOKEN_TRAILER],
        20: [u'BSM_HEADER32', BSM_HEADER32],
        21: [u'BSM_HEADER64', BSM_HEADER64],
        33: [u'BSM_TOKEN_DATA', BSM_TOKEN_DATA],
        34: [u'BSM_TOKEN_IPC', BSM_TOKEN_IPC],
        35: [u'BSM_TOKEN_PATH', BSM_TOKEN_PATH],
        36: [u'BSM_TOKEN_SUBJECT32', BSM_TOKEN_SUBJECT32],
        38: [u'BSM_TOKEN_PROCESS32', BSM_TOKEN_PROCESS32],
        39: [u'BSM_TOKEN_RETURN32', BSM_TOKEN_RETURN32],
        40: [u'BSM_TOKEN_TEXT', BSM_TOKEN_TEXT],
        41: [u'BSM_TOKEN_OPAQUE', BSM_TOKEN_OPAQUE],
        42: [u'BSM_TOKEN_ADDR', BSM_TOKEN_ADDR],
        43: [u'BSM_TOKEN_IP', BSM_TOKEN_IP],
        44: [u'BSM_TOKEN_PORT', BSM_TOKEN_PORT],
        45: [u'BSM_TOKEN_ARGUMENT32', BSM_TOKEN_ARGUMENT32],
        47: [u'BSM_TOKEN_SEQUENCE', BSM_TOKEN_SEQUENCE],
        96: [u'BSM_TOKEN_ZONENAME', BSM_TOKEN_ZONENAME],
        113: [u'BSM_TOKEN_ARGUMENT64', BSM_TOKEN_ARGUMENT64],
        114: [u'BSM_TOKEN_RETURN64', BSM_TOKEN_RETURN64],
        116: [u'BSM_HEADER32_EX', BSM_HEADER32_EX],
        119: [u'BSM_TOKEN_PROCESS64', BSM_TOKEN_PROCESS64],
        122: [u'BSM_TOKEN_SUBJECT32_EX', BSM_TOKEN_SUBJECT32_EX],
        127: [u'BSM_TOKEN_AUT_SOCKINET32_EX', BSM_TOKEN_AUT_SOCKINET32_EX],
        128: [u'BSM_TOKEN_AUT_SOCKINET32', BSM_TOKEN_AUT_SOCKINET32]
    }

    # Untested structures.
    # When not tested structure is found, we try to parse using also
    # these structures.
    BSM_TYPE_LIST_NOT_TESTED = {
        49: [u'BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32],
        50: [u'BSM_TOKEN_IPC_PERM', BSM_TOKEN_IPC_PERM],
        52: [u'BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS],
        59: [u'BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS],
        60: [u'BSM_TOKEN_EXEC_ARGUMENTS', BSM_TOKEN_EXEC_ARGUMENTS],
        61: [u'BSM_TOKEN_EXEC_ENV', BSM_TOKEN_EXEC_ENV],
        62: [u'BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32],
        82: [u'BSM_TOKEN_EXIT', BSM_TOKEN_EXIT],
        115: [u'BSM_TOKEN_ATTR64', BSM_TOKEN_ATTR64],
        117: [u'BSM_TOKEN_SUBJECT64', BSM_TOKEN_SUBJECT64],
        123: [u'BSM_TOKEN_PROCESS32_EX', BSM_TOKEN_PROCESS32_EX],
        124: [u'BSM_TOKEN_PROCESS64_EX', BSM_TOKEN_PROCESS64_EX],
        125: [u'BSM_TOKEN_SUBJECT64_EX', BSM_TOKEN_SUBJECT64_EX],
        126: [u'BSM_TOKEN_ADDR_EXT', BSM_TOKEN_ADDR_EXT],
        129: [u'BSM_TOKEN_AUT_SOCKINET128', BSM_TOKEN_AUT_SOCKINET128],
        130: [u'BSM_TOKEN_SOCKET_UNIX', BSM_TOKEN_SOCKET_UNIX]
    }

    def __init__(self):
        """Initializes a parser object."""
        super(BsmParser, self).__init__()
        # Create the dictionary with all token IDs: tested and untested.
        self.bsm_type_list_all = self.BSM_TYPE_LIST.copy()
        self.bsm_type_list_all.update(self.BSM_TYPE_LIST_NOT_TESTED)

    def _CopyByteArrayToBase16String(self, byte_array):
        """Copies a byte array into a base-16 encoded Unicode string.

    Args:
      byte_array: A byte array.

    Returns:
      A base-16 encoded Unicode string.
    """
        return u''.join([u'{0:02x}'.format(byte) for byte in byte_array])

    def _CopyUtf8ByteArrayToString(self, byte_array):
        """Copies a UTF-8 encoded byte array into a Unicode string.

    Args:
      byte_array: A byte array containing an UTF-8 encoded string.

    Returns:
      A Unicode string.
    """
        byte_stream = b''.join(map(chr, byte_array))

        try:
            string = byte_stream.decode(u'utf-8')
        except UnicodeDecodeError:
            logging.warning(u'Unable to decode UTF-8 formatted byte array.')
            string = byte_stream.decode(u'utf-8', errors=u'ignore')

        string, _, _ = string.partition(b'\x00')
        return string

    def _IPv4Format(self, address):
        """Change an integer IPv4 address value for its 4 octets representation.

    Args:
      address: integer with the IPv4 address.

    Returns:
      IPv4 address in 4 octet representation (class A, B, C, D).
    """
        ipv4_string = self.IPV4_STRUCT.build(address)
        return socket.inet_ntoa(ipv4_string)

    def _IPv6Format(self, high, low):
        """Provide a readable IPv6 IP having the high and low part in 2 integers.

    Args:
      high: 64 bits integers number with the high part of the IPv6.
      low: 64 bits integers number with the low part of the IPv6.

    Returns:
      String with a well represented IPv6.
    """
        ipv6_string = self.IPV6_STRUCT.build(
            construct.Container(high=high, low=low))
        # socket.inet_ntop not supported in Windows.
        if hasattr(socket, u'inet_ntop'):
            return socket.inet_ntop(socket.AF_INET6, ipv6_string)

        # TODO: this approach returns double "::", illegal IPv6 addr.
        str_address = binascii.hexlify(ipv6_string)
        address = []
        blank = False
        for pos in range(0, len(str_address), 4):
            if str_address[pos:pos + 4] == u'0000':
                if not blank:
                    address.append(u'')
                    blank = True
            else:
                blank = False
                address.append(str_address[pos:pos + 4].lstrip(u'0'))
        return u':'.join(address)

    def _RawToUTF8(self, byte_stream):
        """Copies a UTF-8 byte stream into a Unicode string.

    Args:
      byte_stream: A byte stream containing an UTF-8 encoded string.

    Returns:
      A Unicode string.
    """
        try:
            string = byte_stream.decode(u'utf-8')
        except UnicodeDecodeError:
            logging.warning(
                u'Decode UTF8 failed, the message string may be cut short.')
            string = byte_stream.decode(u'utf-8', errors=u'ignore')
        return string.partition(b'\x00')[0]

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a BSM file-like object.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        file_object.seek(0, os.SEEK_SET)

        try:
            is_bsm = self.VerifyFile(parser_mediator, file_object)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                u'Unable to parse BSM file with error: {0:s}'.format(
                    exception))

        if not is_bsm:
            raise errors.UnableToParseFile(u'Not a BSM File, unable to parse.')

        event_object = self.ReadBSMEvent(parser_mediator, file_object)
        while event_object:
            parser_mediator.ProduceEvent(event_object)

            event_object = self.ReadBSMEvent(parser_mediator, file_object)

    def ReadBSMEvent(self, parser_mediator, file_object):
        """Returns a BsmEvent from a single BSM entry.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Returns:
      An event object.
    """
        # A list of tokens that has the entry.
        extra_tokens = []

        offset = file_object.tell()

        # Token header, first token for each entry.
        try:
            token_id = self.BSM_TYPE.parse_stream(file_object)
        except (IOError, construct.FieldError):
            return

        bsm_type, structure = self.BSM_TYPE_LIST.get(token_id, [u'', u''])
        if bsm_type == u'BSM_HEADER32':
            token = structure.parse_stream(file_object)
        elif bsm_type == u'BSM_HEADER64':
            token = structure.parse_stream(file_object)
        elif bsm_type == u'BSM_HEADER32_EX':
            token = structure.parse_stream(file_object)
        else:
            logging.warning(
                u'Token ID Header {0} not expected at position 0x{1:X}.'
                u'The parsing of the file cannot be continued'.format(
                    token_id, file_object.tell()))
            # TODO: if it is a Mac OS X, search for the trailer magic value
            #       as a end of the entry can be a possibility to continue.
            return

        length = token.bsm_header.length
        event_type = u'{0} ({1})'.format(
            bsmtoken.BSM_AUDIT_EVENT.get(token.bsm_header.event_type,
                                         u'UNKNOWN'),
            token.bsm_header.event_type)
        timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
            token.timestamp, token.microsecond)

        # Read until we reach the end of the record.
        while file_object.tell() < (offset + length):
            # Check if it is a known token.
            try:
                token_id = self.BSM_TYPE.parse_stream(file_object)
            except (IOError, construct.FieldError):
                logging.warning(
                    u'Unable to parse the Token ID at position: {0:d}'.format(
                        file_object.tell()))
                return
            if not token_id in self.BSM_TYPE_LIST:
                pending = (offset + length) - file_object.tell()
                extra_tokens.extend(
                    self.TryWithUntestedStructures(file_object, token_id,
                                                   pending))
            else:
                token = self.BSM_TYPE_LIST[token_id][1].parse_stream(
                    file_object)
                extra_tokens.append(
                    self.FormatToken(token_id, token, file_object))

        if file_object.tell() > (offset + length):
            logging.warning(u'Token ID {0} not expected at position 0x{1:X}.'
                            u'Jumping for the next entry.'.format(
                                token_id, file_object.tell()))
            try:
                file_object.seek((offset + length) - file_object.tell(),
                                 os.SEEK_CUR)
            except (IOError, construct.FieldError) as exception:
                logging.warning(
                    u'Unable to jump to next entry with error: {0:s}'.format(
                        exception))
                return

        # BSM can be in more than one OS: BSD, Solaris and Mac OS X.
        if parser_mediator.platform == u'MacOSX':
            # In Mac OS X the last two tokens are the return status and the trailer.
            if len(extra_tokens) >= 2:
                return_value = extra_tokens[-2:-1][0]
                if (return_value.startswith(u'[BSM_TOKEN_RETURN32')
                        or return_value.startswith(u'[BSM_TOKEN_RETURN64')):
                    _ = extra_tokens.pop(len(extra_tokens) - 2)
                else:
                    return_value = u'Return unknown'
            else:
                return_value = u'Return unknown'
            if extra_tokens:
                trailer = extra_tokens[-1]
                if trailer.startswith(u'[BSM_TOKEN_TRAILER'):
                    _ = extra_tokens.pop(len(extra_tokens) - 1)
                else:
                    trailer = u'Trailer unknown'
            else:
                trailer = u'Trailer unknown'
            return MacBsmEvent(event_type, timestamp, u'. '.join(extra_tokens),
                               return_value, trailer, offset)
        else:
            # Generic BSM format.
            if extra_tokens:
                trailer = extra_tokens[-1]
                if trailer.startswith(u'[BSM_TOKEN_TRAILER'):
                    _ = extra_tokens.pop(len(extra_tokens) - 1)
                else:
                    trailer = u'Trailer unknown'
            else:
                trailer = u'Trailer unknown'
            return BsmEvent(event_type, timestamp, u'. '.join(extra_tokens),
                            trailer, offset)

    def VerifyFile(self, parser_mediator, file_object):
        """Check if the file is a BSM file.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_event: file that we want to check.

    Returns:
      True if this is a valid BSM file, otherwise False.
    """
        if file_object.tell() != 0:
            file_object.seek(0)

        # First part of the entry is always a Header.
        try:
            token_id = self.BSM_TYPE.parse_stream(file_object)
        except (IOError, construct.FieldError):
            return False
        if token_id not in self.BSM_TYPE_LIST:
            return False

        bsm_type, structure = self.BSM_TYPE_LIST.get(token_id, [u'', u''])
        try:
            if bsm_type == u'BSM_HEADER32':
                header = structure.parse_stream(file_object)
            elif bsm_type == u'BSM_HEADER64':
                header = structure.parse_stream(file_object)
            elif bsm_type == u'BSM_HEADER32_EX':
                header = structure.parse_stream(file_object)
            else:
                return False
        except (IOError, construct.FieldError):
            return False
        if header.bsm_header.version != self.AUDIT_HEADER_VERSION:
            return False

        try:
            token_id = self.BSM_TYPE.parse_stream(file_object)
        except (IOError, construct.FieldError):
            return False

        # If is Mac OS X BSM file, next entry is a  text token indicating
        # if it is a normal start or it is a recovery track.
        if parser_mediator.platform == u'MacOSX':
            bsm_type_list = self.BSM_TYPE_LIST.get(token_id)
            if not bsm_type_list:
                return False

            if bsm_type_list[0] != u'BSM_TOKEN_TEXT':
                logging.warning(
                    u'It is not a valid first entry for Mac OS X BSM.')
                return False
            try:
                token = self.BSM_TOKEN_TEXT.parse_stream(file_object)
            except (IOError, construct.FieldError):
                return

            text = self._CopyUtf8ByteArrayToString(token.text)
            if (text != u'launchctl::Audit startup'
                    and text != u'launchctl::Audit recovery'):
                logging.warning(
                    u'It is not a valid first entry for Mac OS X BSM.')
                return False

        file_object.seek(0)
        return True

    def TryWithUntestedStructures(self, file_object, token_id, pending):
        """Try to parse the pending part of the entry using untested structures.

    Args:
      file_object: BSM file.
      token_id: integer with the id that comes from the unknown token.
      pending: pending length of the entry.

    Returns:
      A list of extra tokens data that can be parsed using non-tested
      structures. A message indicating that a structure cannot be parsed
      is added for unparsed structures.
    """
        # Data from the unknown structure.
        start_position = file_object.tell()
        start_token_id = token_id
        extra_tokens = []

        # Read all the "pending" bytes.
        try:
            if token_id in self.bsm_type_list_all:
                token = self.bsm_type_list_all[token_id][1].parse_stream(
                    file_object)
                extra_tokens.append(
                    self.FormatToken(token_id, token, file_object))
                while file_object.tell() < (start_position + pending):
                    # Check if it is a known token.
                    try:
                        token_id = self.BSM_TYPE.parse_stream(file_object)
                    except (IOError, construct.FieldError):
                        logging.warning(
                            u'Unable to parse the Token ID at position: {0:d}'.
                            format(file_object.tell()))
                        return
                    if token_id not in self.bsm_type_list_all:
                        break
                    token = self.bsm_type_list_all[token_id][1].parse_stream(
                        file_object)
                    extra_tokens.append(
                        self.FormatToken(token_id, token, file_object))
        except (IOError, construct.FieldError):
            token_id = 255

        next_entry = (start_position + pending)
        if file_object.tell() != next_entry:
            # Unknown Structure.
            logging.warning(
                u'Unknown Token at "0x{0:X}", ID: {1} (0x{2:X})'.format(
                    start_position - 1, token_id, token_id))
            # TODO: another way to save this information must be found.
            extra_tokens.append(u'Plaso: some tokens from this entry can '
                                u'not be saved. Entry at 0x{0:X} with unknown '
                                u'token id "0x{1:X}".'.format(
                                    start_position - 1, start_token_id))
            # Move to next entry.
            file_object.seek(next_entry - file_object.tell(), os.SEEK_CUR)
            # It returns null list because it doesn't know witch structure was
            # the incorrect structure that makes that it can arrive to the spected
            # end of the entry.
            return []
        return extra_tokens

    # TODO: instead of compare the text to know what structure was parsed
    #       is better to compare directly the numeric number (token_id),
    #       less readable, but better performance.
    def FormatToken(self, token_id, token, file_object):
        """Parse the Token depending of the type of the structure.

    Args:
      token_id: Identification integer of the token_type.
      token: Token struct to parse.
      file_object: BSM file.

    Returns:
      String with the parsed Token values.
    """
        if token_id not in self.bsm_type_list_all:
            return u'Type Unknown: {0:d} (0x{0:X})'.format(token_id)

        bsm_type, _ = self.bsm_type_list_all.get(token_id, [u'', u''])

        if bsm_type in [
                u'BSM_TOKEN_TEXT', u'BSM_TOKEN_PATH', u'BSM_TOKEN_ZONENAME'
        ]:
            try:
                string = self._CopyUtf8ByteArrayToString(token.text)
            except TypeError:
                string = u'Unknown'
            return u'[{0}: {1:s}]'.format(bsm_type, string)

        elif bsm_type in [
                u'BSM_TOKEN_RETURN32', u'BSM_TOKEN_RETURN64', u'BSM_TOKEN_EXIT'
        ]:
            return u'[{0}: {1} ({2}), System call status: {3}]'.format(
                bsm_type, bsmtoken.BSM_ERRORS.get(token.status, u'Unknown'),
                token.status, token.return_value)

        elif bsm_type in [u'BSM_TOKEN_SUBJECT32', u'BSM_TOKEN_SUBJECT64']:
            return (
                u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
                u'pid({6}), session_id({7}), terminal_port({8}), '
                u'terminal_ip({9})]').format(
                    bsm_type, token.subject_data.audit_uid,
                    token.subject_data.effective_uid,
                    token.subject_data.effective_gid,
                    token.subject_data.real_uid, token.subject_data.real_gid,
                    token.subject_data.pid, token.subject_data.session_id,
                    token.terminal_port, self._IPv4Format(token.ipv4))

        elif bsm_type in [
                u'BSM_TOKEN_SUBJECT32_EX', u'BSM_TOKEN_SUBJECT64_EX'
        ]:
            if token.bsm_ip_type_short.net_type == self.AU_IPv6:
                ip = self._IPv6Format(token.bsm_ip_type_short.ip_addr.high,
                                      token.bsm_ip_type_short.ip_addr.low)
            elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
                ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
            else:
                ip = u'unknown'
            return (
                u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
                u'pid({6}), session_id({7}), terminal_port({8}), '
                u'terminal_ip({9})]').format(
                    bsm_type, token.subject_data.audit_uid,
                    token.subject_data.effective_uid,
                    token.subject_data.effective_gid,
                    token.subject_data.real_uid, token.subject_data.real_gid,
                    token.subject_data.pid, token.subject_data.session_id,
                    token.terminal_port, ip)

        elif bsm_type in [u'BSM_TOKEN_ARGUMENT32', u'BSM_TOKEN_ARGUMENT64']:
            string = self._CopyUtf8ByteArrayToString(token.text)
            return u'[{0}: {1:s}({2}) is 0x{3:X}]'.format(
                bsm_type, string, token.num_arg, token.name_arg)

        elif bsm_type in [u'BSM_TOKEN_EXEC_ARGUMENTS', u'BSM_TOKEN_EXEC_ENV']:
            arguments = []
            for _ in range(0, token):
                sub_token = self.BSM_TOKEN_EXEC_ARGUMENT.parse_stream(
                    file_object)
                string = self._CopyUtf8ByteArrayToString(sub_token.text)
                arguments.append(string)
            return u'[{0}: {1:s}]'.format(bsm_type, u' '.join(arguments))

        elif bsm_type == u'BSM_TOKEN_AUT_SOCKINET32':
            return (u'[{0}: {1} ({2}) open in port {3}. Address {4}]'.format(
                bsm_type, bsmtoken.BSM_PROTOCOLS.get(token.net_type,
                                                     u'UNKNOWN'),
                token.net_type, token.port_number,
                self._IPv4Format(token.ipv4)))

        elif bsm_type == u'BSM_TOKEN_AUT_SOCKINET128':
            return u'[{0}: {1} ({2}) open in port {3}. Address {4}]'.format(
                bsm_type, bsmtoken.BSM_PROTOCOLS.get(token.net_type,
                                                     u'UNKNOWN'),
                token.net_type, token.port_number,
                self._IPv6Format(token.ipv6.high, token.ipv6.low))

        elif bsm_type == u'BSM_TOKEN_ADDR':
            return u'[{0}: {1}]'.format(bsm_type, self._IPv4Format(token))

        elif bsm_type == u'BSM_TOKEN_IP':
            return u'[IPv4_Header: 0x{0:s}]'.format(token.encode(u'hex'))

        elif bsm_type == u'BSM_TOKEN_ADDR_EXT':
            return u'[{0}: {1} ({2}). Address {3}]'.format(
                bsm_type, bsmtoken.BSM_PROTOCOLS.get(token.net_type,
                                                     u'UNKNOWN'),
                token.net_type,
                self._IPv6Format(token.ipv6.high, token.ipv6.low))

        elif bsm_type == u'BSM_TOKEN_PORT':
            return u'[{0}: {1}]'.format(bsm_type, token)

        elif bsm_type == u'BSM_TOKEN_TRAILER':
            return u'[{0}: {1}]'.format(bsm_type, token.record_length)

        elif bsm_type == u'BSM_TOKEN_FILE':
            # TODO: if this timestamp is usefull, it must be extracted as a separate
            #       event object.
            timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
                token.timestamp, token.microsecond)
            date_time = timelib.Timestamp.CopyToDatetime(timestamp, pytz.UTC)
            date_time_string = date_time.strftime(u'%Y-%m-%d %H:%M:%S')

            string = self._CopyUtf8ByteArrayToString(token.text)
            return u'[{0}: {1:s}, timestamp: {2:s}]'.format(
                bsm_type, string, date_time_string)

        elif bsm_type == u'BSM_TOKEN_IPC':
            return u'[{0}: object type {1}, object id {2}]'.format(
                bsm_type, token.object_type, token.object_id)

        elif bsm_type in [u'BSM_TOKEN_PROCESS32', u'BSM_TOKEN_PROCESS64']:
            return (
                u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
                u'pid({6}), session_id({7}), terminal_port({8}), '
                u'terminal_ip({9})]').format(
                    bsm_type, token.subject_data.audit_uid,
                    token.subject_data.effective_uid,
                    token.subject_data.effective_gid,
                    token.subject_data.real_uid, token.subject_data.real_gid,
                    token.subject_data.pid, token.subject_data.session_id,
                    token.terminal_port, self._IPv4Format(token.ipv4))

        elif bsm_type in [
                u'BSM_TOKEN_PROCESS32_EX', u'BSM_TOKEN_PROCESS64_EX'
        ]:
            if token.bsm_ip_type_short.net_type == self.AU_IPv6:
                ip = self._IPv6Format(token.bsm_ip_type_short.ip_addr.high,
                                      token.bsm_ip_type_short.ip_addr.low)
            elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
                ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
            else:
                ip = u'unknown'
            return (
                u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
                u'pid({6}), session_id({7}), terminal_port({8}), '
                u'terminal_ip({9})]').format(
                    bsm_type, token.subject_data.audit_uid,
                    token.subject_data.effective_uid,
                    token.subject_data.effective_gid,
                    token.subject_data.real_uid, token.subject_data.real_gid,
                    token.subject_data.pid, token.subject_data.session_id,
                    token.terminal_port, ip)

        elif bsm_type == u'BSM_TOKEN_DATA':
            data = []
            data_type = bsmtoken.BSM_TOKEN_DATA_TYPE.get(token.data_type, u'')
            if data_type == u'AUR_CHAR':
                for _ in range(token.unit_count):
                    data.append(
                        self.BSM_TOKEN_DATA_CHAR.parse_stream(file_object))
            elif data_type == u'AUR_SHORT':
                for _ in range(token.unit_count):
                    data.append(
                        self.BSM_TOKEN_DAT_SHORT.parse_stream(file_object))
            elif data_type == u'AUR_INT32':
                for _ in range(token.unit_count):
                    data.append(
                        self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object))
            else:
                data.append(u'Unknown type data')
            # TODO: the data when it is string ends with ".", HW a space is return
            #       after uses the UTF-8 conversion.
            return u'[{0}: Format data: {1}, Data: {2}]'.format(
                bsm_type, bsmtoken.BSM_TOKEN_DATA_PRINT[token.how_to_print],
                self._RawToUTF8(u''.join(data)))

        elif bsm_type in [u'BSM_TOKEN_ATTR32', u'BSM_TOKEN_ATTR64']:
            return (u'[{0}: Mode: {1}, UID: {2}, GID: {3}, '
                    u'File system ID: {4}, Node ID: {5}, Device: {6}]').format(
                        bsm_type, token.file_mode, token.uid, token.gid,
                        token.file_system_id, token.file_system_node_id,
                        token.device)

        elif bsm_type == u'BSM_TOKEN_GROUPS':
            arguments = []
            for _ in range(token):
                arguments.append(
                    self._RawToUTF8(
                        self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object)))
            return u'[{0}: {1:s}]'.format(bsm_type, u','.join(arguments))

        elif bsm_type == u'BSM_TOKEN_AUT_SOCKINET32_EX':
            if bsmtoken.BSM_PROTOCOLS.get(token.socket_domain,
                                          u'') == u'INET6':
                saddr = self._IPv6Format(token.structure_addr_port.saddr_high,
                                         token.structure_addr_port.saddr_low)
                daddr = self._IPv6Format(token.structure_addr_port.daddr_high,
                                         token.structure_addr_port.daddr_low)
            else:
                saddr = self._IPv4Format(
                    token.structure_addr_port.source_address)
                daddr = self._IPv4Format(
                    token.structure_addr_port.destination_address)

            return u'[{0}: from {1} port {2} to {3} port {4}]'.format(
                bsm_type, saddr, token.structure_addr_port.source_port, daddr,
                token.structure_addr_port.destination_port)

        elif bsm_type == u'BSM_TOKEN_IPC_PERM':
            return (u'[{0}: user id {1}, group id {2}, create user id {3}, '
                    u'create group id {4}, access {5}]').format(
                        bsm_type, token.user_id, token.group_id,
                        token.creator_user_id, token.creator_group_id,
                        token.access_mode)

        elif bsm_type == u'BSM_TOKEN_SOCKET_UNIX':
            string = self._CopyUtf8ByteArrayToString(token.path)
            return u'[{0}: Family {1}, Path {2:s}]'.format(
                bsm_type, token.family, string)

        elif bsm_type == u'BSM_TOKEN_OPAQUE':
            string = self._CopyByteArrayToBase16String(token.text)
            return u'[{0}: {1:s}]'.format(bsm_type, string)

        elif bsm_type == u'BSM_TOKEN_SEQUENCE':
            return u'[{0}: {1}]'.format(bsm_type, token)

Example #10

Show file

File: tracker.py Project: romanz/pybt

import bencode

import construct as c
from gevent import socket

class Error(Exception):
    pass

MAX_PACKET_SIZE = 2**16

log = logging.getLogger('tracker')

INIT_ID = 0x41727101980

connect_req = c.Struct('request', 
    c.UBInt64('connection_id'),
    c.UBInt32('action'),
    c.UBInt32('transaction_id')
)

connect_resp = c.Struct('response',
    c.UBInt32('action'),
    c.UBInt32('transaction_id'),
    c.UBInt64('connection_id'),
)

announce_req = c.Struct('request',
    c.UBInt64('connection_id'),
    c.UBInt32('action'),
    c.UBInt32('transaction_id'),
    c.Bytes('info_hash', 20),

Example #11

Show file

class JavaIDXParser(interface.FileObjectParser):
    """Parse Java WebStart Cache IDX files for download events.

  There are five structures defined. 6.02 files had one generic section
  that retained all data. From 6.03, the file went to a multi-section
  format where later sections were optional and had variable-lengths.
  6.03, 6.04, and 6.05 files all have their main data section (#2)
  begin at offset 128. The short structure is because 6.05 files
  deviate after the 8th byte. So, grab the first 8 bytes to ensure it's
  valid, get the file version, then continue on with the correct
  structures.
  """

    _INITIAL_FILE_OFFSET = None

    NAME = u'java_idx'
    DESCRIPTION = u'Parser for Java WebStart Cache IDX files.'

    IDX_SHORT_STRUCT = construct.Struct(u'magic', construct.UBInt8(u'busy'),
                                        construct.UBInt8(u'incomplete'),
                                        construct.UBInt32(u'idx_version'))

    IDX_602_STRUCT = construct.Struct(
        u'IDX_602_Full', construct.UBInt16(u'null_space'),
        construct.UBInt8(u'shortcut'), construct.UBInt32(u'content_length'),
        construct.UBInt64(u'last_modified_date'),
        construct.UBInt64(u'expiration_date'),
        construct.PascalString(u'version_string',
                               length_field=construct.UBInt16(u'length')),
        construct.PascalString(u'url',
                               length_field=construct.UBInt16(u'length')),
        construct.PascalString(u'namespace',
                               length_field=construct.UBInt16(u'length')),
        construct.UBInt32(u'FieldCount'))

    IDX_605_SECTION_ONE_STRUCT = construct.Struct(
        u'IDX_605_Section1', construct.UBInt8(u'shortcut'),
        construct.UBInt32(u'content_length'),
        construct.UBInt64(u'last_modified_date'),
        construct.UBInt64(u'expiration_date'),
        construct.UBInt64(u'validation_date'), construct.UBInt8(u'signed'),
        construct.UBInt32(u'sec2len'), construct.UBInt32(u'sec3len'),
        construct.UBInt32(u'sec4len'))

    IDX_605_SECTION_TWO_STRUCT = construct.Struct(
        u'IDX_605_Section2',
        construct.PascalString(u'version',
                               length_field=construct.UBInt16(u'length')),
        construct.PascalString(u'url',
                               length_field=construct.UBInt16(u'length')),
        construct.PascalString(u'namespec',
                               length_field=construct.UBInt16(u'length')),
        construct.PascalString(u'ip_address',
                               length_field=construct.UBInt16(u'length')),
        construct.UBInt32(u'FieldCount'))

    # Java uses Pascal-style strings, but with a 2-byte length field.
    JAVA_READUTF_STRING = construct.Struct(
        u'Java.ReadUTF',
        construct.PascalString(u'string',
                               length_field=construct.UBInt16(u'length')))

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a Java WebStart Cache IDX file-like object.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        file_object.seek(0, os.SEEK_SET)
        try:
            magic = self.IDX_SHORT_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                u'Unable to parse Java IDX file with error: {0:s}.'.format(
                    exception))

        # Fields magic.busy and magic.incomplete are normally 0x00. They
        # are set to 0x01 if the file is currently being downloaded. Logic
        # checks for > 1 to avoid a race condition and still reject any
        # file with other data.
        # Field magic.idx_version is the file version, of which only
        # certain versions are supported.
        if magic.busy > 1 or magic.incomplete > 1:
            raise errors.UnableToParseFile(u'Not a valid Java IDX file')

        if not magic.idx_version in [602, 603, 604, 605]:
            raise errors.UnableToParseFile(u'Not a valid Java IDX file')

        # Obtain the relevant values from the file. The last modified date
        # denotes when the file was last modified on the HOST. For example,
        # when the file was uploaded to a web server.
        if magic.idx_version == 602:
            section_one = self.IDX_602_STRUCT.parse_stream(file_object)
            last_modified_date = section_one.last_modified_date
            url = section_one.url
            ip_address = u'Unknown'
            http_header_count = section_one.FieldCount
        elif magic.idx_version in [603, 604, 605]:

            # IDX 6.03 and 6.04 have two unused bytes before the structure.
            if magic.idx_version in [603, 604]:
                file_object.read(2)

            # IDX 6.03, 6.04, and 6.05 files use the same structures for the
            # remaining data.
            section_one = self.IDX_605_SECTION_ONE_STRUCT.parse_stream(
                file_object)
            last_modified_date = section_one.last_modified_date
            if file_object.get_size() > 128:
                file_object.seek(128,
                                 os.SEEK_SET)  # Static offset for section 2.
                section_two = self.IDX_605_SECTION_TWO_STRUCT.parse_stream(
                    file_object)
                url = section_two.url
                ip_address = section_two.ip_address
                http_header_count = section_two.FieldCount
            else:
                url = u'Unknown'
                ip_address = u'Unknown'
                http_header_count = 0

        # File offset is now just prior to HTTP headers. Make sure there
        # are headers, and then parse them to retrieve the download date.
        download_date = None
        for field in range(0, http_header_count):
            field = self.JAVA_READUTF_STRING.parse_stream(file_object)
            value = self.JAVA_READUTF_STRING.parse_stream(file_object)
            if field.string == u'date':
                # Time string "should" be in UTC or have an associated time zone
                # information in the string itself. If that is not the case then
                # there is no reliable method for plaso to determine the proper
                # timezone, so the assumption is that it is UTC.
                try:
                    download_date = timelib.Timestamp.FromTimeString(
                        value.string, gmt_as_timezone=False)
                except errors.TimestampError:
                    download_date = None
                    parser_mediator.ProduceExtractionError(
                        u'Unable to parse time value: {0:s}'.format(
                            value.string))

        if not url or not ip_address:
            raise errors.UnableToParseFile(
                u'Unexpected Error: URL or IP address not found in file.')

        event_data = JavaIDXEventData()
        event_data.idx_version = magic.idx_version
        event_data.ip_address = ip_address
        event_data.url = url

        date_time = dfdatetime_java_time.JavaTime(timestamp=last_modified_date)
        # TODO: Move the timestamp description into eventdata.
        event = time_events.DateTimeValuesEvent(date_time, u'File Hosted Date')
        parser_mediator.ProduceEventWithEventData(event, event_data)

        if section_one:
            expiration_date = section_one.get(u'expiration_date', None)
            if expiration_date:
                date_time = dfdatetime_java_time.JavaTime(
                    timestamp=expiration_date)
                event = time_events.DateTimeValuesEvent(
                    date_time, definitions.TIME_DESCRIPTION_EXPIRATION)
                parser_mediator.ProduceEventWithEventData(event, event_data)

        if download_date:
            event = time_events.TimestampEvent(
                download_date, definitions.TIME_DESCRIPTION_FILE_DOWNLOADED)
            parser_mediator.ProduceEventWithEventData(event, event_data)