Example #1
0
class TimeMachinePlugin(interface.PlistPlugin):
    """Plist plugin that extracts Time Machine destinations and backups.

    Fields read from every entry of the Destinations key:
      DestinationID: remote UUID hard disk where the backup is done.
      BackupAlias: structure that contains the extra information from the
          DestinationID.
      SnapshotDates: list of the backup dates.
    """

    NAME = 'time_machine'
    DESCRIPTION = 'Parser for TimeMachine plist files.'

    PLIST_PATH = 'com.apple.TimeMachine.plist'
    PLIST_KEYS = frozenset(['Destinations', 'RootVolumeUUID'])

    # BackupAlias layout as parsed here: 10 skipped bytes followed by a
    # string that is prefixed with a 1-byte length.
    TM_BACKUP_ALIAS = construct.Struct(
        'tm_backup_alias',
        construct.Padding(10),
        construct.PascalString(
            'value', length_field=construct.UBInt8('length')))

    # pylint: disable=arguments-differ
    def GetEntries(self, parser_mediator, match=None, **unused_kwargs):
        """Produces one event per snapshot date of every destination.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          match (Optional[dict[str: object]]): keys extracted from
              PLIST_KEYS.
        """
        for destination in match.get('Destinations', []):
            device_uuid = destination.get('DestinationID', None)
            if not device_uuid:
                device_uuid = 'Unknown device'

            # NOTE(review): the '<ALIAS>' placeholder is presumably too short
            # to parse and therefore ends up as 'Unknown alias' - confirm.
            raw_alias = destination.get('BackupAlias', '<ALIAS>')
            try:
                alias_name = self.TM_BACKUP_ALIAS.parse(raw_alias).value
            except construct.FieldError:
                alias_name = 'Unknown alias'

            # A single event data object is shared by all the snapshot events
            # of this destination.
            event_data = plist_event.PlistTimeEventData()
            event_data.desc = 'TimeMachine Backup in {0:s} ({1:s})'.format(
                alias_name, device_uuid)
            event_data.key = 'item/SnapshotDates'
            event_data.root = '/Destinations'

            for snapshot_datetime in destination.get('SnapshotDates', []):
                posix_timestamp = timelib.Timestamp.FromPythonDatetime(
                    snapshot_datetime)
                snapshot_time = dfdatetime_posix_time.PosixTimeInMicroseconds(
                    timestamp=posix_timestamp)
                snapshot_event = time_events.DateTimeValuesEvent(
                    snapshot_time, definitions.TIME_DESCRIPTION_WRITTEN)
                parser_mediator.ProduceEventWithEventData(
                    snapshot_event, event_data)
Example #2
0
class TimeMachinePlugin(interface.PlistPlugin):
    """Plist plugin that extracts Time Machine destinations and backups.

    Fields read from every entry of the Destinations key:
      DestinationID: remote UUID hard disk where the backup is done.
      BackupAlias: structure that contains the extra information from the
          DestinationID.
      SnapshotDates: list of the backup dates.
    """

    NAME = u'time_machine'
    DESCRIPTION = u'Parser for TimeMachine plist files.'

    PLIST_PATH = u'com.apple.TimeMachine.plist'
    PLIST_KEYS = frozenset([u'Destinations', u'RootVolumeUUID'])

    # BackupAlias layout as parsed here: 10 skipped bytes followed by a
    # string that is prefixed with a 1-byte length.
    TM_BACKUP_ALIAS = construct.Struct(
        u'tm_backup_alias',
        construct.Padding(10),
        construct.PascalString(
            u'value', length_field=construct.UBInt8(u'length')))

    def GetEntries(self, parser_mediator, match=None, **unused_kwargs):
        """Produces one plist event per snapshot date of every destination.

        Args:
          parser_mediator: A parser mediator object (instance of
              ParserMediator).
          match: Optional dictionary containing keys extracted from
              PLIST_KEYS. The default is None.
        """
        if u'Destinations' in match:
            destinations = match[u'Destinations']
        else:
            # Nothing to report without any destination.
            destinations = ()

        event_root = u'/Destinations'
        event_key = u'item/SnapshotDates'

        for destination in destinations:
            device_uuid = (
                destination.get(u'DestinationID', None) or u'Unknown device')

            backup_alias = destination.get(u'BackupAlias', u'<ALIAS>')
            try:
                backup_alias = self.TM_BACKUP_ALIAS.parse(backup_alias).value
            except construct.FieldError:
                backup_alias = u'Unknown alias'

            for snapshot_time in destination.get(u'SnapshotDates', []):
                description = u'TimeMachine Backup in {0:s} ({1:s})'.format(
                    backup_alias, device_uuid)
                snapshot_event = plist_event.PlistEvent(
                    event_root, event_key, snapshot_time, description)
                parser_mediator.ProduceEvent(snapshot_event)
Example #3
0
class TimeMachinePlugin(interface.PlistPlugin):
    """Basic plugin to extract time machine hard disk and the backups.

    Fields read from every entry of the Destinations key:
      DestinationID: remote UUID hard disk where the backup is done.
      BackupAlias: structure that contains the extra information from the
          DestinationID.
      SnapshotDates: list of the backup dates.
    """

    NAME = 'plist_timemachine'
    DESCRIPTION = u'Parser for TimeMachine plist files.'

    PLIST_PATH = 'com.apple.TimeMachine.plist'
    PLIST_KEYS = frozenset(['Destinations', 'RootVolumeUUID'])

    # BackupAlias layout as parsed here: 10 skipped bytes followed by a
    # string that is prefixed with a 1-byte length.
    TM_BACKUP_ALIAS = construct.Struct(
        'tm_backup_alias', construct.Padding(10),
        construct.PascalString('value',
                               length_field=construct.UBInt8('length')))

    def GetEntries(self, parser_context, match=None, **unused_kwargs):
        """Extracts relevant TimeMachine entries.

        Destinations that lack DestinationID, BackupAlias or SnapshotDates
        are tolerated: placeholders are used for the first two and a missing
        SnapshotDates simply produces no event.

        Args:
          parser_context: A parser context object (instance of
              ParserContext).
          match: Optional dictionary containing keys extracted from
              PLIST_KEYS. The default is None.
        """
        # Nothing to extract when no keys matched or there is no destination.
        if not match or 'Destinations' not in match:
            return

        root = '/Destinations'
        key = 'item/SnapshotDates'

        # For each TimeMachine devices.
        for destination in match['Destinations']:
            hd_uuid = destination.get('DestinationID', None)
            if not hd_uuid:
                hd_uuid = u'Unknown device'

            alias = destination.get('BackupAlias', None)
            if alias is None:
                alias = u'Unknown alias'
            else:
                try:
                    alias = self.TM_BACKUP_ALIAS.parse(alias).value
                except construct.FieldError:
                    alias = u'Unknown alias'

            # For each Backup.
            for timestamp in destination.get('SnapshotDates', []):
                description = u'TimeMachine Backup in {0:s} ({1:s})'.format(
                    alias, hd_uuid)
                event_object = plist_event.PlistEvent(root, key, timestamp,
                                                      description)
                parser_context.ProduceEvent(event_object,
                                            plugin_name=self.NAME)
Example #4
0
class TimeMachinePlugin(interface.PlistPlugin):
    """Basic plugin to extract time machine hard disk and the backups.

    Fields read from every entry of the Destinations key:
      DestinationID: remote UUID hard disk where the backup is done.
      BackupAlias: structure that contains the extra information from the
          DestinationID.
      SnapshotDates: list of the backup dates.
    """

    NAME = 'plist_timemachine'

    PLIST_PATH = 'com.apple.TimeMachine.plist'
    PLIST_KEYS = frozenset(['Destinations', 'RootVolumeUUID'])

    # BackupAlias layout as parsed here: 10 skipped bytes followed by a
    # string that is prefixed with a 1-byte length.
    TM_BACKUP_ALIAS = construct.Struct(
        'tm_backup_alias', construct.Padding(10),
        construct.PascalString('value',
                               length_field=construct.UBInt8('length')))

    def GetEntries(self, match, **unused_kwargs):
        """Extracts relevant TimeMachine entries.

        Destinations that lack DestinationID, BackupAlias or SnapshotDates
        are tolerated: placeholders are used for the first two and a missing
        SnapshotDates simply yields no event.

        Args:
          match: A dictionary containing keys extracted from PLIST_KEYS.

        Yields:
          EventObject objects extracted from the plist.
        """
        # Nothing to extract when no keys matched or there is no destination.
        if not match or 'Destinations' not in match:
            return

        root = '/Destinations'
        key = 'item/SnapshotDates'

        # For each TimeMachine devices.
        for destination in match['Destinations']:
            hd_uuid = destination.get('DestinationID', None)
            if not hd_uuid:
                hd_uuid = u'Unknown device'

            alias = destination.get('BackupAlias', None)
            if alias is None:
                alias = u'Unknown alias'
            else:
                try:
                    alias = self.TM_BACKUP_ALIAS.parse(alias).value
                except construct.FieldError:
                    alias = u'Unknown alias'

            # For each Backup.
            for timestamp in destination.get('SnapshotDates', []):
                description = u'TimeMachine Backup in {} ({})'.format(
                    alias, hd_uuid)
                yield plist_event.PlistEvent(root, key, timestamp, description)
Example #5
0
 def __init__(s):
     """Set up the packet header parsers and the command dispatch tables."""
     u8 = construct.UBInt8
     be16 = construct.UBInt16
     le16 = construct.ULInt16
     le32 = construct.ULInt32

     # Sub-header parsed for cmd_id 0: eight single bytes followed by two
     # big-endian 16-bit values.
     s.header_cmd0 = construct.Struct(
         'CMD0Header',
         u8('magic'),
         u8('unk_0'),
         u8('unk_1'),
         u8('unk_2'),
         u8('unk_3'),
         u8('flags'),
         u8('id_primary'),
         u8('id_secondary'),
         be16('error_code'),
         be16('payload_size_cmd0'),
     )

     # Sub-header parsed for cmd_id 1: 48 bytes that are skipped.
     s.header_cmd1 = construct.Struct(
         'CMD1Header',
         construct.Padding(48),
     )

     # Sub-header parsed for cmd_id 2: little-endian fields.
     s.header_cmd2 = construct.Struct(
         'CMD2Header',
         le16('JDN_base'),
         construct.Padding(2),
         le32('seconds'),
     )

     def size_guarded(sub_header, size_matches):
         # Embed the sub-header fields only when the size predicate holds.
         return construct.If(size_matches, construct.Embed(sub_header))

     # cmd_id 0 accepts any payload at least as large as its sub-header; the
     # other two require an exact size match.
     sub_header_cases = {
         0: size_guarded(
             s.header_cmd0,
             lambda ctx: ctx.payload_size >= s.header_cmd0.sizeof()),
         1: size_guarded(
             s.header_cmd1,
             lambda ctx: ctx.payload_size == s.header_cmd1.sizeof()),
         2: size_guarded(
             s.header_cmd2,
             lambda ctx: ctx.payload_size == s.header_cmd2.sizeof()),
     }

     # Common header; the sub-header is selected by cmd_id and unknown
     # command identifiers parse nothing more.
     s.header = construct.Struct(
         'CMDHeader',
         le16('packet_type'),
         le16('cmd_id'),
         le16('payload_size'),
         le16('seq_id'),
         construct.Switch(
             'cmd_hdr',
             lambda ctx: ctx.cmd_id,
             sub_header_cases,
             default=construct.Pass),
     )

     # Handler methods keyed by cmd_id.
     s.cmd_handlers = {0: s.cmd0, 1: s.cmd1, 2: s.cmd2}

     # Two-level handler table for cmd 0.
     # NOTE(review): presumably keyed by id_primary then id_secondary -
     # confirm against the lookup in cmd0.
     s.cmd0_handlers = {5: {6: s.cmd0_5_6}}
Example #6
0
def PrefixedBytes(name, length_field=construct.UBInt8("length")):  # noqa
    """
    Build a construct for length-prefixed binary data.

    The result is like a :py:func:`construct.macros.PascalString`
    whose payload goes through ``BytesAdapter``, which is expected to
    raise a :py:class:`construct.AdaptationError` when encoding
    something other than :py:class:`bytes`.

    :param name: The attribute name under which this value will be
        accessible.
    :type name: :py:class:`str`

    :param length_field: (optional) The prefixed length field.
        Defaults to :py:func:`construct.macros.UBInt8`.
    :type length_field: a :py:class:`construct.core.FormatField`
    """
    # The payload size is read back from the parsed length prefix.
    payload_size = operator.attrgetter(length_field.name)
    payload = BytesAdapter(construct.Field("data", payload_size))
    prefixed = construct.Sequence(name, length_field, payload)
    return construct.LengthValueAdapter(prefixed)
Example #7
0
import os
import repoze.lru
import signal
import struct
import sys
import traceback
import traffic_control

# Control message for our protocol. The first few bits are special because we
# have to stay compatible with L2TPv3 in the kernel (the first bit must be 1).
# The packet must also be at least 12 bytes long, otherwise some firewalls may
# filter it when it is sent over port 53.
ControlMessage = cs.Struct(
    "control",
    # Ensure that the first bit is 1 (L2TP control packet)
    cs.Const(cs.UBInt8("magic1"), 0x80),
    # Reduce conflict matching to other protocols as we run on port 53
    cs.Const(cs.UBInt16("magic2"), 0x73A7),
    # Protocol version to allow future upgrades
    cs.UBInt8("version"),
    # Message type
    cs.UBInt8("type"),
    # Message data (with length prefix)
    cs.PascalString("data"),
    # Pad the message so it is at least 12 bytes long: the fixed fields take
    # 5 bytes, so data shorter than 6 bytes is padded up to 6 bytes
    # (assuming the default 1-byte length prefix of PascalString).
    cs.Padding(lambda ctx: max(0, 6 - len(ctx["data"]))),
)

# Unreliable messages (0x00 - 0x7F)
CONTROL_TYPE_COOKIE = 0x01
CONTROL_TYPE_PREPARE = 0x02
Example #8
0
class CupsIppParser(interface.FileObjectParser):
    """Parser for CUPS IPP files."""

    NAME = 'cups_ipp'
    DESCRIPTION = 'Parser for CUPS IPP files.'

    # INFO:
    # For each file, we have only one document with three different timestamps:
    # Created, process and finished.
    # Format:
    # [HEADER: MAGIC + KNOWN_TYPE][GROUP A]...[GROUP Z][GROUP_END: 0x03]
    # GROUP: [GROUP ID][PAIR A]...[PAIR Z] where [PAIR: NAME + VALUE]
    #   GROUP ID: [1byte ID]
    #   PAIR: [TagID][\x00][Name][Value])
    #     TagID: 1 byte integer with the type of "Value".
    #     Name: [Length][Text][\00]
    #       Name can be empty when the name has more than one value.
    #       Example: family name "lopez mata" with more than one surname.
    #       Type_Text + [0x06, family, 0x00] + [0x05, lopez, 0x00] +
    #       Type_Text + [0x00, 0x00] + [0x04, mata, 0x00]
    #     Value: can be integer, boolean, or text provided by TagID.
    #       If boolean, Value: [\x01][0x00(False)] or [\x01(True)]
    #       If integer, Value: [\x04][Integer]
    #       If text,    Value: [Length text][Text][\00]

    # Magic number that identify the CUPS IPP supported version.
    IPP_MAJOR_VERSION = 2
    IPP_MINOR_VERSION = 0
    # Supported Operation ID.
    IPP_OP_ID = 5

    # CUPS IPP File header.
    CUPS_IPP_HEADER = construct.Struct('cups_ipp_header_struct',
                                       construct.UBInt8('major_version'),
                                       construct.UBInt8('minor_version'),
                                       construct.UBInt16('operation_id'),
                                       construct.UBInt32('request_id'))

    # Group ID that indicates the end of the IPP Control file.
    GROUP_END = 3
    # Identification Groups.
    GROUP_LIST = [1, 2, 4, 5, 6, 7]

    # Type ID, per cups source file ipp-support.c.
    TYPE_GENERAL_INTEGER = 0x20
    TYPE_INTEGER = 0x21
    TYPE_BOOL = 0x22
    TYPE_ENUMERATION = 0x23
    TYPE_DATETIME = 0x31

    # Type of values that can be extracted.
    INTEGER_8 = construct.UBInt8('integer')
    INTEGER_32 = construct.UBInt32('integer')
    TEXT = construct.PascalString('text',
                                  length_field=construct.UBInt8('length'))
    BOOLEAN = construct.Struct('boolean_value', construct.Padding(1),
                               INTEGER_8)
    INTEGER = construct.Struct('integer_value', construct.Padding(1),
                               INTEGER_32)

    # This is an RFC2579 datetime.
    DATETIME = construct.Struct(
        'datetime',
        construct.Padding(1),
        construct.UBInt16('year'),
        construct.UBInt8('month'),
        construct.UBInt8('day'),
        construct.UBInt8('hour'),
        construct.UBInt8('minutes'),
        construct.UBInt8('seconds'),
        construct.UBInt8('deciseconds'),
        construct.String('direction_from_utc', length=1, encoding='ascii'),
        construct.UBInt8('hours_from_utc'),
        construct.UBInt8('minutes_from_utc'),
    )

    # Name of the pair.
    PAIR_NAME = construct.Struct('pair_name', TEXT, construct.Padding(1))

    # Specific CUPS IPP to generic name.
    _NAME_PAIR_TRANSLATION = {
        'com.apple.print.JobInfo.PMApplicationName': 'application',
        'com.apple.print.JobInfo.PMJobOwner': 'owner',
        'DestinationPrinterID': 'printer_id',
        'document-format': 'doc_type',
        'job-name': 'job_name',
        'job-originating-host-name': 'computer_name',
        'job-originating-user-name': 'user',
        'job-uuid': 'job_id',
        'printer-uri': 'uri'
    }

    # Attribute names that hold RFC2579 date and time values, mapped to the
    # timestamp description of the event they produce.
    _DATE_TIME_VALUES = {
        'date-time-at-creation': definitions.TIME_DESCRIPTION_CREATION,
        'date-time-at-processing': definitions.TIME_DESCRIPTION_START,
        'date-time-at-completed': definitions.TIME_DESCRIPTION_END
    }

    # Attribute names that hold POSIX timestamps, mapped to the timestamp
    # description of the event they produce.
    _POSIX_TIME_VALUES = {
        'time-at-creation': definitions.TIME_DESCRIPTION_CREATION,
        'time-at-processing': definitions.TIME_DESCRIPTION_START,
        'time-at-completed': definitions.TIME_DESCRIPTION_END
    }

    # All the attribute names that are moved out of the data dict into events.
    _DATE_TIME_VALUE_NAMES = list(_DATE_TIME_VALUES.keys())
    _DATE_TIME_VALUE_NAMES.extend(list(_POSIX_TIME_VALUES.keys()))

    def _GetStringValue(self, data_dict, name, default_value=None):
        """Retrieves a specific string value from the data dict.

        Values that contain a comma are wrapped in double quotes before all
        the values are joined with ', '.

        NOTE: the quoting is applied in place, hence the list stored in
        data_dict is modified as well.

        Args:
          data_dict (dict[str, list[str]]): values per name.
          name (str): name of the value to retrieve.
          default_value (Optional[object]): value to return when there are
              no values for the name.

        Returns:
          str: value represented as a string, or default_value when the name
              is missing or has no values.
        """
        values = data_dict.get(name, None)
        if not values:
            return default_value

        for index, value in enumerate(values):
            if ',' in value:
                values[index] = '"{0:s}"'.format(value)

        return ', '.join(values)

    def _ReadPair(self, parser_mediator, file_object):
        """Reads an attribute name and value pair from a CUPS IPP event.

        An extraction error is produced when the pair cannot be read or
        decoded.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          file_object (dfvfs.FileIO): file-like object.

        Returns:
          tuple: contains:

            str: name, which can be empty, or None when the end of the
                groups was reached or the pair could not be read.
            object: value (int, bool, RFC2579DateTime or str) or None when
                the end of the groups was reached or the pair could not be
                read.
        """
        # Pair = Type ID + Name + Value.
        try:
            # Can be:
            #   Group ID + IDtag = Group ID (1byte) + Tag ID (1byte) + '0x00'.
            #   IDtag = Tag ID (1byte) + '0x00'.
            type_id = self.INTEGER_8.parse_stream(file_object)
            if type_id == self.GROUP_END:
                return None, None

            elif type_id in self.GROUP_LIST:
                # If it is a group ID we must read the next byte that contains
                # the first TagID.
                type_id = self.INTEGER_8.parse_stream(file_object)

            # 0x00 separator character.
            self.INTEGER_8.parse_stream(file_object)

        except (IOError, construct.FieldError) as exception:
            parser_mediator.ProduceExtractionError(
                'unable to parse pair identifier with error: {0!s}'.format(
                    exception))
            return None, None

        # Name = Length name + name + 0x00
        try:
            pair_name = self.PAIR_NAME.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            parser_mediator.ProduceExtractionError(
                'unable to parse pair name with error: {0!s}'.format(
                    exception))
            return None, None

        try:
            name = pair_name.text.decode('utf-8')
        except UnicodeDecodeError as exception:
            parser_mediator.ProduceExtractionError(
                'unable to decode pair name with error: {0!s}'.format(
                    exception))
            return None, None

        # Value: can be integer, boolean or text select by Type ID.
        if type_id in (self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER,
                       self.TYPE_ENUMERATION):
            value_structure = self.INTEGER
        elif type_id == self.TYPE_BOOL:
            value_structure = self.BOOLEAN
        elif type_id == self.TYPE_DATETIME:
            value_structure = self.DATETIME
        else:
            value_structure = self.TEXT

        try:
            value = value_structure.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            parser_mediator.ProduceExtractionError(
                'unable to parse value with error: {0!s}'.format(exception))
            return None, None

        # Convert the parsed structure into a plain Python value.
        if type_id in (self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER,
                       self.TYPE_ENUMERATION):
            value = value.integer

        elif type_id == self.TYPE_BOOL:
            value = bool(value.integer)

        elif type_id == self.TYPE_DATETIME:
            rfc2579_date_time_tuple = (value.year, value.month, value.day,
                                       value.hour, value.minutes,
                                       value.seconds, value.deciseconds,
                                       value.direction_from_utc,
                                       value.hours_from_utc,
                                       value.minutes_from_utc)
            value = dfdatetime_rfc2579_date_time.RFC2579DateTime(
                rfc2579_date_time_tuple=rfc2579_date_time_tuple)

        else:
            try:
                value = value.decode('utf-8')
            except UnicodeDecodeError as exception:
                parser_mediator.ProduceExtractionError(
                    'unable to decode value with error: {0!s}'.format(
                        exception))
                return None, None

        return name, value

    def _ReadPairs(self, parser_mediator, file_object):
        """Reads the attribute name and value pairs from a CUPS IPP event.

        Reading stops at the first pair for which _ReadPair returns
        (None, None), which marks the end of the groups or a pair that could
        not be read.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          file_object (dfvfs.FileIO): file-like object.

        Returns:
          dict[str, list[object]]: values per name.
        """
        data_dict = {}

        name, value = self._ReadPair(parser_mediator, file_object)
        # Only a None name marks the end. An empty name (an additional value
        # of a multi-valued attribute) combined with a falsy value such as 0,
        # False or '' is a valid pair and must not stop the loop.
        while name is not None:
            # Translate the known "name" CUPS IPP to a generic name value.
            pretty_name = self._NAME_PAIR_TRANSLATION.get(name, name)
            data_dict.setdefault(pretty_name, []).append(value)
            name, value = self._ReadPair(parser_mediator, file_object)

        return data_dict

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a CUPS IPP file-like object.

        A single event data object is shared by the events produced for every
        date and time value found in the file.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          file_object (dfvfs.FileIO): file-like object.

        Raises:
          UnableToParseFile: when the file cannot be parsed.
        """
        try:
            header = self.CUPS_IPP_HEADER.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                'Unable to parse CUPS IPP Header with error: {0!s}'.format(
                    exception))

        if (header.major_version != self.IPP_MAJOR_VERSION
                or header.minor_version != self.IPP_MINOR_VERSION):
            raise errors.UnableToParseFile(
                '[{0:s}] Unsupported version number.'.format(self.NAME))

        if header.operation_id != self.IPP_OP_ID:
            # Warn if the operation ID differs from the standard one. We should be
            # able to parse the file nonetheless.
            logger.debug(
                '[{0:s}] Unsupported operation identifier in file: {1:s}.'.
                format(self.NAME, parser_mediator.GetDisplayName()))

        data_dict = self._ReadPairs(parser_mediator, file_object)

        # Move the date and time values out of the data dict; they become
        # events instead of event data.
        time_dict = {}

        for name in self._DATE_TIME_VALUE_NAMES:
            value = data_dict.get(name, None)
            if value is not None:
                time_dict[name] = value
                del data_dict[name]

        event_data = CupsIppEventData()
        event_data.application = self._GetStringValue(data_dict, 'application')
        event_data.computer_name = self._GetStringValue(
            data_dict, 'computer_name')
        event_data.copies = data_dict.get('copies', [0])[0]
        event_data.data_dict = data_dict
        event_data.doc_type = self._GetStringValue(data_dict, 'doc_type')
        event_data.job_id = self._GetStringValue(data_dict, 'job_id')
        event_data.job_name = self._GetStringValue(data_dict, 'job_name')
        event_data.user = self._GetStringValue(data_dict, 'user')
        event_data.owner = self._GetStringValue(data_dict, 'owner')
        event_data.printer_id = self._GetStringValue(data_dict, 'printer_id')
        event_data.uri = self._GetStringValue(data_dict, 'uri')

        for name, usage in self._DATE_TIME_VALUES.items():
            time_values = time_dict.get(name, [])
            for date_time in time_values:
                event = time_events.DateTimeValuesEvent(date_time, usage)
                parser_mediator.ProduceEventWithEventData(event, event_data)

        for name, usage in self._POSIX_TIME_VALUES.items():
            time_values = time_dict.get(name, [])
            for time_value in time_values:
                date_time = dfdatetime_posix_time.PosixTime(
                    timestamp=time_value)
                event = time_events.DateTimeValuesEvent(date_time, usage)
                parser_mediator.ProduceEventWithEventData(event, event_data)
Example #9
0
class DataBlockFile(object):
    """Class that contains a data block file."""

    SIGNATURE = 0xc104cac3

    # Little-endian file header found at offset 0 of the file.
    _FILE_HEADER = construct.Struct(
        u'chrome_cache_data_file_header', construct.ULInt32(u'signature'),
        construct.ULInt16(u'minor_version'),
        construct.ULInt16(u'major_version'), construct.ULInt16(u'file_number'),
        construct.ULInt16(u'next_file_number'),
        construct.ULInt32(u'block_size'),
        construct.ULInt32(u'number_of_entries'),
        construct.ULInt32(u'maximum_number_of_entries'),
        construct.Array(4, construct.ULInt32(u'emtpy')),
        construct.Array(4, construct.ULInt32(u'hints')),
        construct.ULInt32(u'updating'),
        construct.Array(5, construct.ULInt32(u'user')))

    # Little-endian cache entry; the key is stored as 160 individual bytes.
    _CACHE_ENTRY = construct.Struct(
        u'chrome_cache_entry', construct.ULInt32(u'hash'),
        construct.ULInt32(u'next_address'),
        construct.ULInt32(u'rankings_node_address'),
        construct.ULInt32(u'reuse_count'), construct.ULInt32(u'refetch_count'),
        construct.ULInt32(u'state'), construct.ULInt64(u'creation_time'),
        construct.ULInt32(u'key_size'), construct.ULInt32(u'long_key_address'),
        construct.Array(4, construct.ULInt32(u'data_stream_sizes')),
        construct.Array(4, construct.ULInt32(u'data_stream_addresses')),
        construct.ULInt32(u'flags'), construct.Padding(16),
        construct.ULInt32(u'self_hash'),
        construct.Array(160, construct.UBInt8(u'key')))

    def __init__(self):
        """Initializes the data block file object."""
        super(DataBlockFile, self).__init__()
        self._file_object = None
        self.creation_time = None
        self.block_size = None
        self.number_of_entries = None
        self.version = None

    def _ReadFileHeader(self):
        """Reads the file header.

        Sets the version, block_size and number_of_entries attributes.

        Raises:
          IOError: if the file header cannot be read, or if its signature or
              version is not supported.
        """
        self._file_object.seek(0, os.SEEK_SET)

        try:
            file_header = self._FILE_HEADER.parse_stream(self._file_object)
        except construct.FieldError as exception:
            # Use !s since an exception object does not support the s format
            # specification on Python 3.
            raise IOError(
                u'Unable to parse file header with error: {0!s}'.format(
                    exception))

        signature = file_header.get(u'signature')

        if signature != self.SIGNATURE:
            raise IOError(u'Unsupported data block file signature')

        self.version = u'{0:d}.{1:d}'.format(file_header.get(u'major_version'),
                                             file_header.get(u'minor_version'))

        if self.version not in [u'2.0', u'2.1']:
            raise IOError(u'Unsupported data block file version: {0:s}'.format(
                self.version))

        self.block_size = file_header.get(u'block_size')
        self.number_of_entries = file_header.get(u'number_of_entries')

    def ReadCacheEntry(self, block_offset):
        """Reads a cache entry.

        Args:
          block_offset: The block offset of the cache entry.

        Returns:
          A cache entry (instance of CacheEntry).

        Raises:
          IOError: if the cache entry cannot be read.
        """
        self._file_object.seek(block_offset, os.SEEK_SET)

        try:
            cache_entry_struct = self._CACHE_ENTRY.parse_stream(
                self._file_object)
        except construct.FieldError as exception:
            # Use !s since an exception object does not support the s format
            # specification on Python 3.
            raise IOError(
                u'Unable to parse cache entry with error: {0!s}'.format(
                    exception))

        cache_entry = CacheEntry()

        cache_entry.hash = cache_entry_struct.get(u'hash')

        cache_entry.next = CacheAddress(
            cache_entry_struct.get(u'next_address'))
        cache_entry.rankings_node = CacheAddress(
            cache_entry_struct.get(u'rankings_node_address'))

        cache_entry.creation_time = cache_entry_struct.get(u'creation_time')

        # The key is parsed as a list of integer byte values; bytearray
        # turns it into a byte string on both Python 2 and Python 3. Only
        # the part before the first NUL byte is kept.
        byte_array = cache_entry_struct.get(u'key')
        byte_string = bytes(bytearray(byte_array))
        cache_entry.key, _, _ = byte_string.partition(b'\x00')

        return cache_entry

    def Close(self):
        """Closes the data block file."""
        if self._file_object:
            self._file_object.close()
            self._file_object = None

    def Open(self, file_object):
        """Opens the data block file.

        Args:
          file_object: the file object.

        Raises:
          IOError: if the file header cannot be read or is not supported.
        """
        self._file_object = file_object
        self._ReadFileHeader()
Example #10
0
class BsmParser(interface.FileObjectParser):
    """Parser for BSM files."""

    _INITIAL_FILE_OFFSET = None

    NAME = u'bsm_log'
    DESCRIPTION = u'Parser for BSM log files.'

    # BSM supported version (0x0b = 11).
    AUDIT_HEADER_VERSION = 11

    # Magic Trail Header.
    BSM_TOKEN_TRAILER_MAGIC = b'b105'

    # IP Version constants.
    AU_IPv4 = 4
    AU_IPv6 = 16

    IPV4_STRUCT = construct.UBInt32(u'ipv4')

    IPV6_STRUCT = construct.Struct(u'ipv6', construct.UBInt64(u'high'),
                                   construct.UBInt64(u'low'))

    # Tested structures.
    # INFO: the token ID is omitted from the structure declarations.
    #       BSM_TYPE is used first to read the ID, and then the structure.
    # Tokens always start with an ID value that identifies their token
    # type and subsequent structure.
    BSM_TYPE = construct.UBInt8(u'token_id')

    # Data type structures.
    BSM_TOKEN_DATA_CHAR = construct.String(u'value', 1)
    BSM_TOKEN_DATA_SHORT = construct.UBInt16(u'value')
    BSM_TOKEN_DATA_INTEGER = construct.UBInt32(u'value')

    # Common structure used by other structures.
    # audit_uid: integer, uid that generates the entry.
    # effective_uid: integer, the permission user used.
    # effective_gid: integer, the permission group used.
    # real_uid: integer, user id of the user that execute the process.
    # real_gid: integer, group id of the group that execute the process.
    # pid: integer, identification number of the process.
    # session_id: unknown, need research.
    BSM_TOKEN_SUBJECT_SHORT = construct.Struct(
        u'subject_data', construct.UBInt32(u'audit_uid'),
        construct.UBInt32(u'effective_uid'),
        construct.UBInt32(u'effective_gid'), construct.UBInt32(u'real_uid'),
        construct.UBInt32(u'real_gid'), construct.UBInt32(u'pid'),
        construct.UBInt32(u'session_id'))

    # Common structure used by other structures.
    # Identify the kind of inet (IPv4 or IPv6)
    # TODO: instead of 16, AU_IPv6 must be used.
    BSM_IP_TYPE_SHORT = construct.Struct(
        u'bsm_ip_type_short', construct.UBInt32(u'net_type'),
        construct.Switch(u'ip_addr',
                         _BsmTokenGetNetType, {16: IPV6_STRUCT},
                         default=IPV4_STRUCT))

    # Initial fields structure used by header structures.
    # length: integer, the length of the entry, equal to trailer (doc: length).
    # version: integer, version of BSM (AUDIT_HEADER_VERSION).
    # event_type: integer, the type of event (/etc/security/audit_event).
    # modifier: integer, unknown, need research (It is always 0).
    BSM_HEADER = construct.Struct(u'bsm_header', construct.UBInt32(u'length'),
                                  construct.UBInt8(u'version'),
                                  construct.UBInt16(u'event_type'),
                                  construct.UBInt16(u'modifier'))

    # First token of one entry.
    # timestamp: unsigned integer, number of seconds since
    #            January 1, 1970 00:00:00 UTC.
    # microsecond: unsigned integer, number of micro seconds.
    BSM_HEADER32 = construct.Struct(u'bsm_header32', BSM_HEADER,
                                    construct.UBInt32(u'timestamp'),
                                    construct.UBInt32(u'microsecond'))

    BSM_HEADER64 = construct.Struct(u'bsm_header64', BSM_HEADER,
                                    construct.UBInt64(u'timestamp'),
                                    construct.UBInt64(u'microsecond'))

    BSM_HEADER32_EX = construct.Struct(u'bsm_header32_ex', BSM_HEADER,
                                       BSM_IP_TYPE_SHORT,
                                       construct.UBInt32(u'timestamp'),
                                       construct.UBInt32(u'microsecond'))

    # Token TEXT, provides extra information.
    BSM_TOKEN_TEXT = construct.Struct(
        u'bsm_token_text', construct.UBInt16(u'length'),
        construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

    # Path of the executable.
    BSM_TOKEN_PATH = BSM_TOKEN_TEXT

    # Identifies the end of the record (followed by TRAILER).
    # status: integer that identifies the status of the exit (BSM_ERRORS).
    # return: returned value from the operation.
    BSM_TOKEN_RETURN32 = construct.Struct(u'bsm_token_return32',
                                          construct.UBInt8(u'status'),
                                          construct.UBInt32(u'return_value'))

    BSM_TOKEN_RETURN64 = construct.Struct(u'bsm_token_return64',
                                          construct.UBInt8(u'status'),
                                          construct.UBInt64(u'return_value'))

    # Identifies the number of bytes that were written.
    # magic: 2 bytes that identifies the TRAILER (BSM_TOKEN_TRAILER_MAGIC).
    # length: integer that has the number of bytes from the entry size.
    BSM_TOKEN_TRAILER = construct.Struct(u'bsm_token_trailer',
                                         construct.UBInt16(u'magic'),
                                         construct.UBInt32(u'record_length'))

    # A 32-bits argument.
    # num_arg: the number of the argument.
    # name_arg: the argument's name.
    # text: the string value of the argument.
    BSM_TOKEN_ARGUMENT32 = construct.Struct(
        u'bsm_token_argument32', construct.UBInt8(u'num_arg'),
        construct.UBInt32(u'name_arg'), construct.UBInt16(u'length'),
        construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

    # A 64-bits argument.
    # num_arg: integer, the number of the argument.
    # name_arg: text, the argument's name.
    # text: the string value of the argument.
    BSM_TOKEN_ARGUMENT64 = construct.Struct(
        u'bsm_token_argument64', construct.UBInt8(u'num_arg'),
        construct.UBInt64(u'name_arg'), construct.UBInt16(u'length'),
        construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

    # Identify an user.
    # terminal_id: unknown, research needed.
    # terminal_addr: unknown, research needed.
    BSM_TOKEN_SUBJECT32 = construct.Struct(u'bsm_token_subject32',
                                           BSM_TOKEN_SUBJECT_SHORT,
                                           construct.UBInt32(u'terminal_port'),
                                           IPV4_STRUCT)

    # Identify an user using a extended Token.
    # terminal_port: unknown, need research.
    # net_type: unknown, need research.
    BSM_TOKEN_SUBJECT32_EX = construct.Struct(
        u'bsm_token_subject32_ex', BSM_TOKEN_SUBJECT_SHORT,
        construct.UBInt32(u'terminal_port'), BSM_IP_TYPE_SHORT)

    # au_to_opaque // AUT_OPAQUE
    BSM_TOKEN_OPAQUE = BSM_TOKEN_TEXT

    # au_to_seq // AUT_SEQ
    BSM_TOKEN_SEQUENCE = BSM_TOKEN_DATA_INTEGER

    # Program execution with options.
    # For each argument we are going to have a string+ "\x00".
    # Example: [00 00 00 02][41 42 43 00 42 42 00]
    #          2 Arguments, Arg1: [414243] Arg2: [4242].
    BSM_TOKEN_EXEC_ARGUMENTS = construct.UBInt32(u'number_arguments')

    BSM_TOKEN_EXEC_ARGUMENT = construct.Struct(
        u'bsm_token_exec_argument',
        construct.RepeatUntil(_BsmTokenIsEndOfString,
                              construct.StaticField("text", 1)))

    # au_to_in_addr // AUT_IN_ADDR:
    BSM_TOKEN_ADDR = IPV4_STRUCT

    # au_to_in_addr_ext // AUT_IN_ADDR_EX:
    BSM_TOKEN_ADDR_EXT = construct.Struct(u'bsm_token_addr_ext',
                                          construct.UBInt32(u'net_type'),
                                          IPV6_STRUCT)

    # au_to_ip // AUT_IP:
    # TODO: parse this header in the correct way.
    BSM_TOKEN_IP = construct.String(u'binary_ipv4_add', 20)

    # au_to_ipc // AUT_IPC:
    BSM_TOKEN_IPC = construct.Struct(u'bsm_token_ipc',
                                     construct.UBInt8(u'object_type'),
                                     construct.UBInt32(u'object_id'))

    # au_to_ipc_perm // AUT_IPC_PERM
    BSM_TOKEN_IPC_PERM = construct.Struct(
        u'bsm_token_ipc_perm', construct.UBInt32(u'user_id'),
        construct.UBInt32(u'group_id'), construct.UBInt32(u'creator_user_id'),
        construct.UBInt32(u'creator_group_id'),
        construct.UBInt32(u'access_mode'), construct.UBInt32(u'slot_seq'),
        construct.UBInt32(u'key'))

    # au_to_iport // AUT_IPORT:
    BSM_TOKEN_PORT = construct.UBInt16(u'port_number')

    # au_to_file // AUT_OTHER_FILE32:
    BSM_TOKEN_FILE = construct.Struct(
        u'bsm_token_file', construct.UBInt32(u'timestamp'),
        construct.UBInt32(u'microsecond'), construct.UBInt16(u'length'),
        construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

    # au_to_subject64 // AUT_SUBJECT64:
    BSM_TOKEN_SUBJECT64 = construct.Struct(u'bsm_token_subject64',
                                           BSM_TOKEN_SUBJECT_SHORT,
                                           construct.UBInt64(u'terminal_port'),
                                           IPV4_STRUCT)

    # au_to_subject64_ex // AUT_SUBJECT64_EX:
    BSM_TOKEN_SUBJECT64_EX = construct.Struct(
        u'bsm_token_subject64_ex', BSM_TOKEN_SUBJECT_SHORT,
        construct.UBInt32(u'terminal_port'),
        construct.UBInt32(u'terminal_type'), BSM_IP_TYPE_SHORT)

    # au_to_process32 // AUT_PROCESS32:
    BSM_TOKEN_PROCESS32 = construct.Struct(u'bsm_token_process32',
                                           BSM_TOKEN_SUBJECT_SHORT,
                                           construct.UBInt32(u'terminal_port'),
                                           IPV4_STRUCT)

    # au_to_process64 // AUT_PROCESS64:
    BSM_TOKEN_PROCESS64 = construct.Struct(u'bsm_token_process64',
                                           BSM_TOKEN_SUBJECT_SHORT,
                                           construct.UBInt64(u'terminal_port'),
                                           IPV4_STRUCT)

    # au_to_process32_ex // AUT_PROCESS32_EX:
    BSM_TOKEN_PROCESS32_EX = construct.Struct(
        u'bsm_token_process32_ex', BSM_TOKEN_SUBJECT_SHORT,
        construct.UBInt32(u'terminal_port'), BSM_IP_TYPE_SHORT)

    # au_to_process64_ex // AUT_PROCESS64_EX:
    BSM_TOKEN_PROCESS64_EX = construct.Struct(
        u'bsm_token_process64_ex', BSM_TOKEN_SUBJECT_SHORT,
        construct.UBInt64(u'terminal_port'), BSM_IP_TYPE_SHORT)

    # au_to_sock_inet32 // AUT_SOCKINET32:
    BSM_TOKEN_AUT_SOCKINET32 = construct.Struct(
        u'bsm_token_aut_sockinet32', construct.UBInt16(u'net_type'),
        construct.UBInt16(u'port_number'), IPV4_STRUCT)

    # Info: checked against the source code of XNU, but not against
    #       real BSM file.
    BSM_TOKEN_AUT_SOCKINET128 = construct.Struct(
        u'bsm_token_aut_sockinet128', construct.UBInt16(u'net_type'),
        construct.UBInt16(u'port_number'), IPV6_STRUCT)

    INET6_ADDR_TYPE = construct.Struct(u'addr_type',
                                       construct.UBInt16(u'ip_type'),
                                       construct.UBInt16(u'source_port'),
                                       construct.UBInt64(u'saddr_high'),
                                       construct.UBInt64(u'saddr_low'),
                                       construct.UBInt16(u'destination_port'),
                                       construct.UBInt64(u'daddr_high'),
                                       construct.UBInt64(u'daddr_low'))

    INET4_ADDR_TYPE = construct.Struct(
        u'addr_type', construct.UBInt16(u'ip_type'),
        construct.UBInt16(u'source_port'),
        construct.UBInt32(u'source_address'),
        construct.UBInt16(u'destination_port'),
        construct.UBInt32(u'destination_address'))

    # au_to_socket_ex // AUT_SOCKET_EX
    # TODO: Change the 26 for unixbsm.BSM_PROTOCOLS.INET6.
    BSM_TOKEN_AUT_SOCKINET32_EX = construct.Struct(
        u'bsm_token_aut_sockinet32_ex', construct.UBInt16(u'socket_domain'),
        construct.UBInt16(u'socket_type'),
        construct.Switch(u'structure_addr_port',
                         _BsmTokenGetSocketDomain, {26: INET6_ADDR_TYPE},
                         default=INET4_ADDR_TYPE))

    # au_to_sock_unix // AUT_SOCKUNIX
    BSM_TOKEN_SOCKET_UNIX = construct.Struct(
        u'bsm_token_au_to_sock_unix', construct.UBInt16(u'family'),
        construct.RepeatUntil(_BsmTokenIsEndOfString,
                              construct.StaticField("path", 1)))

    # au_to_data // AUT_DATA
    # how to print: bsmtoken.BSM_TOKEN_DATA_PRINT.
    # type: bsmtoken.BSM_TOKEN_DATA_TYPE.
    # unit_count: number of type values.
    # BSM_TOKEN_DATA has a end field = type * unit_count
    BSM_TOKEN_DATA = construct.Struct(u'bsm_token_data',
                                      construct.UBInt8(u'how_to_print'),
                                      construct.UBInt8(u'data_type'),
                                      construct.UBInt8(u'unit_count'))

    # au_to_attr32 // AUT_ATTR32
    BSM_TOKEN_ATTR32 = construct.Struct(
        u'bsm_token_attr32', construct.UBInt32(u'file_mode'),
        construct.UBInt32(u'uid'), construct.UBInt32(u'gid'),
        construct.UBInt32(u'file_system_id'),
        construct.UBInt64(u'file_system_node_id'),
        construct.UBInt32(u'device'))

    # au_to_attr64 // AUT_ATTR64
    BSM_TOKEN_ATTR64 = construct.Struct(
        u'bsm_token_attr64', construct.UBInt32(u'file_mode'),
        construct.UBInt32(u'uid'), construct.UBInt32(u'gid'),
        construct.UBInt32(u'file_system_id'),
        construct.UBInt64(u'file_system_node_id'),
        construct.UBInt64(u'device'))

    # au_to_exit // AUT_EXIT
    BSM_TOKEN_EXIT = construct.Struct(u'bsm_token_exit',
                                      construct.UBInt32(u'status'),
                                      construct.UBInt32(u'return_value'))

    # au_to_newgroups // AUT_NEWGROUPS
    # INFO: we must read BSM_TOKEN_DATA_INTEGER for each group.
    BSM_TOKEN_GROUPS = construct.UBInt16(u'group_number')

    # au_to_exec_env == au_to_exec_args
    BSM_TOKEN_EXEC_ENV = BSM_TOKEN_EXEC_ARGUMENTS

    # au_to_zonename //AUT_ZONENAME
    BSM_TOKEN_ZONENAME = BSM_TOKEN_TEXT

    # Token ID.
    # List of valid Token_ID.
    # Token_ID -> [NAME_STRUCTURE, STRUCTURE]
    # Only the checked structures have been added to the valid structures list.
    BSM_TYPE_LIST = {
        17: [u'BSM_TOKEN_FILE', BSM_TOKEN_FILE],
        19: [u'BSM_TOKEN_TRAILER', BSM_TOKEN_TRAILER],
        20: [u'BSM_HEADER32', BSM_HEADER32],
        21: [u'BSM_HEADER64', BSM_HEADER64],
        33: [u'BSM_TOKEN_DATA', BSM_TOKEN_DATA],
        34: [u'BSM_TOKEN_IPC', BSM_TOKEN_IPC],
        35: [u'BSM_TOKEN_PATH', BSM_TOKEN_PATH],
        36: [u'BSM_TOKEN_SUBJECT32', BSM_TOKEN_SUBJECT32],
        38: [u'BSM_TOKEN_PROCESS32', BSM_TOKEN_PROCESS32],
        39: [u'BSM_TOKEN_RETURN32', BSM_TOKEN_RETURN32],
        40: [u'BSM_TOKEN_TEXT', BSM_TOKEN_TEXT],
        41: [u'BSM_TOKEN_OPAQUE', BSM_TOKEN_OPAQUE],
        42: [u'BSM_TOKEN_ADDR', BSM_TOKEN_ADDR],
        43: [u'BSM_TOKEN_IP', BSM_TOKEN_IP],
        44: [u'BSM_TOKEN_PORT', BSM_TOKEN_PORT],
        45: [u'BSM_TOKEN_ARGUMENT32', BSM_TOKEN_ARGUMENT32],
        47: [u'BSM_TOKEN_SEQUENCE', BSM_TOKEN_SEQUENCE],
        96: [u'BSM_TOKEN_ZONENAME', BSM_TOKEN_ZONENAME],
        113: [u'BSM_TOKEN_ARGUMENT64', BSM_TOKEN_ARGUMENT64],
        114: [u'BSM_TOKEN_RETURN64', BSM_TOKEN_RETURN64],
        116: [u'BSM_HEADER32_EX', BSM_HEADER32_EX],
        119: [u'BSM_TOKEN_PROCESS64', BSM_TOKEN_PROCESS64],
        122: [u'BSM_TOKEN_SUBJECT32_EX', BSM_TOKEN_SUBJECT32_EX],
        127: [u'BSM_TOKEN_AUT_SOCKINET32_EX', BSM_TOKEN_AUT_SOCKINET32_EX],
        128: [u'BSM_TOKEN_AUT_SOCKINET32', BSM_TOKEN_AUT_SOCKINET32]
    }

    # Untested structures.
    # When a token is not found among the tested structures, we also try to
    # parse it using these structures.
    BSM_TYPE_LIST_NOT_TESTED = {
        49: [u'BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32],
        50: [u'BSM_TOKEN_IPC_PERM', BSM_TOKEN_IPC_PERM],
        52: [u'BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS],
        59: [u'BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS],
        60: [u'BSM_TOKEN_EXEC_ARGUMENTS', BSM_TOKEN_EXEC_ARGUMENTS],
        61: [u'BSM_TOKEN_EXEC_ENV', BSM_TOKEN_EXEC_ENV],
        62: [u'BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32],
        82: [u'BSM_TOKEN_EXIT', BSM_TOKEN_EXIT],
        115: [u'BSM_TOKEN_ATTR64', BSM_TOKEN_ATTR64],
        117: [u'BSM_TOKEN_SUBJECT64', BSM_TOKEN_SUBJECT64],
        123: [u'BSM_TOKEN_PROCESS32_EX', BSM_TOKEN_PROCESS32_EX],
        124: [u'BSM_TOKEN_PROCESS64_EX', BSM_TOKEN_PROCESS64_EX],
        125: [u'BSM_TOKEN_SUBJECT64_EX', BSM_TOKEN_SUBJECT64_EX],
        126: [u'BSM_TOKEN_ADDR_EXT', BSM_TOKEN_ADDR_EXT],
        129: [u'BSM_TOKEN_AUT_SOCKINET128', BSM_TOKEN_AUT_SOCKINET128],
        130: [u'BSM_TOKEN_SOCKET_UNIX', BSM_TOKEN_SOCKET_UNIX]
    }

    def __init__(self):
        """Initializes a parser object."""
        super(BsmParser, self).__init__()
        # Build one lookup table holding every token ID, starting from the
        # tested structures and adding the untested ones on top.
        combined_types = dict(self.BSM_TYPE_LIST)
        combined_types.update(self.BSM_TYPE_LIST_NOT_TESTED)
        self.bsm_type_list_all = combined_types

    def _CopyByteArrayToBase16String(self, byte_array):
        """Copies a byte array into a base-16 encoded Unicode string.

    Args:
      byte_array: A byte array.

    Returns:
      A base-16 encoded Unicode string.
    """
        return u''.join([u'{0:02x}'.format(byte) for byte in byte_array])

    def _CopyUtf8ByteArrayToString(self, byte_array):
        """Copies a UTF-8 encoded byte array into a Unicode string.

    Args:
      byte_array: A byte array containing an UTF-8 encoded string.

    Returns:
      A Unicode string.
    """
        byte_stream = b''.join(map(chr, byte_array))

        try:
            string = byte_stream.decode(u'utf-8')
        except UnicodeDecodeError:
            logging.warning(u'Unable to decode UTF-8 formatted byte array.')
            string = byte_stream.decode(u'utf-8', errors=u'ignore')

        string, _, _ = string.partition(b'\x00')
        return string

    def _IPv4Format(self, address):
        """Change an integer IPv4 address value for its 4 octets representation.

    Args:
      address: integer with the IPv4 address.

    Returns:
      IPv4 address in 4 octet representation (class A, B, C, D).
    """
        ipv4_string = self.IPV4_STRUCT.build(address)
        return socket.inet_ntoa(ipv4_string)

    def _IPv6Format(self, high, low):
        """Provide a readable IPv6 IP having the high and low part in 2 integers.

    Args:
      high: 64 bits integers number with the high part of the IPv6.
      low: 64 bits integers number with the low part of the IPv6.

    Returns:
      String with a well represented IPv6.
    """
        ipv6_string = self.IPV6_STRUCT.build(
            construct.Container(high=high, low=low))
        # socket.inet_ntop is not available on all platforms (e.g. Windows).
        if hasattr(socket, u'inet_ntop'):
            return socket.inet_ntop(socket.AF_INET6, ipv6_string)

        # Fallback formatter. hexlify() returns a byte string, so decode it
        # before doing any text comparisons.
        hex_string = binascii.hexlify(ipv6_string).decode(u'ascii')
        groups = [
            hex_string[pos:pos + 4].lstrip(u'0') or u'0'
            for pos in range(0, len(hex_string), 4)
        ]

        # An address may contain "::" only once, so only the longest run of
        # zero groups (the first one on a tie) is compressed.
        best_start, best_length = -1, 0
        run_start, run_length = -1, 0
        for index, group in enumerate(groups):
            if group != u'0':
                run_length = 0
                continue
            if run_length == 0:
                run_start = index
            run_length += 1
            if run_length > best_length:
                best_start, best_length = run_start, run_length

        # A single zero group is written as "0" instead of being compressed.
        if best_length < 2:
            return u':'.join(groups)

        head = u':'.join(groups[:best_start])
        tail = u':'.join(groups[best_start + best_length:])
        return u'{0:s}::{1:s}'.format(head, tail)

    def _RawToUTF8(self, byte_stream):
        """Copies a UTF-8 byte stream into a Unicode string.

    Args:
      byte_stream: A byte stream containing an UTF-8 encoded string.

    Returns:
      A Unicode string.
    """
        try:
            string = byte_stream.decode(u'utf-8')
        except UnicodeDecodeError:
            logging.warning(
                u'Decode UTF8 failed, the message string may be cut short.')
            string = byte_stream.decode(u'utf-8', errors=u'ignore')
        return string.partition(b'\x00')[0]

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a BSM file-like object.

    The file is verified first; afterwards events are read one at a time and
    handed to the parser mediator until no further event can be read.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        file_object.seek(0, os.SEEK_SET)

        try:
            is_bsm = self.VerifyFile(parser_mediator, file_object)
        except (IOError, construct.FieldError) as exception:
            # "!s" converts the exception with str() first; a ":s" format
            # specification applied directly to an exception object raises
            # TypeError on Python 3.4 and later, hiding the real error.
            raise errors.UnableToParseFile(
                u'Unable to parse BSM file with error: {0!s}'.format(
                    exception))

        if not is_bsm:
            raise errors.UnableToParseFile(u'Not a BSM File, unable to parse.')

        # ReadBSMEvent returns None once no further entry can be read.
        event_object = self.ReadBSMEvent(parser_mediator, file_object)
        while event_object:
            parser_mediator.ProduceEvent(event_object)

            event_object = self.ReadBSMEvent(parser_mediator, file_object)

    def ReadBSMEvent(self, parser_mediator, file_object):
        """Returns a BsmEvent from a single BSM entry.

    The entry starts with a header token that carries the length of the
    entry; the tokens that follow are read until that length is consumed.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object, positioned at the start of an entry.

    Returns:
      An event object (MacBsmEvent when the platform is Mac OS X, BsmEvent
      otherwise) or None if no entry could be read.
    """
        # Formatted tokens of the entry, in the order they were read.
        extra_tokens = []

        offset = file_object.tell()

        # Token header, first token for each entry.
        try:
            token_id = self.BSM_TYPE.parse_stream(file_object)
        except (IOError, construct.FieldError):
            return

        bsm_type, structure = self.BSM_TYPE_LIST.get(token_id, [u'', u''])
        if bsm_type not in (u'BSM_HEADER32', u'BSM_HEADER64',
                            u'BSM_HEADER32_EX'):
            logging.warning(
                u'Token ID Header {0} not expected at position 0x{1:X}.'
                u'The parsing of the file cannot be continued'.format(
                    token_id, file_object.tell()))
            # TODO: if it is a Mac OS X, search for the trailer magic value
            #       as a end of the entry can be a possibility to continue.
            return

        token = structure.parse_stream(file_object)

        length = token.bsm_header.length
        end_of_entry = offset + length
        event_type = u'{0} ({1})'.format(
            bsmtoken.BSM_AUDIT_EVENT.get(token.bsm_header.event_type,
                                         u'UNKNOWN'),
            token.bsm_header.event_type)
        timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
            token.timestamp, token.microsecond)

        # Read until we reach the end of the record.
        while file_object.tell() < end_of_entry:
            # Check if it is a known token.
            try:
                token_id = self.BSM_TYPE.parse_stream(file_object)
            except (IOError, construct.FieldError):
                logging.warning(
                    u'Unable to parse the Token ID at position: {0:d}'.format(
                        file_object.tell()))
                return

            if token_id in self.BSM_TYPE_LIST:
                token = self.BSM_TYPE_LIST[token_id][1].parse_stream(
                    file_object)
                extra_tokens.append(
                    self.FormatToken(token_id, token, file_object))
                continue

            pending = end_of_entry - file_object.tell()
            untested_tokens = self.TryWithUntestedStructures(
                file_object, token_id, pending)
            if untested_tokens is None:
                # The helper could not read a token ID (it already logged a
                # warning). Extending the list with None would raise
                # TypeError, so stop as for any other unreadable token ID.
                return
            extra_tokens.extend(untested_tokens)

        if file_object.tell() > end_of_entry:
            logging.warning(u'Token ID {0} not expected at position 0x{1:X}.'
                            u'Jumping for the next entry.'.format(
                                token_id, file_object.tell()))
            try:
                file_object.seek(end_of_entry - file_object.tell(),
                                 os.SEEK_CUR)
            except (IOError, construct.FieldError) as exception:
                # "!s" converts the exception with str() first; ":s" applied
                # to an exception object raises TypeError on Python 3.4+.
                logging.warning(
                    u'Unable to jump to next entry with error: {0!s}'.format(
                        exception))
                return

        # BSM can be in more than one OS: BSD, Solaris and Mac OS X.
        is_mac_os_x = parser_mediator.platform == u'MacOSX'

        # In Mac OS X the last two tokens are the return status and the
        # trailer; the return token is taken out of the message first.
        return_value = u'Return unknown'
        if is_mac_os_x and len(extra_tokens) >= 2:
            if extra_tokens[-2].startswith(
                    (u'[BSM_TOKEN_RETURN32', u'[BSM_TOKEN_RETURN64')):
                return_value = extra_tokens.pop(-2)

        # On every platform the last token is expected to be the trailer.
        trailer = u'Trailer unknown'
        if extra_tokens and extra_tokens[-1].startswith(
                u'[BSM_TOKEN_TRAILER'):
            trailer = extra_tokens.pop()

        message = u'. '.join(extra_tokens)
        if is_mac_os_x:
            return MacBsmEvent(event_type, timestamp, message, return_value,
                               trailer, offset)

        # Generic BSM format.
        return BsmEvent(event_type, timestamp, message, trailer, offset)

    def VerifyFile(self, parser_mediator, file_object):
        """Check if the file is a BSM file.

    The file must start with a supported header token of the expected audit
    version. On Mac OS X the token after the header must additionally be a
    text token announcing an audit startup or recovery.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object to check.

    Returns:
      True if this is a valid BSM file, otherwise False. The file position
      is reset to the start of the file only when True is returned.
    """
        if file_object.tell() != 0:
            file_object.seek(0)

        # First part of the entry is always a Header.
        try:
            token_id = self.BSM_TYPE.parse_stream(file_object)
        except (IOError, construct.FieldError):
            return False

        # Unknown token IDs map to an empty type name, which is not a header.
        bsm_type, structure = self.BSM_TYPE_LIST.get(token_id, [u'', u''])
        if bsm_type not in (u'BSM_HEADER32', u'BSM_HEADER64',
                            u'BSM_HEADER32_EX'):
            return False

        try:
            header = structure.parse_stream(file_object)
        except (IOError, construct.FieldError):
            return False
        if header.bsm_header.version != self.AUDIT_HEADER_VERSION:
            return False

        try:
            token_id = self.BSM_TYPE.parse_stream(file_object)
        except (IOError, construct.FieldError):
            return False

        # If is Mac OS X BSM file, next entry is a  text token indicating
        # if it is a normal start or it is a recovery track.
        if parser_mediator.platform == u'MacOSX':
            bsm_type_list = self.BSM_TYPE_LIST.get(token_id)
            if not bsm_type_list:
                return False

            if bsm_type_list[0] != u'BSM_TOKEN_TEXT':
                logging.warning(
                    u'It is not a valid first entry for Mac OS X BSM.')
                return False
            try:
                token = self.BSM_TOKEN_TEXT.parse_stream(file_object)
            except (IOError, construct.FieldError):
                # Return an explicit False, as on every other failure path.
                return False

            text = self._CopyUtf8ByteArrayToString(token.text)
            if text not in (u'launchctl::Audit startup',
                            u'launchctl::Audit recovery'):
                logging.warning(
                    u'It is not a valid first entry for Mac OS X BSM.')
                return False

        file_object.seek(0)
        return True

    def TryWithUntestedStructures(self, file_object, token_id, pending):
        """Try to parse the pending part of the entry using untested structures.

    Args:
      file_object: BSM file.
      token_id: integer with the id that comes from the unknown token.
      pending: pending length of the entry.

    Returns:
      A list with the formatted tokens when the pending bytes could be parsed
      up to exactly the end of the entry, otherwise an empty list. When the
      parsing does not end at the end of the entry, the file position is
      moved there; when a token ID cannot be read, it is left where the
      read failed.
    """
        # Data from the unknown structure.
        start_position = file_object.tell()
        start_token_id = token_id
        extra_tokens = []

        # Read all the "pending" bytes.
        try:
            if token_id in self.bsm_type_list_all:
                token = self.bsm_type_list_all[token_id][1].parse_stream(
                    file_object)
                extra_tokens.append(
                    self.FormatToken(token_id, token, file_object))
                while file_object.tell() < (start_position + pending):
                    # Check if it is a known token.
                    try:
                        token_id = self.BSM_TYPE.parse_stream(file_object)
                    except (IOError, construct.FieldError):
                        logging.warning(
                            u'Unable to parse the Token ID at position: {0:d}'.
                            format(file_object.tell()))
                        # Callers extend a list with the result, so return an
                        # empty list rather than None.
                        return []
                    if token_id not in self.bsm_type_list_all:
                        break
                    token = self.bsm_type_list_all[token_id][1].parse_stream(
                        file_object)
                    extra_tokens.append(
                        self.FormatToken(token_id, token, file_object))
        except (IOError, construct.FieldError):
            # Placeholder ID used only in the warning below.
            token_id = 255

        next_entry = (start_position + pending)
        if file_object.tell() != next_entry:
            # Unknown Structure.
            logging.warning(
                u'Unknown Token at "0x{0:X}", ID: {1} (0x{2:X})'.format(
                    start_position - 1, token_id, token_id))
            # TODO: another way to save this information must be found.
            # NOTE: this message is currently discarded, because an empty
            # list is returned below.
            extra_tokens.append(u'Plaso: some tokens from this entry can '
                                u'not be saved. Entry at 0x{0:X} with unknown '
                                u'token id "0x{1:X}".'.format(
                                    start_position - 1, start_token_id))
            # Move to next entry.
            file_object.seek(next_entry - file_object.tell(), os.SEEK_CUR)
            # An empty list is returned because it is not known which
            # structure was the incorrect one that prevented reaching the
            # expected end of the entry.
            return []
        return extra_tokens

    # TODO: instead of compare the text to know what structure was parsed
    #       is better to compare directly the numeric number (token_id),
    #       less readable, but better performance.
    def FormatToken(self, token_id, token, file_object):
        """Formats a parsed BSM token as a human readable string.

    Some token types (exec arguments, exec environment, data and groups)
    only carry a count in the parsed token; their elements are read from
    file_object while formatting, which advances the file position.

    Args:
      token_id: Identification integer of the token_type.
      token: Token struct to parse.
      file_object: BSM file.

    Returns:
      String with the parsed Token values. A token identifier that is not
      registered, or that is registered but has no dedicated format, yields
      a "Type Unknown" string.
    """
        if token_id not in self.bsm_type_list_all:
            return u'Type Unknown: {0:d} (0x{0:X})'.format(token_id)

        bsm_type, _ = self.bsm_type_list_all[token_id]

        if bsm_type in (
                u'BSM_TOKEN_TEXT', u'BSM_TOKEN_PATH', u'BSM_TOKEN_ZONENAME'):
            try:
                string = self._CopyUtf8ByteArrayToString(token.text)
            except TypeError:
                string = u'Unknown'
            return u'[{0}: {1:s}]'.format(bsm_type, string)

        if bsm_type in (
                u'BSM_TOKEN_RETURN32', u'BSM_TOKEN_RETURN64',
                u'BSM_TOKEN_EXIT'):
            return u'[{0}: {1} ({2}), System call status: {3}]'.format(
                bsm_type, bsmtoken.BSM_ERRORS.get(token.status, u'Unknown'),
                token.status, token.return_value)

        if bsm_type in (
                u'BSM_TOKEN_SUBJECT32', u'BSM_TOKEN_SUBJECT64',
                u'BSM_TOKEN_PROCESS32', u'BSM_TOKEN_PROCESS64',
                u'BSM_TOKEN_SUBJECT32_EX', u'BSM_TOKEN_SUBJECT64_EX',
                u'BSM_TOKEN_PROCESS32_EX', u'BSM_TOKEN_PROCESS64_EX'):
            # Subject and process tokens share one layout; only the way the
            # terminal IP address is stored differs for the "_EX" variants.
            if bsm_type.endswith(u'_EX'):
                ip_type = token.bsm_ip_type_short
                if ip_type.net_type == self.AU_IPv6:
                    ip = self._IPv6Format(ip_type.ip_addr.high,
                                          ip_type.ip_addr.low)
                elif ip_type.net_type == self.AU_IPv4:
                    ip = self._IPv4Format(ip_type.ip_addr)
                else:
                    ip = u'unknown'
            else:
                ip = self._IPv4Format(token.ipv4)

            subject_data = token.subject_data
            return (
                u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
                u'pid({6}), session_id({7}), terminal_port({8}), '
                u'terminal_ip({9})]').format(
                    bsm_type, subject_data.audit_uid,
                    subject_data.effective_uid, subject_data.effective_gid,
                    subject_data.real_uid, subject_data.real_gid,
                    subject_data.pid, subject_data.session_id,
                    token.terminal_port, ip)

        if bsm_type in (u'BSM_TOKEN_ARGUMENT32', u'BSM_TOKEN_ARGUMENT64'):
            string = self._CopyUtf8ByteArrayToString(token.text)
            return u'[{0}: {1:s}({2}) is 0x{3:X}]'.format(
                bsm_type, string, token.num_arg, token.name_arg)

        if bsm_type in (u'BSM_TOKEN_EXEC_ARGUMENTS', u'BSM_TOKEN_EXEC_ENV'):
            # The token itself is the number of strings that follow.
            arguments = []
            for _ in range(0, token):
                sub_token = self.BSM_TOKEN_EXEC_ARGUMENT.parse_stream(
                    file_object)
                arguments.append(
                    self._CopyUtf8ByteArrayToString(sub_token.text))
            return u'[{0}: {1:s}]'.format(bsm_type, u' '.join(arguments))

        if bsm_type == u'BSM_TOKEN_AUT_SOCKINET32':
            return (u'[{0}: {1} ({2}) open in port {3}. Address {4}]'.format(
                bsm_type,
                bsmtoken.BSM_PROTOCOLS.get(token.net_type, u'UNKNOWN'),
                token.net_type, token.port_number,
                self._IPv4Format(token.ipv4)))

        if bsm_type == u'BSM_TOKEN_AUT_SOCKINET128':
            return u'[{0}: {1} ({2}) open in port {3}. Address {4}]'.format(
                bsm_type,
                bsmtoken.BSM_PROTOCOLS.get(token.net_type, u'UNKNOWN'),
                token.net_type, token.port_number,
                self._IPv6Format(token.ipv6.high, token.ipv6.low))

        if bsm_type == u'BSM_TOKEN_ADDR':
            return u'[{0}: {1}]'.format(bsm_type, self._IPv4Format(token))

        if bsm_type == u'BSM_TOKEN_IP':
            return u'[IPv4_Header: 0x{0:s}]'.format(token.encode(u'hex'))

        if bsm_type == u'BSM_TOKEN_ADDR_EXT':
            return u'[{0}: {1} ({2}). Address {3}]'.format(
                bsm_type,
                bsmtoken.BSM_PROTOCOLS.get(token.net_type, u'UNKNOWN'),
                token.net_type,
                self._IPv6Format(token.ipv6.high, token.ipv6.low))

        if bsm_type in (u'BSM_TOKEN_PORT', u'BSM_TOKEN_SEQUENCE'):
            return u'[{0}: {1}]'.format(bsm_type, token)

        if bsm_type == u'BSM_TOKEN_TRAILER':
            return u'[{0}: {1}]'.format(bsm_type, token.record_length)

        if bsm_type == u'BSM_TOKEN_FILE':
            # TODO: if this timestamp is useful, it must be extracted as a
            #       separate event object.
            timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
                token.timestamp, token.microsecond)
            date_time = timelib.Timestamp.CopyToDatetime(timestamp, pytz.UTC)
            date_time_string = date_time.strftime(u'%Y-%m-%d %H:%M:%S')

            string = self._CopyUtf8ByteArrayToString(token.text)
            return u'[{0}: {1:s}, timestamp: {2:s}]'.format(
                bsm_type, string, date_time_string)

        if bsm_type == u'BSM_TOKEN_IPC':
            return u'[{0}: object type {1}, object id {2}]'.format(
                bsm_type, token.object_type, token.object_id)

        if bsm_type == u'BSM_TOKEN_DATA':
            data_type = bsmtoken.BSM_TOKEN_DATA_TYPE.get(token.data_type, u'')
            # The unit structure is looked up lazily so that only the
            # attribute needed for this data type is accessed.
            if data_type == u'AUR_CHAR':
                unit_struct = self.BSM_TOKEN_DATA_CHAR
            elif data_type == u'AUR_SHORT':
                unit_struct = self.BSM_TOKEN_DAT_SHORT
            elif data_type == u'AUR_INT32':
                unit_struct = self.BSM_TOKEN_DATA_INTEGER
            else:
                unit_struct = None

            if unit_struct is None:
                data = [u'Unknown type data']
            else:
                data = [
                    unit_struct.parse_stream(file_object)
                    for _ in range(token.unit_count)
                ]
            # TODO: the data when it is string ends with ".", HW a space is
            #       return after uses the UTF-8 conversion.
            return u'[{0}: Format data: {1}, Data: {2}]'.format(
                bsm_type, bsmtoken.BSM_TOKEN_DATA_PRINT[token.how_to_print],
                self._RawToUTF8(u''.join(data)))

        if bsm_type in (u'BSM_TOKEN_ATTR32', u'BSM_TOKEN_ATTR64'):
            return (u'[{0}: Mode: {1}, UID: {2}, GID: {3}, '
                    u'File system ID: {4}, Node ID: {5}, Device: {6}]').format(
                        bsm_type, token.file_mode, token.uid, token.gid,
                        token.file_system_id, token.file_system_node_id,
                        token.device)

        if bsm_type == u'BSM_TOKEN_GROUPS':
            # The token itself is the number of group entries that follow.
            arguments = [
                self._RawToUTF8(
                    self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object))
                for _ in range(token)
            ]
            return u'[{0}: {1:s}]'.format(bsm_type, u','.join(arguments))

        if bsm_type == u'BSM_TOKEN_AUT_SOCKINET32_EX':
            addr_port = token.structure_addr_port
            if bsmtoken.BSM_PROTOCOLS.get(token.socket_domain,
                                          u'') == u'INET6':
                saddr = self._IPv6Format(addr_port.saddr_high,
                                         addr_port.saddr_low)
                daddr = self._IPv6Format(addr_port.daddr_high,
                                         addr_port.daddr_low)
            else:
                saddr = self._IPv4Format(addr_port.source_address)
                daddr = self._IPv4Format(addr_port.destination_address)

            return u'[{0}: from {1} port {2} to {3} port {4}]'.format(
                bsm_type, saddr, addr_port.source_port, daddr,
                addr_port.destination_port)

        if bsm_type == u'BSM_TOKEN_IPC_PERM':
            return (u'[{0}: user id {1}, group id {2}, create user id {3}, '
                    u'create group id {4}, access {5}]').format(
                        bsm_type, token.user_id, token.group_id,
                        token.creator_user_id, token.creator_group_id,
                        token.access_mode)

        if bsm_type == u'BSM_TOKEN_SOCKET_UNIX':
            string = self._CopyUtf8ByteArrayToString(token.path)
            return u'[{0}: Family {1}, Path {2:s}]'.format(
                bsm_type, token.family, string)

        if bsm_type == u'BSM_TOKEN_OPAQUE':
            string = self._CopyByteArrayToBase16String(token.text)
            return u'[{0}: {1:s}]'.format(bsm_type, string)

        # A registered token type without a dedicated format used to fall
        # off the end and return None, which callers then stored as if it
        # were a string. Report it the same way as an unregistered type.
        return u'Type Unknown: {0:d} (0x{0:X})'.format(token_id)
Example #11
0
class JavaIDXParser(interface.FileObjectParser):
    """Parse Java WebStart Cache IDX files for download events.

  There are five structures defined. 6.02 files had one generic section
  that retained all data. From 6.03, the file went to a multi-section
  format where later sections were optional and had variable-lengths.
  6.03, 6.04, and 6.05 files all have their main data section (#2)
  begin at offset 128. The short structure is because 6.05 files
  deviate after the 8th byte. So, grab the first 8 bytes to ensure it's
  valid, get the file version, then continue on with the correct
  structures.
  """

    _INITIAL_FILE_OFFSET = None

    NAME = u'java_idx'
    DESCRIPTION = u'Parser for Java WebStart Cache IDX files.'

    IDX_SHORT_STRUCT = construct.Struct(u'magic', construct.UBInt8(u'busy'),
                                        construct.UBInt8(u'incomplete'),
                                        construct.UBInt32(u'idx_version'))

    IDX_602_STRUCT = construct.Struct(
        u'IDX_602_Full', construct.UBInt16(u'null_space'),
        construct.UBInt8(u'shortcut'), construct.UBInt32(u'content_length'),
        construct.UBInt64(u'last_modified_date'),
        construct.UBInt64(u'expiration_date'),
        construct.PascalString(u'version_string',
                               length_field=construct.UBInt16(u'length')),
        construct.PascalString(u'url',
                               length_field=construct.UBInt16(u'length')),
        construct.PascalString(u'namespace',
                               length_field=construct.UBInt16(u'length')),
        construct.UBInt32(u'FieldCount'))

    IDX_605_SECTION_ONE_STRUCT = construct.Struct(
        u'IDX_605_Section1', construct.UBInt8(u'shortcut'),
        construct.UBInt32(u'content_length'),
        construct.UBInt64(u'last_modified_date'),
        construct.UBInt64(u'expiration_date'),
        construct.UBInt64(u'validation_date'), construct.UBInt8(u'signed'),
        construct.UBInt32(u'sec2len'), construct.UBInt32(u'sec3len'),
        construct.UBInt32(u'sec4len'))

    IDX_605_SECTION_TWO_STRUCT = construct.Struct(
        u'IDX_605_Section2',
        construct.PascalString(u'version',
                               length_field=construct.UBInt16(u'length')),
        construct.PascalString(u'url',
                               length_field=construct.UBInt16(u'length')),
        construct.PascalString(u'namespec',
                               length_field=construct.UBInt16(u'length')),
        construct.PascalString(u'ip_address',
                               length_field=construct.UBInt16(u'length')),
        construct.UBInt32(u'FieldCount'))

    # Java uses Pascal-style strings, but with a 2-byte length field.
    JAVA_READUTF_STRING = construct.Struct(
        u'Java.ReadUTF',
        construct.PascalString(u'string',
                               length_field=construct.UBInt16(u'length')))

    def _ReadStructure(self, structure, file_object):
        """Parses a structure at the current offset of the file-like object.

    Args:
      structure: A construct structure definition.
      file_object: A file-like object.

    Returns:
      The parsed construct container.

    Raises:
      UnableToParseFile: when the structure cannot be read or parsed.
    """
        try:
            return structure.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            # "!s" is required here: formatting an exception with ":s"
            # raises TypeError on Python 3.
            raise errors.UnableToParseFile(
                u'Unable to parse Java IDX file with error: {0!s}.'.format(
                    exception))

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a Java WebStart Cache IDX file-like object.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        file_object.seek(0, os.SEEK_SET)
        magic = self._ReadStructure(self.IDX_SHORT_STRUCT, file_object)

        # Fields magic.busy and magic.incomplete are normally 0x00. They
        # are set to 0x01 if the file is currently being downloaded. Logic
        # checks for > 1 to avoid a race condition and still reject any
        # file with other data.
        # Field magic.idx_version is the file version, of which only
        # certain versions are supported.
        if magic.busy > 1 or magic.incomplete > 1:
            raise errors.UnableToParseFile(u'Not a valid Java IDX file')

        if magic.idx_version not in (602, 603, 604, 605):
            raise errors.UnableToParseFile(u'Not a valid Java IDX file')

        # Obtain the relevant values from the file. The last modified date
        # denotes when the file was last modified on the HOST. For example,
        # when the file was uploaded to a web server.
        if magic.idx_version == 602:
            section_one = self._ReadStructure(self.IDX_602_STRUCT,
                                              file_object)
            last_modified_date = section_one.last_modified_date
            url = section_one.url
            ip_address = u'Unknown'
            http_header_count = section_one.FieldCount
        else:
            # IDX 6.03 and 6.04 have two unused bytes before the structure.
            if magic.idx_version in (603, 604):
                file_object.read(2)

            # IDX 6.03, 6.04, and 6.05 files use the same structures for the
            # remaining data.
            section_one = self._ReadStructure(
                self.IDX_605_SECTION_ONE_STRUCT, file_object)
            last_modified_date = section_one.last_modified_date
            if file_object.get_size() > 128:
                # Static offset for section 2.
                file_object.seek(128, os.SEEK_SET)
                section_two = self._ReadStructure(
                    self.IDX_605_SECTION_TWO_STRUCT, file_object)
                url = section_two.url
                ip_address = section_two.ip_address
                http_header_count = section_two.FieldCount
            else:
                url = u'Unknown'
                ip_address = u'Unknown'
                http_header_count = 0

        # File offset is now just prior to HTTP headers. Make sure there
        # are headers, and then parse them to retrieve the download date.
        download_date = None
        for _ in range(http_header_count):
            try:
                field = self.JAVA_READUTF_STRING.parse_stream(file_object)
                value = self.JAVA_READUTF_STRING.parse_stream(file_object)
            except (IOError, construct.FieldError) as exception:
                # A truncated header list should not discard the timestamps
                # that were already read from the sections above.
                parser_mediator.ProduceExtractionError(
                    u'Unable to parse HTTP header with error: {0!s}'.format(
                        exception))
                break

            if field.string == u'date':
                # Time string "should" be in UTC or have an associated time zone
                # information in the string itself. If that is not the case then
                # there is no reliable method for plaso to determine the proper
                # timezone, so the assumption is that it is UTC.
                try:
                    download_date = timelib.Timestamp.FromTimeString(
                        value.string, gmt_as_timezone=False)
                except errors.TimestampError:
                    download_date = None
                    parser_mediator.ProduceExtractionError(
                        u'Unable to parse time value: {0:s}'.format(
                            value.string))

        if not url or not ip_address:
            raise errors.UnableToParseFile(
                u'Unexpected Error: URL or IP address not found in file.')

        event_data = JavaIDXEventData()
        event_data.idx_version = magic.idx_version
        event_data.ip_address = ip_address
        event_data.url = url

        date_time = dfdatetime_java_time.JavaTime(timestamp=last_modified_date)
        # TODO: Move the timestamp description into eventdata.
        event = time_events.DateTimeValuesEvent(date_time, u'File Hosted Date')
        parser_mediator.ProduceEventWithEventData(event, event_data)

        if section_one:
            expiration_date = section_one.get(u'expiration_date', None)
            if expiration_date:
                date_time = dfdatetime_java_time.JavaTime(
                    timestamp=expiration_date)
                event = time_events.DateTimeValuesEvent(
                    date_time, definitions.TIME_DESCRIPTION_EXPIRATION)
                parser_mediator.ProduceEventWithEventData(event, event_data)

        if download_date:
            event = time_events.TimestampEvent(
                download_date, definitions.TIME_DESCRIPTION_FILE_DOWNLOADED)
            parser_mediator.ProduceEventWithEventData(event, event_data)
Example #12
0
    def _getInvariantCode(self, vbrType, vbrStruct):
        """
            Helper method that finds all the sections of the boot code that can be hashed and compared to a whitelist.
            This means that localized strings and other variable parameters (BPB, etc...) are excluded.
            Currently, this method only supports NTFS and Bitlocker VBR.

        Args:
            vbrType: unicode string corresponding to the VBR type ('NTFS' or 'bitlocker')
            vbrStruct: construct.container of the VBR

        Returns: 2-tuple (unicode string of expected loader, concatenated strings of invariant sections of code)

        """
        codeStart = 0
        codeEnd = None
        invariantCode = str()
        expectedLoader = None

        if vbrType == 'NTFS':
            # The first three bytes are a jump over the NTFS BPB to where the code really starts (0x54) and a NOP
            invariantCode += vbrStruct.JumpOverBPB
            codeStart = 0x54
            # NTFS VBR contains localized strings which must be excluded from the hash computation.
            # Before Windows 8, these strings are located at 4 different offsets which can be calculated by adding
            # 0x100 to the values respectively stored in bytes 0x1f8, 0x1f9, 0x1fa and 0x1fb.
            # Starting from Windows 8, these strings are located at 3 different offsets which are directly stored in
            # little endian words respectively at 0x1f6, 0x1f8 and 0x1fa
            # Since there is no easy way to tell which version of Windows we are dealing with beforehand, we first
            # assume it is a Windows < 8 by testing 0x1f8 against all the known first offset. If all tests fail, assume
            # it is Windows >= 8 and check 0x1f6 against the only known first offset (to date)
            firstStrOffset = construct.UBInt8('FirstStringOffset').parse(
                self._raw[0x1f8])
            # Windows NT5
            if firstStrOffset == 0x83:
                expectedLoader = 'NT5.1/NT5.2 VBR'
                codeEnd = 0x100 + firstStrOffset
            # Windows NT6.0
            elif firstStrOffset == 0x80:
                expectedLoader = 'NT6.0 VBR'
                codeEnd = 0x100 + firstStrOffset
            # Windows NT6.1
            elif firstStrOffset == 0x8c:
                expectedLoader = 'NT6.1 VBR'
                codeEnd = 0x100 + firstStrOffset
            # Windows NT6.2+
            else:
                firstStrOffset = construct.ULInt16('FirstStringOffset').parse(
                    self._raw[0x1f6:0x1f8])
                if firstStrOffset == 0x18a:
                    expectedLoader = 'NT6.2+ VBR'
                    codeEnd = firstStrOffset

            if codeEnd is None:
                self._suspiciousBehaviour.append(
                    'Invalid string offset: {0:#x}'.format(firstStrOffset))
                self._logger.debug(
                    'First localized string offset is wrong for a NTFS VBR: {0:#x}. '
                    'It should be 0x83, 0x80, 0x8c or 0x18a.'.format(
                        firstStrOffset))
                codeEnd = 0

        elif vbrType == 'bitlocker':
            expectedLoader = 'NT6.1+ Bitlocker VBR'
            # The first three bytes are a jump over the NTFS BPB to where the code really starts (0x5A) and a NOP
            invariantCode += vbrStruct.JumpOverBPB
            # First section of code (_BITLOCKER_VBR_STRUCT.Code1)
            invariantCode += vbrStruct.Code1
            # In the second section of code, there are localized strings which must be excluded from hash computation.
            # Their offsets are stored in the last 3 bytes before the VBR signature (0x55aa).
            # For Windows 8, 8.1 and 10, the first string offset seems to always be 0x100 (ie. FirstStrOffset = 0x00)
            if vbrStruct.FirstStrOffset != 0:
                self._suspiciousBehaviour.append(
                    'Invalid string offset: {0:#x}'.format(
                        vbrStruct.FirstStrOffset))
                self._logger.debug(
                    'First localized string offset is wrong for a Bitlocker VBR. '
                    'It should be 0x00) : {0:#x}'.format(
                        vbrStruct.FirstStrOffset))
            codeStart = 0xc8  # Offset of Code2
            codeEnd = 0x100 + vbrStruct.FirstStrOffset
        else:
            raise NotImplementedError(
                'VBR type "{0}" is not implemented yet'.format(vbrType))

        self._logger.debug(
            'Expecting {0}. Code starts at {1:#x} and ends at {2:#x}'.format(
                expectedLoader, codeStart, codeEnd))

        invariantCode += self._raw[codeStart:codeEnd]
        return expectedLoader, invariantCode
Example #13
0
class _GzipMember(object):
    """Gzip member.

  Gzip files have no index of members, so each member must be read
  sequentially before metadata and random seeks are possible. This class
  provides caching of gzip member data during the initial read of each member.

  Attributes:
    comment (str): comment stored in the member.
    member_end_offset (int): offset to the end of the member in the parent file
        object.
    member_start_offset (int): offset to the start of the member in the parent
        file object.
    modification_time (int): value of the modification_time field of the
        member header.
    operating_system (int): type of file system on which the compression
        took place.
    original_filename (str): original filename of the uncompressed file.
    uncompressed_data_offset (int): offset of the start of the uncompressed
        data in this member relative to the whole gzip file's uncompressed data.
    uncompressed_data_size (int): total size of the data in this gzip member
        after decompression.
  """
    # Layout of the fixed-size part of the member header, as parsed by
    # _ReadAndParseHeader.
    _MEMBER_HEADER_STRUCT = construct.Struct(
        'file_header', construct.ULInt16('signature'),
        construct.UBInt8('compression_method'), construct.UBInt8('flags'),
        construct.SLInt32('modification_time'),
        construct.UBInt8('extra_flags'), construct.UBInt8('operating_system'))

    # Layout of the member footer.
    # NOTE(review): presumably consumed by _ReadAndParseFooter, which is not
    # visible from here - confirm.
    _MEMBER_FOOTER_STRUCT = construct.Struct(
        'file_footer', construct.ULInt32('checksum'),
        construct.ULInt32('uncompressed_data_size'))

    # Expected value of the 16-bit 'signature' header field; any other value
    # makes _ReadAndParseHeader raise FileFormatError.
    _GZIP_SIGNATURE = 0x8b1f

    # The only 'compression_method' header value accepted by
    # _ReadAndParseHeader.
    _COMPRESSION_METHOD_DEFLATE = 8

    # Bit masks for the 'flags' header field.
    # NOTE(review): the names match the FLG bits of RFC 1952; where they are
    # tested is outside this view - confirm.
    _FLAG_FTEXT = 0x01
    _FLAG_FHCRC = 0x02
    _FLAG_FEXTRA = 0x04
    _FLAG_FNAME = 0x08
    _FLAG_FCOMMENT = 0x10

    # The maximum size of the uncompressed data cache.
    _UNCOMPRESSED_DATA_CACHE_SIZE = 2 * 1024 * 1024

    def __init__(self, file_object, member_start_offset,
                 uncompressed_data_offset):
        """Initializes a gzip member.

    The member is read in full during initialization: the header is parsed,
    all compressed data is decompressed once and the footer is parsed, after
    which the end offset of the member is known.

    Args:
      file_object (FileIO): file-like object, containing the gzip member.
      member_start_offset (int): offset to the beginning of the gzip member
          in the containing file.
      uncompressed_data_offset (int): current offset into the uncompressed data
          in the containing file.
    """
        # Positions supplied by the caller: where the member starts in the
        # parent file object and where its data starts relative to the whole
        # gzip file's uncompressed data.
        self.member_start_offset = member_start_offset
        self.uncompressed_data_offset = uncompressed_data_offset

        # Member metadata, unknown until the header has been parsed.
        self.comment = None
        self.modification_time = None
        self.operating_system = None
        self.original_filename = None

        # Total size of the data in this gzip member after decompression.
        self.uncompressed_data_size = None

        # Cached uncompressed data together with the offsets, into this
        # member's uncompressed data, of its first and last item.
        self._cache = b''
        self._cache_start_offset = None
        self._cache_end_offset = None

        # Read the member, starting at its first byte.
        self._file_object = file_object
        file_object.seek(member_start_offset, os.SEEK_SET)

        self._ReadAndParseHeader(file_object)
        # The compressed data begins right after the header.
        self._compressed_data_start = file_object.get_offset()
        self._ResetDecompressorState()

        self._LoadDataIntoCache(file_object, 0, read_all_data=True)

        self._ReadAndParseFooter(file_object)

        # The member ends right after the footer.
        self.member_end_offset = file_object.get_offset()

    def GetCacheSize(self):
        """Determines the size of the uncompressed cached data.

    Returns:
      int: number of cached bytes, which is 0 when the cache is not
          populated.
    """
        # Compare against None explicitly: 0 is a valid cache start offset
        # and must not be mistaken for "no cache", otherwise a cache that
        # starts at the beginning of the member is never reported as full.
        if self._cache_start_offset is None or self._cache_end_offset is None:
            return 0
        return self._cache_end_offset - self._cache_start_offset

    def IsCacheFull(self):
        """Determines if the uncompressed data cache has reached its limit.

    Returns:
      bool: True if the number of cached bytes is at least the maximum cache
          size.
    """
        cached_size = self.GetCacheSize()
        return cached_size >= self._UNCOMPRESSED_DATA_CACHE_SIZE

    def FlushCache(self):
        """Discards the cached decompressed data and resets the decompressor."""
        self._cache_start_offset = self._cache_end_offset = None
        self._cache = b''
        self._ResetDecompressorState()

    def _ResetDecompressorState(self):
        """Replaces the decompressor state with a fresh one.

    The new state is created for the offset at which the compressed data of
    this member starts.
    """
        fresh_state = _GzipDecompressorState(self._compressed_data_start)
        self._decompressor_state = fresh_state

    def ReadAtOffset(self, offset, size=None):
        """Reads a byte string from the gzip member at the specified offset.

    The function will read a byte string of the specified size or
    all of the remaining data if no size was specified.

    Args:
      offset (int): offset within the uncompressed data in this member to
        read from.
      size (Optional[int]): maximum number of bytes to read, where None
          represents all remaining data, to a maximum of the uncompressed
          cache size.

    Returns:
      bytes: data read.

    Raises:
      IOError: if the read failed.
      ValueError: if a negative read size or offset is specified.
    """
        # ":d" is a format specification; the former "!d" is not a valid
        # conversion and made str.format() fail with an unrelated message.
        if size is not None and size < 0:
            raise ValueError('Invalid size value {0:d}'.format(size))

        if offset < 0:
            raise ValueError('Invalid offset value {0:d}'.format(offset))

        if size == 0 or offset >= self.uncompressed_data_size:
            return b''

        if self._cache_start_offset is None:
            self._LoadDataIntoCache(self._file_object, offset)

        # Reload the cache when the requested offset lies outside of it.
        if offset > self._cache_end_offset or offset < self._cache_start_offset:
            self.FlushCache()
            self._LoadDataIntoCache(self._file_object, offset)

        cache_offset = offset - self._cache_start_offset
        if not size:
            return self._cache[cache_offset:]

        data_end_offset = cache_offset + size

        # NOTE(review): data_end_offset is relative to the start of the cache
        # while _cache_end_offset is relative to the member's uncompressed
        # data; slicing clips at the end of the cache either way.
        if data_end_offset > self._cache_end_offset:
            return self._cache[cache_offset:]

        return self._cache[cache_offset:data_end_offset]

    def _LoadDataIntoCache(self,
                           file_object,
                           minimum_offset,
                           read_all_data=False):
        """Reads and decompresses the data in the member.

    Decompressed data is appended to the cache until IsCacheFull() reports
    that the cache is full. Data located before minimum_offset is decompressed
    but never cached.

    Args:
      file_object (FileIO): file-like object.
      minimum_offset (int): offset into this member's uncompressed data at
          which the cache should start.
      read_all_data (bool): True if all the compressed data should be read
          from the member, even once the cache is full.
    """
        # Decompression can only be performed from beginning to end of the stream.
        # So, if data before the current position of the decompressor in the stream
        # is required, it's necessary to throw away the current decompression
        # state and start again.
        if minimum_offset < self._decompressor_state.uncompressed_offset:
            self._ResetDecompressorState()

        while not self.IsCacheFull() or read_all_data:
            decompressed_data = self._decompressor_state.Read(file_object)
            decompressed_data_length = len(decompressed_data)
            decompressed_end_offset = self._decompressor_state.uncompressed_offset
            decompressed_start_offset = (decompressed_end_offset -
                                         decompressed_data_length)

            data_to_add = decompressed_data
            added_data_start_offset = decompressed_start_offset

            # The chunk starts before the requested offset: by default none of
            # it is cached.
            if decompressed_start_offset < minimum_offset:
                data_to_add = None

            # The chunk straddles the requested offset: cache only its tail.
            if decompressed_start_offset < minimum_offset < decompressed_end_offset:
                data_add_offset = decompressed_end_offset - minimum_offset
                # A slice is required here. Indexing would select a single
                # byte (an int on Python 3) instead of the trailing
                # data_add_offset bytes.
                data_to_add = decompressed_data[-data_add_offset:]
                added_data_start_offset = decompressed_end_offset - data_add_offset

            if not self.IsCacheFull() and data_to_add:
                self._cache = b''.join([self._cache, data_to_add])
                if self._cache_start_offset is None:
                    self._cache_start_offset = added_data_start_offset
                if self._cache_end_offset is None:
                    self._cache_end_offset = self._cache_start_offset + len(
                        data_to_add)
                else:
                    self._cache_end_offset += len(data_to_add)

            # If there's no more data in the member, the unused_data value is
            # populated in the decompressor. When this situation arises, we rewind
            # to the end of the compressed_data section.
            unused_data = self._decompressor_state.GetUnusedData()
            if unused_data:
                seek_offset = -len(unused_data)
                file_object.seek(seek_offset, os.SEEK_CUR)
                self._ResetDecompressorState()
                break

    def _ReadAndParseHeader(self, file_object):
        """Parses the member header and stores the values found in it.

    The fixed-size header is validated first. The optional fields announced
    by the header flags are then consumed in this order: extra field,
    original filename, comment and header CRC.

    Args:
      file_object (FileIO): file-like object to read from.

    Raises:
      FileFormatError: if the signature or the compression method is not
          supported.
    """
        header = self._MEMBER_HEADER_STRUCT.parse_stream(file_object)

        signature = header.signature
        if signature != self._GZIP_SIGNATURE:
            raise errors.FileFormatError(
                'Unsupported file signature: 0x{0:04x}.'.format(signature))

        compression_method = header.compression_method
        if compression_method != self._COMPRESSION_METHOD_DEFLATE:
            raise errors.FileFormatError(
                'Unsupported compression method: {0:d}.'.format(
                    compression_method))

        self.modification_time = header.modification_time
        self.operating_system = header.operating_system

        flags = header.flags

        if flags & self._FLAG_FEXTRA:
            # The extra field is skipped: only its size is read.
            size_struct = construct.ULInt16('extra_field_data_size')
            file_object.seek(
                size_struct.parse_stream(file_object), os.SEEK_CUR)

        if flags & self._FLAG_FNAME:
            # Since encoding is set construct will convert the C string to
            # Unicode. Note that construct 2 does not support the encoding to
            # be a Unicode string.
            filename_struct = construct.CString(
                'original_filename', encoding=b'iso-8859-1')
            self.original_filename = filename_struct.parse_stream(file_object)

        if flags & self._FLAG_FCOMMENT:
            # Same encoding remark as for the original filename.
            comment_struct = construct.CString(
                'comment', encoding=b'iso-8859-1')
            self.comment = comment_struct.parse_stream(file_object)

        if flags & self._FLAG_FHCRC:
            # The 2-byte header CRC is consumed but not verified.
            file_object.read(2)

    def _ReadAndParseFooter(self, file_object):
        """Parses the member footer and records the uncompressed data size.

    Errors raised while parsing the footer structure are not handled here
    and propagate to the caller.

    Args:
      file_object (FileIO): file-like object to read from.
    """
        self.uncompressed_data_size = self._MEMBER_FOOTER_STRUCT.parse_stream(
            file_object).uncompressed_data_size
Example #14
0
File: peer.py Project: romanz/pybt
    encoder=lambda obj, ctx : list(obj),
    decoder=lambda obj, ctx : ''.join(obj)
)

# Field layout of every peer message, keyed by message name. Each layout
# starts with a `c.Magic` constant holding the one-byte message identifier,
# followed by the message-specific fields.
# NOTE(review): the names and identifiers look like the BitTorrent peer wire
# protocol messages -- confirm against the protocol specification.
_commands = {
    'choke'         : [c.Magic('\x00')],
    'unchoke'       : [c.Magic('\x01')],
    'interested'    : [c.Magic('\x02')],
    'uninterested'  : [c.Magic('\x03')],
    'have'          : [c.Magic('\x04'), c.UBInt32('index')],
    'bitfield'      : [c.Magic('\x05'), Bytes('bits')],
    'request'       : [c.Magic('\x06'), c.UBInt32('index'), c.UBInt32('begin'), c.UBInt32('length')],
    'piece'         : [c.Magic('\x07'), c.UBInt32('index'), c.UBInt32('begin'), Bytes('data')],
    'cancel'        : [c.Magic('\x08'), c.UBInt32('index'), c.UBInt32('begin'), c.UBInt32('length')],
    'port'          : [c.Magic('\x09'), c.UBInt16('port')],
    'extended'      : [c.Magic('\x14'), c.UBInt8('cmd'), Bytes('msg')],
}

# Replace every field list, in place, by a struct named after its message.
# Only values are reassigned, so the set of keys does not change while the
# dictionary is being iterated.
for k, v in _commands.items():
    _commands[k] = c.Struct(k, *v)

def build_handshake(info_hash, host_id, extensions):
    """Builds a handshake message that advertises the given extensions.

    Args:
        info_hash: value stored in the `info_hash` field of the handshake.
        host_id: value stored in the `peer_id` field of the handshake.
        extensions: iterable of bit positions to set in the 64-bit
            `reserved` field; every other bit is left cleared.

    Returns:
        The result of `_handshake.build` for the populated container.
    """
    reserved = bitarray.bitarray([0] * 64, endian='little')
    for position in extensions:
        reserved[position] = True

    fields = c.Container(
        info_hash=info_hash,
        peer_id=host_id,
        reserved=reserved.tobytes())
    return _handshake.build(fields)
Example #15
0
class CupsIppParser(interface.BaseParser):
    """Parser for CUPS IPP files.

    The file header is validated first, then attribute name/value pairs are
    read until the end-of-attributes group or a read failure. One event is
    produced for each job timestamp attribute that was found.
    """

    NAME = 'cups_ipp'
    DESCRIPTION = u'Parser for CUPS IPP files.'

    # INFO:
    # For each file, we have only one document with three different timestamps:
    # Created, process and finished.
    # Format:
    # [HEADER: MAGIC + KNOWN_TYPE][GROUP A]...[GROUP Z][GROUP_END: 0x03]
    # GROUP: [GROUP ID][PAIR A]...[PAIR Z] where [PAIR: NAME + VALUE]
    #   GROUP ID: [1byte ID]
    #   PAIR: [TagID][\x00][Name][Value])
    #     TagID: 1 byte integer with the type of "Value".
    #     Name: [Length][Text][\00]
    #       Name can be empty when the name has more than one value.
    #       Example: family name "lopez mata" with more than one surname.
    #       Type_Text + [0x06, family, 0x00] + [0x05, lopez, 0x00] +
    #       Type_Text + [0x00, 0x00] + [0x04, mata, 0x00]
    #     Value: can be integer, boolean, or text provided by TagID.
    #       If boolean, Value: [\x01][0x00(False)] or [\x01(True)]
    #       If integer, Value: [\x04][Integer]
    #       If text,    Value: [Length text][Text][\00]

    # Magic number that identifies the CUPS IPP supported version.
    IPP_MAJOR_VERSION = 2
    IPP_MINOR_VERSION = 0
    # Supported Operation ID.
    IPP_OP_ID = 5

    # CUPS IPP File header.
    CUPS_IPP_HEADER = construct.Struct('cups_ipp_header_struct',
                                       construct.UBInt8('major_version'),
                                       construct.UBInt8('minor_version'),
                                       construct.UBInt16('operation_id'),
                                       construct.UBInt32('request_id'))

    # Group ID that indicates the end of the IPP Control file.
    GROUP_END = 3
    # Identification Groups.
    GROUP_LIST = [1, 2, 4, 5, 6, 7]

    # Type ID.
    TYPE_GENERAL_INTEGER = 32
    TYPE_INTEGER = 33
    TYPE_ENUMERATION = 35
    TYPE_BOOL = 34

    # Type of values that can be extracted.
    INTEGER_8 = construct.UBInt8('integer')
    INTEGER_32 = construct.UBInt32('integer')
    TEXT = construct.PascalString('text',
                                  length_field=construct.UBInt8('length'))
    BOOLEAN = construct.Struct('boolean_value', construct.Padding(1),
                               INTEGER_8)
    INTEGER = construct.Struct('integer_value', construct.Padding(1),
                               INTEGER_32)

    # Name of the pair.
    PAIR_NAME = construct.Struct('pair_name', TEXT, construct.Padding(1))

    # Specific CUPS IPP to generic name.
    NAME_PAIR_TRANSLATION = {
        'printer-uri': u'uri',
        'job-uuid': u'job_id',
        'DestinationPrinterID': u'printer_id',
        'job-originating-user-name': u'user',
        'job-name': u'job_name',
        'document-format': u'doc_type',
        'job-originating-host-name': u'computer_name',
        'com.apple.print.JobInfo.PMApplicationName': u'application',
        'com.apple.print.JobInfo.PMJobOwner': u'owner'
    }

    def Parse(self, parser_context, file_entry):
        """Extracts events from a CUPS IPP file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).

    Raises:
      UnableToParseFile: if the header cannot be read or the IPP version is
          not supported.
    """
        file_object = file_entry.GetFileObject()
        try:
            self._ParseFileObject(parser_context, file_entry, file_object)
        finally:
            # Release the file object on every exit path, including errors
            # raised while reading pairs or producing events.
            file_object.close()

    def _ParseFileObject(self, parser_context, file_entry, file_object):
        """Parses an opened CUPS IPP file; the caller closes the file object.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      file_object: a file-like object that points to a file.

    Raises:
      UnableToParseFile: if the header cannot be read or the IPP version is
          not supported.
    """
        file_object.seek(0, os.SEEK_SET)

        try:
            header = self.CUPS_IPP_HEADER.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            # "!s" converts the exception with str() first; exception objects
            # reject the "s" format specification on Python 3.
            raise errors.UnableToParseFile(
                u'Unable to parse CUPS IPP Header with error: {0!s}'.format(
                    exception))

        if (header.major_version != self.IPP_MAJOR_VERSION
                or header.minor_version != self.IPP_MINOR_VERSION):
            raise errors.UnableToParseFile(
                u'[{0:s}] Unsupported version number.'.format(self.NAME))

        if header.operation_id != self.IPP_OP_ID:
            # Warn if the operation ID differs from the standard one. We should be
            # able to parse the file nonetheless.
            logging.debug(
                u'[{0:s}] Unsupported operation identifier in file: {1:s}.'.
                format(self.NAME, parser_context.GetDisplayName(file_entry)))

        # Read the pairs extracting the name and the value.
        data_dict = {}
        name, value = self.ReadPair(parser_context, file_entry, file_object)
        # ReadPair signals "no more pairs" with (None, None). Testing for None
        # rather than truthiness keeps legitimate pairs that carry an empty
        # name together with a falsy value (0, False or an empty string) from
        # ending the loop prematurely.
        while name is not None or value is not None:
            # Translate the known "name" CUPS IPP to a generic name value.
            pretty_name = self.NAME_PAIR_TRANSLATION.get(name, name)
            data_dict.setdefault(pretty_name, []).append(value)
            name, value = self.ReadPair(parser_context, file_entry,
                                        file_object)

        # Produce one event per timestamp attribute that is present, in the
        # order creation, processing, completion.
        timestamp_attributes = (
            (u'time-at-creation', eventdata.EventTimestamp.CREATION_TIME),
            (u'time-at-processing', eventdata.EventTimestamp.START_TIME),
            (u'time-at-completed', eventdata.EventTimestamp.END_TIME),
        )
        for attribute_name, timestamp_description in timestamp_attributes:
            if attribute_name not in data_dict:
                continue

            event_object = CupsIppEvent(data_dict[attribute_name][0],
                                        timestamp_description, data_dict)
            parser_context.ProduceEvent(event_object,
                                        parser_name=self.NAME,
                                        file_entry=file_entry)

    def ReadPair(self, parser_context, file_entry, file_object):
        """Reads an attribute name and value pair from a CUPS IPP event.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      file_object: a file-like object that points to a file.

    Returns:
      A tuple of name and value. If the end-of-attributes group is reached
      or name and value cannot be read, both are set to None.
    """
        # Pair = Type ID + Name + Value.
        try:
            # Can be:
            #   Group ID + IDtag = Group ID (1byte) + Tag ID (1byte) + '0x00'.
            #   IDtag = Tag ID (1byte) + '0x00'.
            type_id = self.INTEGER_8.parse_stream(file_object)
            if type_id == self.GROUP_END:
                return None, None

            elif type_id in self.GROUP_LIST:
                # If it is a group ID we must read the next byte that contains
                # the first TagID.
                type_id = self.INTEGER_8.parse_stream(file_object)

            # 0x00 separator character.
            _ = self.INTEGER_8.parse_stream(file_object)

        except (IOError, construct.FieldError):
            logging.warning(
                u'[{0:s}] Unsupported identifier in file: {1:s}.'.format(
                    self.NAME, parser_context.GetDisplayName(file_entry)))
            return None, None

        # Name = Length name + name + 0x00
        try:
            name = self.PAIR_NAME.parse_stream(file_object).text
        except (IOError, construct.FieldError):
            logging.warning(u'[{0:s}] Unsupported name in file: {1:s}.'.format(
                self.NAME, parser_context.GetDisplayName(file_entry)))
            return None, None

        # Value: can be integer, boolean or text select by Type ID.
        try:
            if type_id in [
                    self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER,
                    self.TYPE_ENUMERATION
            ]:
                value = self.INTEGER.parse_stream(file_object).integer

            elif type_id == self.TYPE_BOOL:
                value = bool(self.BOOLEAN.parse_stream(file_object).integer)

            else:
                # Every other type identifier is read as text.
                value = self.TEXT.parse_stream(file_object)

        except (IOError, construct.FieldError):
            logging.warning(
                u'[{0:s}] Unsupported value in file: {1:s}.'.format(
                    self.NAME, parser_context.GetDisplayName(file_entry)))
            return None, None

        return name, value
Example #16
0
class MasterBootRecord(BootRecord):
    """Boot record parser for Master Boot Records (MBR).

    The raw record is decoded with _MBR_STRUCT: 440 bytes of bootloader code,
    a 4-byte disk signature, 2 bytes of padding, four partition entries and
    the 2-byte 0x55aa signature.

    NOTE(review): the `'...'.decode('hex')` / `.encode('hex')` calls used by
    this class rely on the Python 2 str hex codec, so the class is Python 2
    only as written.
    """

    _MBR_STRUCT = construct.Struct(
        "mbr",
        construct.HexDumpAdapter(construct.Bytes("bootloader_code", 440)),
        construct.Field('disk_signature', 4),
        construct.Padding(2),
        construct.Array(
            4,
            construct.Struct(
                "partitions",
                construct.SLInt8("state"),
                construct.BitStruct(
                    "beginning",
                    construct.Octet("head"),
                    construct.Bits("sect", 6),
                    construct.Bits("cyl", 10),
                ),
                construct.Enum(
                    construct.UBInt8("type"),
                    Nothing=0x00,
                    FAT12=0x01,
                    XENIX_ROOT=0x02,
                    XENIX_USR=0x03,
                    FAT16_old=0x04,
                    Extended_DOS=0x05,
                    FAT16=0x06,
                    FAT32=0x0b,
                    FAT32_LBA=0x0c,
                    NTFS=0x07,
                    LINUX_SWAP=0x82,
                    LINUX_NATIVE=0x83,
                    PROTECTIVE_MBR=0xee,
                    _default_=construct.Pass,
                ),
                construct.BitStruct(
                    "ending",
                    construct.Octet("head"),
                    construct.Bits("sect", 6),
                    construct.Bits("cyl", 10),
                ),
                construct.ULInt32(
                    "sector_offset"),  # offset from MBR in sectors
                construct.ULInt32("size"),  # in sectors
            )),
        construct.Const(construct.Bytes("signature", 2), '55aa'.decode('hex')),
    )

    # Expected loader name for each known value of the byte stored at offset
    # 0x1b5 of a Windows MBR (offset of the first localized string, minus
    # 0x100).
    _WINDOWS_MBR_LOADERS = {
        0x2c: 'NT5.1/NT5.2 MBR',
        0x62: 'NT6.0 MBR',
        0x63: 'NT6.1+ MBR',
    }

    def __init__(self, filePath, size, offset=None, whitelist=()):
        """Initializes the record as an 'MBR' and delegates to BootRecord.

        Args:
            filePath: forwarded unchanged to BootRecord.__init__.
            size: forwarded unchanged to BootRecord.__init__.
            offset: forwarded unchanged to BootRecord.__init__.
            whitelist: forwarded unchanged to BootRecord.__init__.
        """
        # The type is set before the base initializer runs.
        self._type = 'MBR'
        super(MasterBootRecord, self).__init__(filePath, size, offset,
                                               whitelist)

    def _parse(self):
        """
            Main method in charge of parsing the MBR.
            It will try to parse the boot record according to documented known structure and extract the partition table
            disk signature and code section.
            It will then try to narrow down invariant code, hash it and match the hash against a whitelist.
            If no match was found, it will try some simple heuristics to detect malicious behaviours.

        Returns: nothing

        Raises:
            InvalidMBRError: if the raw data does not match the MBR structure.
        """
        try:
            mbr = self._MBR_STRUCT.parse(self._raw)
        except construct.core.ConstructError as e:
            raise InvalidMBRError('Invalid MBR structure: {0}\n{1}'.format(
                e, hexdump(self._raw)))

        self._parsePartTable(mbr.partitions)

        # Windows stores the disk signature at 0x1B8, other MBRs seem to leave this area alone
        self._diskSignature = mbr.disk_signature

        # If code section is null, check for protective MBR signature (detected in partition table parsing). If found,
        # then the machine is likely using UEFI instead of BIOS to boot. If not, it could mean that the sample being
        # analyzed has been tampered by a bootkit
        if mbr.bootloader_code.encode('hex') == 440 * '00':
            if 'Protective MBR' in self._signature:
                self._signature.append('UEFI (no legacy boot code)')
            else:
                self._suspiciousBehaviour.append('Code section is null')
        else:
            expectedLoader, invariantCode = self._getInvariantCode(
                mbr.bootloader_code)
            codeHash = hashlib.sha256(invariantCode)
            self._matchHash(codeHash, expectedLoader)
            if len(self._signature) == 0:
                # No whitelisted signature matched, try some simple heuristics to flag this MBR as malicious
                # Note that the self._checkCode method is only given the "invariant" code section to help with the
                # disassembling. This will obviously leads to broken offsets, but it doesn't matter since the heuristics
                # don't use them.
                self._checkCode(invariantCode)

    def _parsePartTable(self, partitions):
        """
            Private method that parses the partition table of the MBR. Updates self._partTable list.
            Each appended entry is the tuple
            (partition number, state < 0, type, sector offset, size).

        Args:
            partitions: Construct.Container object of the partition table

        Returns: nothing
        """
        partNum = 0
        for part in partitions:
            partNum += 1
            # Assume a partition entry without size (in LBA) or type is invalid, and do not include it in the listing.
            if part.size != 0 and part.type != 'Nothing':
                # "state" is parsed as a signed byte, so it is negative when
                # its high bit is set.
                self._partTable.append((partNum, part.state < 0, part.type,
                                        part.sector_offset, part.size))
            else:
                self._logger.debug('Ignoring invalid partition: %s', part)
            # Early detection of protective MBR so that we don't try to make sense of the MBR partition table
            if part.type == 'PROTECTIVE_MBR' and partNum == 1:
                self._logger.debug(
                    'Protective MBR detected, MBR partition table should not be taken into account. '
                    'GPT partition table parser not implemented yet')
                self._signature.append('Protective MBR')

    def _getInvariantCode(self, rawCode):
        """
            Helper method that tries to narrow down "invariant code" which can be hashed and compared to well known
            signatures. Most MBRs have localized error strings which must be excluded from the hash computation because
            they may vary from a country to another.
            First, this method tries to detect what kind of MBR it is dealing with. Most of the time, it is enough to
            to look for some known hardcoded strings that identify "well known" MBR (such as Truecrypt, GRUB2, etc...).
            Then, this method finds where the strings are and "removes" them (as in "does not include them").
            Finding these strings can be achieved by quickly studying the assembly code and looking for how these
            strings are echoed on screen at boot time (using interrupt 0x10).
            This research only needs to be done once for each type of MBR but requires an analyst to do it by static
            analysis. This script cannot take care of this. This method merely implements the results of such work.

            Currently supported MBR are:
             - Truecrypt
             - McAfee Endpoint Encryption (Safeboot)
             - GRUB2
             - Windows (XP to 10)

        Args:
            rawCode: str of the code section

        Returns: 2-tuple (expected loader name or None when unknown, concatenated strings of invariant sections of
            code)

        """
        # By default, assume all the MBR code section will be hashed. It is obviously wrong in most cases, but it allows
        # for a "default case" which will automatically matches no known hash in case something goes wrong with the
        # detection.
        codeStart = 0
        codeEnd = len(rawCode)
        expectedLoader = None
        invariantCode = str()

        # TrueCrypt (detected with the hardcoded string following the first jump: " TrueCrypt Boot Loader")
        if rawCode[0x5:0x1b].encode('hex').upper(
        ) == '2054727565437279707420426F6F74204C6F61646572':
            # TrueCrypt uses hardcoded and not-localized error strings. Therefore every TrueCrypt MBR should have the
            # same code from start to end
            expectedLoader = 'TrueCrypt MBR'

        # McAfee SafeBoot (detected with the hardcoded string following the first jump: "Safeboot ")
        elif rawCode[0x3:0xc].encode('hex').upper() == '53616665426F6F7420':
            # Two versions have been seen but both start with a jump to the same offset (0x26).
            # There are some strings at the end of the code section but localization is unlikely so it will be assumed
            # to be hardcoded (until a localized version is found...).
            # Therefore, Safeboot code can be hashed from 0x26 to the end of code section
            invariantCode += rawCode[:0x3]  # Hash the first JMP
            codeStart = 0x26
            expectedLoader = 'Safeboot MBR'

        # GRUB (detected with the hardcoded string "GRUB " located at 0x188)
        elif rawCode[0x188:0x18d].encode('hex').upper() == '4752554220':
            # GRUB has some error strings but they are hardcoded and not localized so they can be included in the hash
            # computation. However GRUB can be installed on a disk (MBR) as well as on a partition (in a kind of VBR).
            # But in both cases the code used is the same. Since a BPB is needed for the latter case it is also present
            # in the MBR (but not needed). It therefore has to be excluded from the hash computation.
            # GRUB is jumping over the BIOS Parameter Block located between 0x3 and 0x5a.
            # It should be followed by the kernel address (word), kernel sector (dword), kernel sector high (dword) and
            # boot drive (byte). Therefore the code really starts at 0x65.
            # These values are hardcoded in boot.img and have little chance to change anytime soon.
            codeStart = 0x65
            invariantCode += rawCode[:0x3]  # Hash the first JMP
            expectedLoader = 'GRUB2 MBR'

        # Windows MBR cannot be detected with hardcoded strings, so they fall in the default case and further checks
        # are then made based on the hypothesis that this is indeed a Windows MBR.
        else:
            # Starting with NT5.0, the MBR contains localized strings which must be excluded from the hash computation.
            # These strings are located after the code, at 3 different offsets which can be calculated by adding 0x100
            # to the values respectively stored in bytes 0x1b5, 0x1b6 and 0x1b7 (last bytes of the code section).
            # Eg: The first localized string is at : 0x100 + the value saved at offset 0x1B5
            # Even though localized strings can be of different lengths, the offset of the first one does not vary
            # given one Windows version. This can therefore be used to tell Windows versions apart.
            firstStrOffset = construct.UBInt8('FirstStringOffset').parse(
                rawCode[0x1b5])
            if firstStrOffset in self._WINDOWS_MBR_LOADERS:
                # Known Windows version: the code stops where the first
                # localized string starts.
                expectedLoader = self._WINDOWS_MBR_LOADERS[firstStrOffset]
                codeEnd = 0x100 + firstStrOffset
            else:
                self._suspiciousBehaviour.append(
                    'Invalid string offset: {0:#x}'.format(firstStrOffset))
                self._logger.debug(
                    'First localized string offset is wrong for a Windows MBR '
                    '(it should be 0x2c, 0x62 or 0x63): {0:#x}'.format(
                        firstStrOffset))

        self._logger.debug(
            'Expecting {0}. Code starts at {1:#x} and ends at {2:#x}'.format(
                expectedLoader, codeStart, codeEnd))

        invariantCode += rawCode[codeStart:codeEnd]
        return expectedLoader, invariantCode

    def _checkCode(self, rawCode):
        """
            Disassembles the given code as 16-bit x86 and records suspicious behaviours in
            self._suspiciousBehaviour:
             - a JMP or CALL found before the first RET
             - an INT instruction whose vector is not 0x10, 0x13, 0x18 or 0x1a

        Args:
            rawCode: code to disassemble, starting at address 0

        Returns: nothing
        """
        md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_16)
        md.detail = True

        checkJmp = True
        for i in md.disasm(rawCode, 0):
            # Check for JUMPs and CALLs before the first PUSH/RET.
            if checkJmp and len(i.groups) > 0:
                # Group check if available
                if hasattr(capstone.x86, 'X86_GRP_CALL') and hasattr(
                        capstone.x86, 'X86_GRP_RET'):
                    if capstone.x86.X86_GRP_CALL in i.groups or capstone.x86.X86_GRP_JUMP in i.groups:
                        self._suspiciousBehaviour.append(
                            'JMP or CALL before relocation')
                        checkJmp = False
                    elif capstone.x86.X86_GRP_RET in i.groups:
                        # Stop search after the first PUSH/RET
                        checkJmp = False
                # Manual check in case capstone version doesn't support CALL and RET groups
                else:
                    if i.mnemonic[0] == 'j' or i.mnemonic == 'call':
                        self._suspiciousBehaviour.append(
                            'JMP or CALL before relocation')
                        checkJmp = False
                    elif i.mnemonic[:3] == 'ret':
                        # Stop search after the first PUSH/RET
                        checkJmp = False

            # Check for unknown interrupt
            if i.mnemonic == 'int' and i.bytes[1] not in (0x10, 0x13, 0x18,
                                                          0x1a):
                self._suspiciousBehaviour.append(
                    'Unknown Interrupt : {0:#x}'.format(i.bytes[1]))
Example #17
0
class DataBlockFile(object):
  """Class that contains a data block file.

  A data block file starts with a file header (_FILE_HEADER) and contains
  cache entries (_CACHE_ENTRY) that are read at a given block offset.
  """

  # Value the signature field of the file header is compared against.
  SIGNATURE = 0xc104cac3

  # Layout of the file header. All integers are little-endian and the
  # fields add up to 8192 bytes, of which 8112 bytes (2028 x 32 bits) are
  # the allocation bitmap.
  # TODO: update emtpy, hints, updating and user.
  _FILE_HEADER = construct.Struct(
      u'chrome_cache_data_file_header',
      construct.ULInt32(u'signature'),
      construct.ULInt16(u'minor_version'),
      construct.ULInt16(u'major_version'),
      construct.ULInt16(u'file_number'),
      construct.ULInt16(u'next_file_number'),
      construct.ULInt32(u'block_size'),
      construct.ULInt32(u'number_of_entries'),
      construct.ULInt32(u'maximum_number_of_entries'),
      construct.Array(4, construct.ULInt32(u'emtpy')),
      construct.Array(4, construct.ULInt32(u'hints')),
      construct.ULInt32(u'updating'),
      construct.Array(5, construct.ULInt32(u'user')),
      construct.Array(2028, construct.ULInt32(u'allocation_bitmap')))

  # Layout of a cache entry. The fields add up to 256 bytes; the last 160
  # bytes hold the key as individual byte values.
  _CACHE_ENTRY = construct.Struct(
      u'chrome_cache_entry',
      construct.ULInt32(u'hash'),
      construct.ULInt32(u'next_address'),
      construct.ULInt32(u'rankings_node_address'),
      construct.ULInt32(u'reuse_count'),
      construct.ULInt32(u'refetch_count'),
      construct.ULInt32(u'state'),
      construct.ULInt64(u'creation_time'),
      construct.ULInt32(u'key_size'),
      construct.ULInt32(u'long_key_address'),
      construct.Array(4, construct.ULInt32(u'data_stream_sizes')),
      construct.Array(4, construct.ULInt32(u'data_stream_addresses')),
      construct.ULInt32(u'flags'),
      construct.Padding(16),
      construct.ULInt32(u'self_hash'),
      construct.Array(160, construct.UBInt8(u'key')))

  def __init__(self, debug=False):
    """Initializes a data block file.

    Args:
      debug (Optional[bool]): True if debug information should be printed.
    """
    super(DataBlockFile, self).__init__()
    # File handling state.
    self._file_object_opened_in_object = False
    self._file_object = None
    self._debug = debug

    # Values that describe the file; version, block_size and
    # number_of_entries are filled in by _ReadFileHeader().
    self.version = None
    self.number_of_entries = None
    self.block_size = None
    self.creation_time = None

  def _ReadFileHeader(self):
    """Reads the file header.

    Sets the version, block_size and number_of_entries attributes from the
    header. In debug mode the header values and the ranges of allocated
    blocks found in the allocation bitmap are printed.

    Raises:
      IOError: if the file header cannot be read or if its signature or
          version is not supported.
    """
    if self._debug:
      print(u'Seeking file header offset: 0x{0:08x}'.format(0))

    self._file_object.seek(0, os.SEEK_SET)

    file_header_data = self._file_object.read(self._FILE_HEADER.sizeof())

    if self._debug:
      print(u'Data block file header data:')
      print(hexdump.Hexdump(file_header_data))

    try:
      file_header = self._FILE_HEADER.parse(file_header_data)
    except construct.FieldError as exception:
      # The !s conversion is used since exceptions do not support the "s"
      # format specification on Python 3.
      raise IOError(u'Unable to parse file header with error: {0!s}'.format(
          exception))

    signature = file_header.get(u'signature')

    if signature != self.SIGNATURE:
      raise IOError(u'Unsupported data block file signature')

    self.version = u'{0:d}.{1:d}'.format(
        file_header.get(u'major_version'),
        file_header.get(u'minor_version'))

    if self.version not in [u'2.0', u'2.1']:
      raise IOError(u'Unsupported data block file version: {0:s}'.format(
          self.version))

    self.block_size = file_header.get(u'block_size')
    self.number_of_entries = file_header.get(u'number_of_entries')

    if self._debug:
      print(u'Signature\t\t\t\t\t\t\t\t: 0x{0:08x}'.format(signature))

      print(u'Version\t\t\t\t\t\t\t\t\t: {0:s}'.format(self.version))

      print(u'File number\t\t\t\t\t\t\t\t: {0:d}'.format(
          file_header.get(u'file_number')))

      print(u'Next file number\t\t\t\t\t\t\t: {0:d}'.format(
          file_header.get(u'next_file_number')))

      print(u'Block size\t\t\t\t\t\t\t\t: {0:d}'.format(self.block_size))

      print(u'Number of entries\t\t\t\t\t\t\t: {0:d}'.format(
          self.number_of_entries))

      print(u'Maximum number of entries\t\t\t\t\t\t: {0:d}'.format(
          file_header.get(u'maximum_number_of_entries')))

      # TODO: print emtpy, hints, updating and user.

      # Walk the allocation bitmap from the least significant bit of every
      # 32-bit value and print each run of consecutive set bits as a range
      # with an exclusive end.
      block_range_format = u'Block range\t: {0:d} - {1:d} ({2:d})'
      block_number = 0
      block_range_start = 0
      in_block_range = False
      for value_32bit in file_header.get(u'allocation_bitmap'):
        for unused_bit in range(0, 32):
          if value_32bit & 0x00000001:
            if not in_block_range:
              block_range_start = block_number
              in_block_range = True

          elif in_block_range:
            in_block_range = False
            print(block_range_format.format(
                block_range_start, block_number,
                block_number - block_range_start))

          value_32bit >>= 1
          block_number += 1

      if in_block_range:
        # The last range runs up to the end of the bitmap.
        print(block_range_format.format(
            block_range_start, block_number,
            block_number - block_range_start))

      print(u'')

  def ReadCacheEntry(self, block_offset):
    """Reads a cache entry.

    Args:
      block_offset (int): offset of the block that contains the cache entry.

    Returns:
      CacheEntry: cache entry with its hash, next and rankings node
          addresses, creation time and key set.

    Raises:
      IOError: if the cache entry cannot be parsed.
    """
    if self._debug:
      print(u'Seeking cache entry offset: 0x{0:08x}'.format(block_offset))

    self._file_object.seek(block_offset, os.SEEK_SET)

    cache_entry_data = self._file_object.read(self._CACHE_ENTRY.sizeof())

    if self._debug:
      print(u'Data block file cache entry data:')
      print(hexdump.Hexdump(cache_entry_data))

    try:
      cache_entry_struct = self._CACHE_ENTRY.parse(cache_entry_data)
    except construct.FieldError as exception:
      # The !s conversion is used since exceptions do not support the "s"
      # format specification on Python 3.
      raise IOError(u'Unable to parse cache entry with error: {0!s}'.format(
          exception))

    cache_entry = CacheEntry()

    cache_entry.hash = cache_entry_struct.get(u'hash')

    cache_entry.next = CacheAddress(cache_entry_struct.get(u'next_address'))
    cache_entry.rankings_node = CacheAddress(cache_entry_struct.get(
        u'rankings_node_address'))

    cache_entry.creation_time = cache_entry_struct.get(u'creation_time')

    # The key is parsed as a sequence of byte values. Going through
    # bytearray yields a byte string on both Python 2 and 3.
    byte_array = cache_entry_struct.get(u'key')
    byte_string = bytes(bytearray(byte_array))
    # The key ends at the first NUL byte.
    cache_entry.key, _, _ = byte_string.partition(b'\x00')

    if self._debug:
      print(u'Hash\t\t\t\t\t\t\t\t\t: 0x{0:08x}'.format(cache_entry.hash))

      print(u'Next address\t\t\t\t\t\t\t\t: {0:s}'.format(
          cache_entry.next.GetDebugString()))

      print(u'Rankings node address\t\t\t\t\t\t\t: {0:s}'.format(
          cache_entry.rankings_node.GetDebugString()))

      print(u'Reuse count\t\t\t\t\t\t\t\t: {0:d}'.format(
          cache_entry_struct.get(u'reuse_count')))

      print(u'Refetch count\t\t\t\t\t\t\t\t: {0:d}'.format(
          cache_entry_struct.get(u'refetch_count')))

      print(u'State\t\t\t\t\t\t\t\t\t: 0x{0:08x}'.format(
          cache_entry_struct.get(u'state')))

      # The creation time is printed as a number of microseconds since
      # January 1, 1601.
      date_string = (datetime.datetime(1601, 1, 1) +
                     datetime.timedelta(microseconds=cache_entry.creation_time))

      print(u'Creation time\t\t\t\t\t\t\t\t: {0!s} (0x{1:08x})'.format(
          date_string, cache_entry.creation_time))

      for value in cache_entry_struct.get(u'data_stream_sizes'):
        print(u'Data stream size\t\t\t\t\t\t\t: {0:d}'.format(value))

      data_stream_addresses = cache_entry_struct.get(u'data_stream_addresses')
      for cache_address_index, value in enumerate(data_stream_addresses):
        cache_address = CacheAddress(value)
        print(u'Data stream address: {0:d}\t\t\t\t\t\t\t: {1:s}'.format(
            cache_address_index, cache_address.GetDebugString()))

      print(u'Flags\t\t\t\t\t\t\t\t\t: 0x{0:08x}'.format(
          cache_entry_struct.get(u'flags')))

      print(u'Self hash\t\t\t\t\t\t\t\t: 0x{0:08x}'.format(
          cache_entry_struct.get(u'self_hash')))

      try:
        cache_entry_key = cache_entry.key.decode(u'ascii')
      except UnicodeDecodeError:
        # Report where this entry was read from, not the address of the
        # last data stream handled above.
        logging.warning((
            u'Unable to decode cache entry key at block offset: '
            u'0x{0:08x}. Characters that cannot be decoded will be '
            u'replaced with "?" or "\\ufffd".').format(block_offset))
        cache_entry_key = cache_entry.key.decode(u'ascii', errors=u'replace')

      print(u'Key\t\t\t\t\t\t\t\t\t: {0:s}'.format(cache_entry_key))

      # TODO: calculate and verify hash.

      print(u'')

    return cache_entry

  def Close(self):
    """Closes the data block file.

    The file object is closed only if this object is marked as having
    opened it, which Open() does. Calling Close() again, or calling it
    when no file is open, has no effect.
    """
    if self._file_object_opened_in_object and self._file_object is not None:
      self._file_object.close()
    # Reset the ownership flag as well, so that a later Close() does not
    # try to close a file object that is no longer there.
    self._file_object_opened_in_object = False
    self._file_object = None

  def Open(self, filename):
    """Opens the data block file.

    Args:
      filename (str): path of the file.

    Raises:
      IOError: if the file cannot be opened or its file header cannot be
          read.
    """
    self._file_object = open(filename, 'rb')
    self._file_object_opened_in_object = True
    try:
      self._ReadFileHeader()
    except Exception:
      # Do not leak the file handle when the header turns out to be invalid.
      self.Close()
      raise

  def OpenFileObject(self, file_object):
    """Opens the data block file.

    Args:
      file_object (file): file-like object.
Example #18
0
    def _getInvariantCode(self, rawCode):
        """
            Helper method that tries to narrow down "invariant code" which can be hashed and compared to well known
            signatures. Most MBRs have localized error strings which must be excluded from the hash computation because
            they may vary from a country to another.
            First, this method tries to detect what kind of MBR it is dealing with. Most of the time, it is enough
            to look for some known hardcoded strings that identify "well known" MBR (such as Truecrypt, GRUB2, etc...).
            Then, this method finds where the strings are and "removes" them (as in "does not include them").
            Finding these strings can be achieved by quickly studying the assembly code and looking for how these
            strings are echoed on screen at boot time (using interrupt 0x10).
            This research only needs to be done once for each type of MBR but requires an analyst to do it by static
            analysis. This script cannot take care of this. This method merely implements the results of such work.

            Currently supported MBR are:
             - Truecrypt
             - McAfee Endpoint Encryption (Safeboot)
             - GRUB2
             - Windows (XP to 10)

        Args:
            rawCode: byte string (str on Python 2, bytes on Python 3) of the code section. When none of the
                hardcoded strings is found, it is read up to offset 0x1b5.

        Returns: 2-tuple (expected loader name or None when it is unknown, concatenated invariant sections of code
            as a byte string of the same type as rawCode)

        """
        # By default, assume all the MBR code section will be hashed. It is obviously wrong in most cases, but it allows
        # for a "default case" which will automatically match no known hash in case something goes wrong with the
        # detection.
        codeStart = 0
        codeEnd = len(rawCode)
        expectedLoader = None
        # An empty slice has the same type as rawCode, so the concatenations below work for str and bytes alike.
        invariantCode = rawCode[:0]

        # The signatures are compared as byte strings: str.encode('hex') only exists on Python 2.

        # TrueCrypt (detected with the hardcoded string following the first jump: " TrueCrypt Boot Loader")
        if rawCode[0x5:0x1b] == b' TrueCrypt Boot Loader':
            # TrueCrypt uses hardcoded and not-localized error strings. Therefore every TrueCrypt MBR should have the
            # same code from start to end
            expectedLoader = 'TrueCrypt MBR'

        # McAfee SafeBoot (detected with the hardcoded string following the first jump: "SafeBoot ")
        elif rawCode[0x3:0xc] == b'SafeBoot ':
            # Two versions have been seen but both start with a jump to the same offset (0x26).
            # There are some strings at the end of the code section but localization is unlikely so it will be assumed
            # to be hardcoded (until a localized version is found...).
            # Therefore, Safeboot code can be hashed from 0x26 to the end of code section
            invariantCode += rawCode[:0x3]  # Hash the first JMP
            codeStart = 0x26
            expectedLoader = 'Safeboot MBR'

        # GRUB (detected with the hardcoded string "GRUB " located at 0x188)
        elif rawCode[0x188:0x18d] == b'GRUB ':
            # GRUB has some error strings but they are hardcoded and not localized so they can be included in the hash
            # computation. However GRUB can be installed on a disk (MBR) as well as on a partition (in a kind of VBR).
            # But in both cases the code used is the same. Since a BPB is needed for the latter case it is also present
            # in the MBR (but not needed). It therefore has to be excluded from the hash computation.
            # GRUB is jumping over the BIOS Parameter Block located between 0x3 and 0x5a.
            # It should be followed by the kernel address (word), kernel sector (dword), kernel sector high (dword) and
            # boot drive (byte). Therefore the code really starts at 0x65.
            # These values are hardcoded in boot.img and have little chance to change anytime soon.
            codeStart = 0x65
            invariantCode += rawCode[:0x3]  # Hash the first JMP
            expectedLoader = 'GRUB2 MBR'

        # Windows MBR cannot be detected with hardcoded strings, so they fall in the default case and further checks
        # are then made based on the hypothesis that this is indeed a Windows MBR.
        else:
            # Starting with NT5.0, the MBR contains localized strings which must be excluded from the hash computation.
            # These strings are located after the code, at 3 different offsets which can be calculated by adding 0x100
            # to the values respectively stored in bytes 0x1b5, 0x1b6 and 0x1b7 (last bytes of the code section).
            # Eg: The first localized string is at : 0x100 + the value saved at offset 0x1B5
            # Even though localized strings can be of different lengths, the offset of the first one does not vary
            # given one Windows version. This can therefore be used to tell Windows versions apart.
            # A one byte slice is parsed since indexing bytes yields an int on Python 3.
            firstStrOffset = construct.UBInt8('FirstStringOffset').parse(
                rawCode[0x1b5:0x1b6])
            # Offset of the first localized string for each known Windows MBR version.
            windowsLoaders = {
                0x2c: 'NT5.1/NT5.2 MBR',
                0x62: 'NT6.0 MBR',
                0x63: 'NT6.1+ MBR',
            }
            if firstStrOffset in windowsLoaders:
                expectedLoader = windowsLoaders[firstStrOffset]
                # The code ends where the first localized string starts.
                codeEnd = 0x100 + firstStrOffset
            else:
                self._suspiciousBehaviour.append(
                    'Invalid string offset: {0:#x}'.format(firstStrOffset))
                self._logger.debug(
                    'First localized string offset is wrong for a Windows MBR '
                    '(it should be 0x2c, 0x62 or 0x63): {0:#x}'.format(
                        firstStrOffset))

        self._logger.debug(
            'Expecting {0}. Code starts at {1:#x} and ends at {2:#x}'.format(
                expectedLoader, codeStart, codeEnd))

        invariantCode += rawCode[codeStart:codeEnd]
        return expectedLoader, invariantCode
Example #19
0
class CupsIppParser(interface.FileObjectParser):
    """Parser for CUPS IPP files."""

    NAME = u'cups_ipp'
    DESCRIPTION = u'Parser for CUPS IPP files.'

    # INFO:
    # For each file, we have only one document with three different timestamps:
    # Created, process and finished.
    # Format:
    # [HEADER: MAGIC + KNOWN_TYPE][GROUP A]...[GROUP Z][GROUP_END: 0x03]
    # GROUP: [GROUP ID][PAIR A]...[PAIR Z] where [PAIR: NAME + VALUE]
    #   GROUP ID: [1byte ID]
    #   PAIR: [TagID][\x00][Name][Value]
    #     TagID: 1 byte integer with the type of "Value".
    #     Name: [Length][Text][\00]
    #       Name can be empty when the name has more than one value.
    #       Example: family name "lopez mata" with more than one surname.
    #       Type_Text + [0x06, family, 0x00] + [0x05, lopez, 0x00] +
    #       Type_Text + [0x00, 0x00] + [0x04, mata, 0x00]
    #     Value: can be integer, boolean, or text provided by TagID.
    #       If boolean, Value: [\x01][0x00(False)] or [\x01(True)]
    #       If integer, Value: [\x04][Integer]
    #       If text,    Value: [Length text][Text][\00]

    # Magic number that identify the CUPS IPP supported version.
    IPP_MAJOR_VERSION = 2
    IPP_MINOR_VERSION = 0
    # Supported Operation ID.
    IPP_OP_ID = 5

    # CUPS IPP File header.
    CUPS_IPP_HEADER = construct.Struct(u'cups_ipp_header_struct',
                                       construct.UBInt8(u'major_version'),
                                       construct.UBInt8(u'minor_version'),
                                       construct.UBInt16(u'operation_id'),
                                       construct.UBInt32(u'request_id'))

    # Group ID that indicates the end of the IPP Control file.
    GROUP_END = 3
    # Identification Groups.
    GROUP_LIST = [1, 2, 4, 5, 6, 7]

    # Type ID, per cups source file ipp-support.c.
    TYPE_GENERAL_INTEGER = 0x20
    TYPE_INTEGER = 0x21
    TYPE_BOOL = 0x22
    TYPE_ENUMERATION = 0x23
    TYPE_DATETIME = 0x31

    # Type of values that can be extracted.
    INTEGER_8 = construct.UBInt8(u'integer')
    INTEGER_32 = construct.UBInt32(u'integer')
    TEXT = construct.PascalString(u'text',
                                  encoding='utf-8',
                                  length_field=construct.UBInt8(u'length'))
    BOOLEAN = construct.Struct(u'boolean_value', construct.Padding(1),
                               INTEGER_8)
    INTEGER = construct.Struct(u'integer_value', construct.Padding(1),
                               INTEGER_32)

    # This is an RFC 2579 datetime.
    DATETIME = construct.Struct(
        u'datetime',
        construct.Padding(1),
        construct.UBInt16(u'year'),
        construct.UBInt8(u'month'),
        construct.UBInt8(u'day'),
        construct.UBInt8(u'hour'),
        construct.UBInt8(u'minutes'),
        construct.UBInt8(u'seconds'),
        construct.UBInt8(u'deciseconds'),
        construct.String(u'direction_from_utc', length=1, encoding='ascii'),
        construct.UBInt8(u'hours_from_utc'),
        construct.UBInt8(u'minutes_from_utc'),
    )

    # Name of the pair.
    PAIR_NAME = construct.Struct(u'pair_name', TEXT, construct.Padding(1))

    # Specific CUPS IPP to generic name.
    NAME_PAIR_TRANSLATION = {
        u'printer-uri': u'uri',
        u'job-uuid': u'job_id',
        u'DestinationPrinterID': u'printer_id',
        u'job-originating-user-name': u'user',
        u'job-name': u'job_name',
        u'document-format': u'doc_type',
        u'job-originating-host-name': u'computer_name',
        u'com.apple.print.JobInfo.PMApplicationName': u'application',
        u'com.apple.print.JobInfo.PMJobOwner': u'owner'
    }

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a CUPS IPP file-like object.

        The attribute pairs are read until the end of the file is reached
        or a pair cannot be read. An event is then produced for every known
        time attribute that was found.

        Args:
          parser_mediator: A parser mediator object (instance of
              ParserMediator).
          file_object: A file-like object.

        Raises:
          UnableToParseFile: when the file cannot be parsed.
        """
        try:
            header = self.CUPS_IPP_HEADER.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            # The !s conversion is used since exceptions do not support the
            # "s" format specification on Python 3.
            raise errors.UnableToParseFile(
                u'Unable to parse CUPS IPP Header with error: {0!s}'.format(
                    exception))

        if (header.major_version != self.IPP_MAJOR_VERSION
                or header.minor_version != self.IPP_MINOR_VERSION):
            raise errors.UnableToParseFile(
                u'[{0:s}] Unsupported version number.'.format(self.NAME))

        if header.operation_id != self.IPP_OP_ID:
            # Warn if the operation ID differs from the standard one. We should be
            # able to parse the file nonetheless.
            logging.debug(
                u'[{0:s}] Unsupported operation identifier in file: {1:s}.'.
                format(self.NAME, parser_mediator.GetDisplayName()))

        # Read the pairs extracting the name and the value. ReadPair returns
        # (None, None) when there is nothing more to read; an empty name or
        # a value such as 0 or False must not end the loop.
        data_dict = {}
        name, value = self.ReadPair(parser_mediator, file_object)
        while name is not None or value is not None:
            # Translate the known "name" CUPS IPP to a generic name value.
            pretty_name = self.NAME_PAIR_TRANSLATION.get(name, name)
            data_dict.setdefault(pretty_name, []).append(value)
            name, value = self.ReadPair(parser_mediator, file_object)

        # Move the time values out of the event data. The keys are copied
        # first since a dictionary cannot change size while it is iterated.
        time_dict = {}
        for key in list(data_dict):
            if key.startswith((u'date-time-', u'time-')):
                time_dict[key] = data_dict.pop(key)

        # Attribute name, timestamp description and whether the value is a
        # POSIX time that still has to be converted into a timestamp.
        time_attributes = (
            (u'date-time-at-creation',
             eventdata.EventTimestamp.CREATION_TIME, False),
            (u'date-time-at-processing',
             eventdata.EventTimestamp.START_TIME, False),
            (u'date-time-at-completed',
             eventdata.EventTimestamp.END_TIME, False),
            (u'time-at-creation',
             eventdata.EventTimestamp.CREATION_TIME, True),
            (u'time-at-processing',
             eventdata.EventTimestamp.START_TIME, True),
            (u'time-at-completed',
             eventdata.EventTimestamp.END_TIME, True),
        )

        for key, timestamp_description, is_posix_time in time_attributes:
            if key not in time_dict:
                continue

            # Only the first value of a time attribute is used.
            timestamp = time_dict[key][0]
            if is_posix_time:
                timestamp = timelib.Timestamp.FromPosixTime(timestamp)

            event_object = CupsIppEvent(
                timestamp, timestamp_description, data_dict)
            parser_mediator.ProduceEvent(event_object)

    def ReadPair(self, parser_mediator, file_object):
        """Reads an attribute name and value pair from a CUPS IPP event.

        Args:
          parser_mediator: A parser mediator object (instance of
              ParserMediator).
          file_object: a file-like object that points to a file.

        Returns:
          A tuple of name and value. If the end group is reached or the
          name and value cannot be read both are set to None.
        """
        # Pair = Type ID + Name + Value.
        try:
            # Can be:
            #   Group ID + IDtag = Group ID (1byte) + Tag ID (1byte) + '0x00'.
            #   IDtag = Tag ID (1byte) + '0x00'.
            type_id = self.INTEGER_8.parse_stream(file_object)
            if type_id == self.GROUP_END:
                return None, None

            if type_id in self.GROUP_LIST:
                # If it is a group ID we must read the next byte that contains
                # the first TagID.
                type_id = self.INTEGER_8.parse_stream(file_object)

            # 0x00 separator character.
            _ = self.INTEGER_8.parse_stream(file_object)

        except (IOError, construct.FieldError):
            logging.warning(
                u'[{0:s}] Unsupported identifier in file: {1:s}.'.format(
                    self.NAME, parser_mediator.GetDisplayName()))
            return None, None

        # Name = Length name + name + 0x00
        try:
            name = self.PAIR_NAME.parse_stream(file_object).text
        except (IOError, construct.FieldError):
            logging.warning(u'[{0:s}] Unsupported name in file: {1:s}.'.format(
                self.NAME, parser_mediator.GetDisplayName()))
            return None, None

        # Value: can be integer, boolean or text select by Type ID.
        try:
            if type_id in [
                    self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER,
                    self.TYPE_ENUMERATION
            ]:
                value = self.INTEGER.parse_stream(file_object).integer

            elif type_id == self.TYPE_BOOL:
                value = bool(self.BOOLEAN.parse_stream(file_object).integer)

            elif type_id == self.TYPE_DATETIME:
                # Not named "datetime" to avoid shadowing the module name.
                datetime_struct = self.DATETIME.parse_stream(file_object)
                value = timelib.Timestamp.FromRFC2579Datetime(
                    datetime_struct.year, datetime_struct.month,
                    datetime_struct.day, datetime_struct.hour,
                    datetime_struct.minutes, datetime_struct.seconds,
                    datetime_struct.deciseconds,
                    datetime_struct.direction_from_utc,
                    datetime_struct.hours_from_utc,
                    datetime_struct.minutes_from_utc)

            else:
                # Every other type is read as text.
                value = self.TEXT.parse_stream(file_object)

        except (IOError, UnicodeDecodeError, construct.FieldError):
            logging.warning(
                u'[{0:s}] Unsupported value in file: {1:s}.'.format(
                    self.NAME, parser_mediator.GetDisplayName()))
            return None, None

        return name, value
Example #20
0
import construct as cst
import time
import sys

con = cst.Container  # alias

# 'pc_header': a 0x00 magic byte, a 'lines' byte that must be 1, an
# 'address' byte and a 0x03 magic byte.
HEADER = cst.Struct('pc_header', cst.Magic('\x00'),
                    cst.Const(cst.UBInt8('lines'), 1), cst.UBInt8('address'),
                    cst.Magic('\x03'))

# 'serst': a magic sequence followed by four one-bit flags and a closing
# magic value.
# NOTE(review): cst.Magic is used inside a BitStruct here and in TEMPO;
# confirm that the magic values are matched at the intended granularity.
SER_STATUS = cst.BitStruct('serst', cst.Magic('\x01\x01\x00'),
                           cst.Flag('schedule_enabled'),
                           cst.Flag('ack_enabled'), cst.Flag('further_pages'),
                           cst.Flag('interrupt_mode'), cst.Magic('\x00'))

# 'page_num': 3 raw bytes.
PAGE_IDX = cst.Bytes('page_num', 3)
# 'tempo': a magic sequence, a 2-bit 'display_ctrl' enumeration and a 4-bit
# 'persist_time' enumeration.
# NOTE(review): the S<n> names presumably stand for a time of n seconds;
# verify against the protocol documentation.
TEMPO = cst.BitStruct(
    'tempo', cst.Magic('\x01\x01'),
    cst.Enum(cst.BitField('display_ctrl', 2), TIMED=0, FIXED_ON=1,
             FIXED_OFF=2),
    cst.Enum(cst.BitField('persist_time', 4),
             S2=1,
             S5=2,
             S10=3,
             S20=4,
             S30=5,
             S45=6,
             S60=7,
             S90=8,
             S120=9))
Example #21
0
class GzipFile(file_object_io.FileObjectIO):
  """File-like object that exposes the uncompressed data of a gzip file.

     A gzip file consists of a file header, a DEFLATE compressed data stream
     and a file footer of 8 bytes.
  """
  _FILE_HEADER_STRUCT = construct.Struct(
      u'file_header',
      construct.ULInt16(u'signature'),
      construct.UBInt8(u'compression_method'),
      construct.UBInt8(u'flags'),
      construct.SLInt32(u'modification_time'),
      construct.UBInt8(u'extra_flags'),
      construct.UBInt8(u'operating_system'))

  _FILE_FOOTER_STRUCT = construct.Struct(
      u'file_footer',
      construct.ULInt32(u'checksum'),
      construct.ULInt32(u'uncompressed_data_size'))

  _FILE_SIGNATURE = 0x8b1f

  _COMPRESSION_METHOD_DEFLATE = 8

  _FLAG_FTEXT = 0x01
  _FLAG_FHCRC = 0x02
  _FLAG_FEXTRA = 0x04
  _FLAG_FNAME = 0x08
  _FLAG_FCOMMENT = 0x10

  def __init__(self, resolver_context, file_object=None):
    """Initializes the file-like object.

    Args:
      resolver_context: the resolver context (instance of resolver.Context).
      file_object: optional file-like object, which must not be set. The
                   default is None.

    Raises:
      ValueError: when file_object is set.
    """
    if file_object:
      raise ValueError(u'File object value set.')

    super(GzipFile, self).__init__(resolver_context)
    # Location of the compressed data, determined by _ReadFileHeader().
    self._compressed_data_offset = -1
    self._compressed_data_size = -1
    # Metadata read from the file header and footer.
    self.modification_time = None
    self.operating_system = None
    self.original_filename = None
    self.comment = None
    self.uncompressed_data_size = 0

  def _ReadFileHeader(self, file_object):
    """Reads the file header and locates the compressed data.

    Args:
      file_object: the file-like object to read from.

    Raises:
      FileFormatError: if file format related errors are detected.
    """
    file_object.seek(0, os.SEEK_SET)
    header = self._FILE_HEADER_STRUCT.parse_stream(file_object)
    self._compressed_data_offset = file_object.get_offset()

    signature = header.signature
    if signature != self._FILE_SIGNATURE:
      raise errors.FileFormatError(
          u'Unsuppored file signature: 0x{0:04x}.'.format(signature))

    compression_method = header.compression_method
    if compression_method != self._COMPRESSION_METHOD_DEFLATE:
      raise errors.FileFormatError(
          u'Unsuppored compression method: {0:d}.'.format(compression_method))

    self.modification_time = header.modification_time
    self.operating_system = header.operating_system

    flags = header.flags

    if flags & self._FLAG_FEXTRA:
      # The extra field is skipped: a 16-bit size followed by the data.
      size_struct = construct.ULInt16(u'extra_field_data_size')
      extra_field_data_size = size_struct.parse_stream(file_object)
      file_object.seek(extra_field_data_size, os.SEEK_CUR)
      self._compressed_data_offset += 2 + extra_field_data_size

    # Since encoding is set construct will convert the C strings below to
    # Unicode. Note that construct 2 does not support the encoding to be a
    # Unicode string.
    if flags & self._FLAG_FNAME:
      filename_struct = construct.CString(
          u'original_filename', encoding='iso-8859-1')
      self.original_filename = filename_struct.parse_stream(file_object)
      self._compressed_data_offset = file_object.get_offset()

    if flags & self._FLAG_FCOMMENT:
      comment_struct = construct.CString(u'comment', encoding='iso-8859-1')
      self.comment = comment_struct.parse_stream(file_object)
      self._compressed_data_offset = file_object.get_offset()

    if flags & self._FLAG_FHCRC:
      # The 2 bytes of the header checksum are not read, only accounted for.
      self._compressed_data_offset += 2

    # The compressed data runs up to the file footer of 8 bytes.
    footer_size = 8
    self._compressed_data_size = file_object.get_size() - (
        self._compressed_data_offset + footer_size)

  def _ReadFileFooter(self, file_object):
    """Reads the file footer, which is stored in the last 8 bytes.

    Args:
      file_object: the file-like object to read from.

    Raises:
      FileFormatError: if file format related errors are detected.
    """
    file_object.seek(-8, os.SEEK_END)
    footer = self._FILE_FOOTER_STRUCT.parse_stream(file_object)

    self.uncompressed_data_size = footer.uncompressed_data_size

  def _OpenFileObject(self, path_spec):
    """Opens the file-like object defined by path specification.

    The header and footer of the gzip file are read from the parent of the
    path specification, after which the compressed data is opened as a
    DEFLATE compressed stream on top of a data range.

    Args:
      path_spec: the path specification (instance of path.PathSpec).

    Returns:
      A file-like object.
    """
    parent_path_spec = path_spec.parent
    gzip_file_object = resolver.Resolver.OpenFileObject(
        parent_path_spec, resolver_context=self._resolver_context)

    try:
      self._ReadFileHeader(gzip_file_object)
      self._ReadFileFooter(gzip_file_object)

    finally:
      gzip_file_object.close()

    data_range = data_range_path_spec.DataRangePathSpec(
        range_offset=self._compressed_data_offset,
        range_size=self._compressed_data_size, parent=path_spec.parent)
    compressed_stream = compressed_stream_path_spec.CompressedStreamPathSpec(
        compression_method=definitions.COMPRESSION_METHOD_DEFLATE,
        parent=data_range)

    return resolver.Resolver.OpenFileObject(
        compressed_stream, resolver_context=self._resolver_context)
Example #22
0
class BSMParser(interface.FileObjectParser):
  """Parser for BSM files."""

  NAME = 'bsm_log'
  DESCRIPTION = 'Parser for BSM log files.'

  # BSM supported version (0x0b = 11).
  AUDIT_HEADER_VERSION = 11

  # Magic Trail Header.
  BSM_TOKEN_TRAILER_MAGIC = b'b105'

  # IP Version constants.
  AU_IPv4 = 4
  AU_IPv6 = 16

  IPV4_STRUCT = construct.UBInt32('ipv4')

  IPV6_STRUCT = construct.Struct(
      'ipv6',
      construct.UBInt64('high'),
      construct.UBInt64('low'))

  # Tested structures.
  # INFO: I have omitted the ID in the structures declaration.
  #       I used the BSM_TYPE first to read the ID, and then, the structure.
  # Tokens always start with an ID value that identifies their token
  # type and subsequent structure.
  _BSM_TOKEN = construct.UBInt8('token_id')

  # Data type structures.
  BSM_TOKEN_DATA_CHAR = construct.String('value', 1)
  BSM_TOKEN_DATA_SHORT = construct.UBInt16('value')
  BSM_TOKEN_DATA_INTEGER = construct.UBInt32('value')

  # Common structure used by other structures.
  # audit_uid: integer, uid that generates the entry.
  # effective_uid: integer, the permission user used.
  # effective_gid: integer, the permission group used.
  # real_uid: integer, user id of the user that execute the process.
  # real_gid: integer, group id of the group that execute the process.
  # pid: integer, identification number of the process.
  # session_id: unknown, need research.
  BSM_TOKEN_SUBJECT_SHORT = construct.Struct(
      'subject_data',
      construct.UBInt32('audit_uid'),
      construct.UBInt32('effective_uid'),
      construct.UBInt32('effective_gid'),
      construct.UBInt32('real_uid'),
      construct.UBInt32('real_gid'),
      construct.UBInt32('pid'),
      construct.UBInt32('session_id'))

  # Common structure used by other structures.
  # Identify the kind of inet (IPv4 or IPv6)
  # TODO: instead of 16, AU_IPv6 must be used.
  BSM_IP_TYPE_SHORT = construct.Struct(
      'bsm_ip_type_short',
      construct.UBInt32('net_type'),
      construct.Switch(
          'ip_addr',
          _BSMTokenGetNetType,
          {16: IPV6_STRUCT},
          default=IPV4_STRUCT))

  # Initial fields structure used by header structures.
  # length: integer, the length of the entry, equal to trailer (doc: length).
  # version: integer, version of BSM (AUDIT_HEADER_VERSION).
  # event_type: integer, the type of event (/etc/security/audit_event).
  # modifier: integer, unknown, need research (It is always 0).
  BSM_HEADER = construct.Struct(
      'bsm_header',
      construct.UBInt32('length'),
      construct.UBInt8('version'),
      construct.UBInt16('event_type'),
      construct.UBInt16('modifier'))

  # First token of one entry.
  # timestamp: unsigned integer, number of seconds since
  #            January 1, 1970 00:00:00 UTC.
  # microseconds: unsigned integer, number of micro seconds.
  BSM_HEADER32 = construct.Struct(
      'bsm_header32',
      BSM_HEADER,
      construct.UBInt32('timestamp'),
      construct.UBInt32('microseconds'))

  BSM_HEADER64 = construct.Struct(
      'bsm_header64',
      BSM_HEADER,
      construct.UBInt64('timestamp'),
      construct.UBInt64('microseconds'))

  BSM_HEADER32_EX = construct.Struct(
      'bsm_header32_ex',
      BSM_HEADER,
      BSM_IP_TYPE_SHORT,
      construct.UBInt32('timestamp'),
      construct.UBInt32('microseconds'))

  # Token TEXT, provides extra information.
  BSM_TOKEN_TEXT = construct.Struct(
      'bsm_token_text',
      construct.UBInt16('length'),
      construct.Array(_BSMTokenGetLength, construct.UBInt8('text')))

  # Path of the executable.
  BSM_TOKEN_PATH = BSM_TOKEN_TEXT

  # Identifies the end of the record (followed by TRAILER).
  # status: integer that identifies the status of the exit (BSM_ERRORS).
  # return: returned value from the operation.
  BSM_TOKEN_RETURN32 = construct.Struct(
      'bsm_token_return32',
      construct.UBInt8('status'),
      construct.UBInt32('return_value'))

  BSM_TOKEN_RETURN64 = construct.Struct(
      'bsm_token_return64',
      construct.UBInt8('status'),
      construct.UBInt64('return_value'))

  # Identifies the number of bytes that were written.
  # magic: 2 bytes that identifies the TRAILER (BSM_TOKEN_TRAILER_MAGIC).
  # length: integer that has the number of bytes from the entry size.
  BSM_TOKEN_TRAILER = construct.Struct(
      'bsm_token_trailer',
      construct.UBInt16('magic'),
      construct.UBInt32('record_length'))

  # A 32-bits argument.
  # num_arg: the number of the argument.
  # name_arg: the argument's name.
  # text: the string value of the argument.
  BSM_TOKEN_ARGUMENT32 = construct.Struct(
      'bsm_token_argument32',
      construct.UBInt8('num_arg'),
      construct.UBInt32('name_arg'),
      construct.UBInt16('length'),
      construct.Array(_BSMTokenGetLength, construct.UBInt8('text')))

  # A 64-bits argument.
  # num_arg: integer, the number of the argument.
  # name_arg: text, the argument's name.
  # text: the string value of the argument.
  BSM_TOKEN_ARGUMENT64 = construct.Struct(
      'bsm_token_argument64',
      construct.UBInt8('num_arg'),
      construct.UBInt64('name_arg'),
      construct.UBInt16('length'),
      construct.Array(_BSMTokenGetLength, construct.UBInt8('text')))

  # Identifies a user.
  # terminal_id: unknown, research needed.
  # terminal_addr: unknown, research needed.
  BSM_TOKEN_SUBJECT32 = construct.Struct(
      'bsm_token_subject32',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      IPV4_STRUCT)

  # Identifies a user using an extended token.
  # terminal_port: unknown, need research.
  # net_type: unknown, need research.
  BSM_TOKEN_SUBJECT32_EX = construct.Struct(
      'bsm_token_subject32_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_opaque // AUT_OPAQUE
  BSM_TOKEN_OPAQUE = BSM_TOKEN_TEXT

  # au_to_seq // AUT_SEQ
  BSM_TOKEN_SEQUENCE = BSM_TOKEN_DATA_INTEGER

  # Program execution with options.
  # For each argument we are going to have a string+ "\x00".
  # Example: [00 00 00 02][41 42 43 00 42 42 00]
  #          2 Arguments, Arg1: [414243] Arg2: [4242].
  BSM_TOKEN_EXEC_ARGUMENTS = construct.UBInt32('number_arguments')

  BSM_TOKEN_EXEC_ARGUMENT = construct.Struct(
      'bsm_token_exec_argument',
      construct.RepeatUntil(
          _BSMTokenIsEndOfString, construct.StaticField("text", 1)))

  # au_to_in_addr // AUT_IN_ADDR:
  BSM_TOKEN_ADDR = IPV4_STRUCT

  # au_to_in_addr_ext // AUT_IN_ADDR_EX:
  BSM_TOKEN_ADDR_EXT = construct.Struct(
      'bsm_token_addr_ext',
      construct.UBInt32('net_type'),
      IPV6_STRUCT)

  # au_to_ip // AUT_IP:
  # TODO: parse this header in the correct way.
  BSM_TOKEN_IP = construct.String('binary_ipv4_add', 20)

  # au_to_ipc // AUT_IPC:
  BSM_TOKEN_IPC = construct.Struct(
      'bsm_token_ipc',
      construct.UBInt8('object_type'),
      construct.UBInt32('object_id'))

  # au_to_ipc_perm // AUT_IPC_PERM
  BSM_TOKEN_IPC_PERM = construct.Struct(
      'bsm_token_ipc_perm',
      construct.UBInt32('user_id'),
      construct.UBInt32('group_id'),
      construct.UBInt32('creator_user_id'),
      construct.UBInt32('creator_group_id'),
      construct.UBInt32('access_mode'),
      construct.UBInt32('slot_seq'),
      construct.UBInt32('key'))

  # au_to_iport // AUT_IPORT:
  BSM_TOKEN_PORT = construct.UBInt16('port_number')

  # au_to_file // AUT_OTHER_FILE32:
  BSM_TOKEN_FILE = construct.Struct(
      'bsm_token_file',
      construct.UBInt32('timestamp'),
      construct.UBInt32('microseconds'),
      construct.UBInt16('length'),
      construct.Array(_BSMTokenGetLength, construct.UBInt8('text')))

  # au_to_subject64 // AUT_SUBJECT64:
  BSM_TOKEN_SUBJECT64 = construct.Struct(
      'bsm_token_subject64',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64('terminal_port'),
      IPV4_STRUCT)

  # au_to_subject64_ex // AUT_SUBJECT64_EX:
  BSM_TOKEN_SUBJECT64_EX = construct.Struct(
      'bsm_token_subject64_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      construct.UBInt32('terminal_type'),
      BSM_IP_TYPE_SHORT)

  # au_to_process32 // AUT_PROCESS32:
  BSM_TOKEN_PROCESS32 = construct.Struct(
      'bsm_token_process32',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      IPV4_STRUCT)

  # au_to_process64 // AUT_PROCESS64:
  BSM_TOKEN_PROCESS64 = construct.Struct(
      'bsm_token_process64',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64('terminal_port'),
      IPV4_STRUCT)

  # au_to_process32_ex // AUT_PROCESS32_EX:
  BSM_TOKEN_PROCESS32_EX = construct.Struct(
      'bsm_token_process32_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_process64_ex // AUT_PROCESS64_EX:
  BSM_TOKEN_PROCESS64_EX = construct.Struct(
      'bsm_token_process64_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64('terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_sock_inet32 // AUT_SOCKINET32:
  BSM_TOKEN_AUT_SOCKINET32 = construct.Struct(
      'bsm_token_aut_sockinet32',
      construct.UBInt16('net_type'),
      construct.UBInt16('port_number'),
      IPV4_STRUCT)

  # Info: checked against the source code of XNU, but not against
  #       real BSM file.
  BSM_TOKEN_AUT_SOCKINET128 = construct.Struct(
      'bsm_token_aut_sockinet128',
      construct.UBInt16('net_type'),
      construct.UBInt16('port_number'),
      IPV6_STRUCT)

  INET6_ADDR_TYPE = construct.Struct(
      'addr_type',
      construct.UBInt16('ip_type'),
      construct.UBInt16('source_port'),
      construct.UBInt64('saddr_high'),
      construct.UBInt64('saddr_low'),
      construct.UBInt16('destination_port'),
      construct.UBInt64('daddr_high'),
      construct.UBInt64('daddr_low'))

  INET4_ADDR_TYPE = construct.Struct(
      'addr_type',
      construct.UBInt16('ip_type'),
      construct.UBInt16('source_port'),
      construct.UBInt32('source_address'),
      construct.UBInt16('destination_port'),
      construct.UBInt32('destination_address'))

  # au_to_socket_ex // AUT_SOCKET_EX
  # TODO: Change the 26 for unixbsm.BSM_PROTOCOLS.INET6.
  BSM_TOKEN_AUT_SOCKINET32_EX = construct.Struct(
      'bsm_token_aut_sockinet32_ex',
      construct.UBInt16('socket_domain'),
      construct.UBInt16('socket_type'),
      construct.Switch(
          'structure_addr_port',
          _BSMTokenGetSocketDomain,
          {26: INET6_ADDR_TYPE},
          default=INET4_ADDR_TYPE))

  # au_to_sock_unix // AUT_SOCKUNIX
  BSM_TOKEN_SOCKET_UNIX = construct.Struct(
      'bsm_token_au_to_sock_unix',
      construct.UBInt16('family'),
      construct.RepeatUntil(
          _BSMTokenIsEndOfString,
          construct.StaticField("path", 1)))

  # au_to_data // AUT_DATA
  # how to print: bsmtoken.BSM_TOKEN_DATA_PRINT.
  # type: bsmtoken.BSM_TOKEN_DATA_TYPE.
  # unit_count: number of type values.
  # BSM_TOKEN_DATA has a end field = type * unit_count
  BSM_TOKEN_DATA = construct.Struct(
      'bsm_token_data',
      construct.UBInt8('how_to_print'),
      construct.UBInt8('data_type'),
      construct.UBInt8('unit_count'))

  # au_to_attr32 // AUT_ATTR32
  BSM_TOKEN_ATTR32 = construct.Struct(
      'bsm_token_attr32',
      construct.UBInt32('file_mode'),
      construct.UBInt32('uid'),
      construct.UBInt32('gid'),
      construct.UBInt32('file_system_id'),
      construct.UBInt64('file_system_node_id'),
      construct.UBInt32('device'))

  # au_to_attr64 // AUT_ATTR64
  BSM_TOKEN_ATTR64 = construct.Struct(
      'bsm_token_attr64',
      construct.UBInt32('file_mode'),
      construct.UBInt32('uid'),
      construct.UBInt32('gid'),
      construct.UBInt32('file_system_id'),
      construct.UBInt64('file_system_node_id'),
      construct.UBInt64('device'))

  # au_to_exit // AUT_EXIT
  BSM_TOKEN_EXIT = construct.Struct(
      'bsm_token_exit',
      construct.UBInt32('status'),
      construct.UBInt32('return_value'))

  # au_to_newgroups // AUT_NEWGROUPS
  # INFO: we must read BSM_TOKEN_DATA_INTEGER for each group.
  BSM_TOKEN_GROUPS = construct.UBInt16('group_number')

  # au_to_exec_env == au_to_exec_args
  BSM_TOKEN_EXEC_ENV = BSM_TOKEN_EXEC_ARGUMENTS

  # au_to_zonename //AUT_ZONENAME
  BSM_TOKEN_ZONENAME = BSM_TOKEN_TEXT

  # Token ID.
  # List of valid Token_ID.
  # Token_ID -> (NAME_STRUCTURE, STRUCTURE)
  # Only the checked structures have been added to the valid structures list.
  _BSM_TOKEN_TYPES = {
      17: ('BSM_TOKEN_FILE', BSM_TOKEN_FILE),
      19: ('BSM_TOKEN_TRAILER', BSM_TOKEN_TRAILER),
      20: ('BSM_HEADER32', BSM_HEADER32),
      21: ('BSM_HEADER64', BSM_HEADER64),
      33: ('BSM_TOKEN_DATA', BSM_TOKEN_DATA),
      34: ('BSM_TOKEN_IPC', BSM_TOKEN_IPC),
      35: ('BSM_TOKEN_PATH', BSM_TOKEN_PATH),
      36: ('BSM_TOKEN_SUBJECT32', BSM_TOKEN_SUBJECT32),
      38: ('BSM_TOKEN_PROCESS32', BSM_TOKEN_PROCESS32),
      39: ('BSM_TOKEN_RETURN32', BSM_TOKEN_RETURN32),
      40: ('BSM_TOKEN_TEXT', BSM_TOKEN_TEXT),
      41: ('BSM_TOKEN_OPAQUE', BSM_TOKEN_OPAQUE),
      42: ('BSM_TOKEN_ADDR', BSM_TOKEN_ADDR),
      43: ('BSM_TOKEN_IP', BSM_TOKEN_IP),
      44: ('BSM_TOKEN_PORT', BSM_TOKEN_PORT),
      45: ('BSM_TOKEN_ARGUMENT32', BSM_TOKEN_ARGUMENT32),
      47: ('BSM_TOKEN_SEQUENCE', BSM_TOKEN_SEQUENCE),
      96: ('BSM_TOKEN_ZONENAME', BSM_TOKEN_ZONENAME),
      113: ('BSM_TOKEN_ARGUMENT64', BSM_TOKEN_ARGUMENT64),
      114: ('BSM_TOKEN_RETURN64', BSM_TOKEN_RETURN64),
      116: ('BSM_HEADER32_EX', BSM_HEADER32_EX),
      119: ('BSM_TOKEN_PROCESS64', BSM_TOKEN_PROCESS64),
      122: ('BSM_TOKEN_SUBJECT32_EX', BSM_TOKEN_SUBJECT32_EX),
      127: ('BSM_TOKEN_AUT_SOCKINET32_EX', BSM_TOKEN_AUT_SOCKINET32_EX),
      128: ('BSM_TOKEN_AUT_SOCKINET32', BSM_TOKEN_AUT_SOCKINET32)}

  # Untested structures.
  # When an untested structure is found, we also try to parse using
  # these structures.
  BSM_TYPE_LIST_NOT_TESTED = {
      49: ('BSM_TOKEN_ATTR', BSM_TOKEN_ATTR32),
      50: ('BSM_TOKEN_IPC_PERM', BSM_TOKEN_IPC_PERM),
      52: ('BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS),
      59: ('BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS),
      60: ('BSM_TOKEN_EXEC_ARGUMENTS', BSM_TOKEN_EXEC_ARGUMENTS),
      61: ('BSM_TOKEN_EXEC_ENV', BSM_TOKEN_EXEC_ENV),
      62: ('BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32),
      82: ('BSM_TOKEN_EXIT', BSM_TOKEN_EXIT),
      115: ('BSM_TOKEN_ATTR64', BSM_TOKEN_ATTR64),
      117: ('BSM_TOKEN_SUBJECT64', BSM_TOKEN_SUBJECT64),
      123: ('BSM_TOKEN_PROCESS32_EX', BSM_TOKEN_PROCESS32_EX),
      124: ('BSM_TOKEN_PROCESS64_EX', BSM_TOKEN_PROCESS64_EX),
      125: ('BSM_TOKEN_SUBJECT64_EX', BSM_TOKEN_SUBJECT64_EX),
      126: ('BSM_TOKEN_ADDR_EXT', BSM_TOKEN_ADDR_EXT),
      129: ('BSM_TOKEN_AUT_SOCKINET128', BSM_TOKEN_AUT_SOCKINET128),
      130: ('BSM_TOKEN_SOCKET_UNIX', BSM_TOKEN_SOCKET_UNIX)}

  MESSAGE_CAN_NOT_SAVE = (
      'Plaso: some tokens from this entry can not be saved. Entry at 0x{0:X} '
      'with unknown token id "0x{1:X}".')

  # BSM token types:
  # https://github.com/openbsm/openbsm/blob/master/sys/bsm/audit_record.h
  _BSM_TOKEN_TYPE_ARGUMENT32 = 45
  _BSM_TOKEN_TYPE_ARGUMENT64 = 113
  _BSM_TOKEN_TYPE_ATTR = 49
  _BSM_TOKEN_TYPE_ATTR32 = 62
  _BSM_TOKEN_TYPE_ATTR64 = 115
  _BSM_TOKEN_TYPE_EXEC_ARGUMENTS = 60
  _BSM_TOKEN_TYPE_EXEC_ENV = 61
  _BSM_TOKEN_TYPE_EXIT = 82
  _BSM_TOKEN_TYPE_HEADER32 = 20
  _BSM_TOKEN_TYPE_HEADER32_EX = 116
  _BSM_TOKEN_TYPE_HEADER64 = 21
  _BSM_TOKEN_TYPE_PATH = 35
  _BSM_TOKEN_TYPE_PROCESS32 = 38
  _BSM_TOKEN_TYPE_PROCESS32_EX = 123
  _BSM_TOKEN_TYPE_PROCESS64 = 119
  _BSM_TOKEN_TYPE_PROCESS64_EX = 124
  _BSM_TOKEN_TYPE_RETURN32 = 39
  _BSM_TOKEN_TYPE_RETURN64 = 114
  _BSM_TOKEN_TYPE_SUBJECT32 = 36
  _BSM_TOKEN_TYPE_SUBJECT32_EX = 122
  _BSM_TOKEN_TYPE_SUBJECT64 = 117
  _BSM_TOKEN_TYPE_SUBJECT64_EX = 125
  _BSM_TOKEN_TYPE_TEXT = 40
  _BSM_TOKEN_TYPE_ZONENAME = 96

  _BSM_ARGUMENT_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_ARGUMENT32,
      _BSM_TOKEN_TYPE_ARGUMENT64)

  _BSM_ATTR_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_ATTR,
      _BSM_TOKEN_TYPE_ATTR32,
      _BSM_TOKEN_TYPE_ATTR64)

  _BSM_EXEV_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_EXEC_ARGUMENTS,
      _BSM_TOKEN_TYPE_EXEC_ENV)

  _BSM_HEADER_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_HEADER32,
      _BSM_TOKEN_TYPE_HEADER32_EX,
      _BSM_TOKEN_TYPE_HEADER64)

  _BSM_PROCESS_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_PROCESS32,
      _BSM_TOKEN_TYPE_PROCESS64)

  _BSM_PROCESS_EX_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_PROCESS32_EX,
      _BSM_TOKEN_TYPE_PROCESS64_EX)

  _BSM_RETURN_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_EXIT,
      _BSM_TOKEN_TYPE_RETURN32,
      _BSM_TOKEN_TYPE_RETURN64)

  _BSM_SUBJECT_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_SUBJECT32,
      _BSM_TOKEN_TYPE_SUBJECT64)

  _BSM_SUBJECT_EX_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_SUBJECT32_EX,
      _BSM_TOKEN_TYPE_SUBJECT64_EX)

  _BSM_UTF8_BYTE_ARRAY_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_PATH,
      _BSM_TOKEN_TYPE_TEXT,
      _BSM_TOKEN_TYPE_ZONENAME)

  def __init__(self):
    """Initializes a parser object."""
    super(BSMParser, self).__init__()
    # Merge the tested and the untested token type mappings into a single
    # lookup table; untested entries are applied last.
    all_token_types = {}
    all_token_types.update(self._BSM_TOKEN_TYPES)
    all_token_types.update(self.BSM_TYPE_LIST_NOT_TESTED)
    self._bsm_type_list_all = all_token_types

  def _CopyByteArrayToBase16String(self, byte_array):
    """Copies a byte array into a base-16 encoded Unicode string.

    Args:
      byte_array (bytes): A byte array.

    Returns:
      str: a base-16 encoded Unicode string.
    """
    return ''.join(['{0:02x}'.format(byte) for byte in byte_array])

  def _CopyUtf8ByteArrayToString(self, byte_array):
    """Copies a UTF-8 encoded byte array into a Unicode string.

    Args:
      byte_array (bytes): A byte array containing an UTF-8 encoded string.

    Returns:
      str: A Unicode string.
    """
    byte_stream = b''.join(map(chr, byte_array))

    try:
      string = byte_stream.decode('utf-8')
    except UnicodeDecodeError:
      logging.warning('Unable to decode UTF-8 formatted byte array.')
      string = byte_stream.decode('utf-8', errors='ignore')

    string, _, _ = string.partition(b'\x00')
    return string

  def _IPv4Format(self, address):
    """Formats an IPv4 address as a human readable string.

    Args:
      address (int): IPv4 address.

    Returns:
      str: human readable string of IPv4 address in 4 octet representation:
          "1.2.3.4".
    """
    ipv4_string = self.IPV4_STRUCT.build(address)
    return socket.inet_ntoa(ipv4_string)

  def _IPv6Format(self, high, low):
    """Formats an IPv6 address as a human readable string.

    socket.inet_ntop is used when the socket module provides it. Otherwise
    the address is formatted manually: leading zeros of every 16-bit group
    are removed and only the first longest run of two or more zero groups is
    replaced by "::", so the result never contains more than one "::".

    Args:
      high (int): upper 64-bit part of the IPv6 address.
      low (int): lower 64-bit part of the IPv6 address.

    Returns:
      str: human readable string of IPv6 address.
    """
    ipv6_string = self.IPV6_STRUCT.build(
        construct.Container(high=high, low=low))
    # socket.inet_ntop not supported in Windows.
    if hasattr(socket, 'inet_ntop'):
      return socket.inet_ntop(socket.AF_INET6, ipv6_string)

    # hexlify returns a byte string; decode it so that the comparisons and
    # string operations below work on text.
    hexadecimal_string = binascii.hexlify(ipv6_string).decode('ascii')
    groups = [
        hexadecimal_string[index:index + 4].lstrip('0') or '0'
        for index in range(0, len(hexadecimal_string), 4)]

    # Locate the first longest run of consecutive zero groups.
    best_start = best_length = 0
    run_start = run_length = 0
    for index, group in enumerate(groups):
      if group != '0':
        run_length = 0
        continue
      if not run_length:
        run_start = index
      run_length += 1
      if run_length > best_length:
        best_start, best_length = run_start, run_length

    # A single zero group is not shortened.
    if best_length < 2:
      return ':'.join(groups)

    leading_groups = ':'.join(groups[:best_start])
    trailing_groups = ':'.join(groups[best_start + best_length:])
    return '::'.join((leading_groups, trailing_groups))

  def _ParseBSMEvent(self, parser_mediator, file_object):
    """Parses a BSM entry (BSMEvent) from the file-like object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Returns:
      bool: True if the BSM entry was parsed.
    """
    record_start_offset = file_object.tell()

    try:
      token_type = self._BSM_TOKEN.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceExtractionError((
          'unable to parse BSM token type at offset: 0x{0:08x} with error: '
          '{1:s}.').format(record_start_offset, exception))
      return False

    if token_type not in self._BSM_HEADER_TOKEN_TYPES:
      parser_mediator.ProduceExtractionError(
          'unsupported token type: {0:d} at offset: 0x{1:08x}.'.format(
              token_type, record_start_offset))
      # TODO: if it is a Mac OS X, search for the trailer magic value
      #       as a end of the entry can be a possibility to continue.
      return False

    _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None))

    try:
      token = record_structure.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceExtractionError((
          'unable to parse BSM record at offset: 0x{0:08x} with error: '
          '{1:s}.').format(record_start_offset, exception))
      return False

    event_type = bsmtoken.BSM_AUDIT_EVENT.get(
        token.bsm_header.event_type, 'UNKNOWN')
    event_type = '{0:s} ({1:d})'.format(
        event_type, token.bsm_header.event_type)

    timestamp = (token.timestamp * 1000000) + token.microseconds
    date_time = dfdatetime_posix_time.PosixTimeInMicroseconds(
        timestamp=timestamp)

    record_length = token.bsm_header.length
    record_end_offset = record_start_offset + record_length

    # A dict of tokens that has the entry.
    extra_tokens = {}

    # Read until we reach the end of the record.
    while file_object.tell() < record_end_offset:
      # Check if it is a known token.
      try:
        token_type = self._BSM_TOKEN.parse_stream(file_object)
      except (IOError, construct.FieldError):
        logging.warning(
            'Unable to parse the Token ID at position: {0:d}'.format(
                file_object.tell()))
        return False

      _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None))

      if not record_structure:
        pending = record_end_offset - file_object.tell()
        new_extra_tokens = self.TryWithUntestedStructures(
            file_object, token_type, pending)
        extra_tokens.update(new_extra_tokens)
      else:
        token = record_structure.parse_stream(file_object)
        new_extra_tokens = self.FormatToken(token_type, token, file_object)
        extra_tokens.update(new_extra_tokens)

    if file_object.tell() > record_end_offset:
      logging.warning(
          'Token ID {0:d} not expected at position 0x{1:08x}.'
          'Jumping for the next entry.'.format(
              token_type, file_object.tell()))
      try:
        file_object.seek(
            record_end_offset - file_object.tell(), os.SEEK_CUR)
      except (IOError, construct.FieldError) as exception:
        logging.warning(
            'Unable to jump to next entry with error: {0:s}'.format(exception))
        return False

    # BSM can be in more than one OS: BSD, Solaris and Mac OS X.
    if parser_mediator.platform != 'MacOSX':
      event_data = BSMEventData()
    else:
      event_data = MacBSMEventData()

      # In Mac OS X the last two tokens are the return status and the trailer.
      return_value = extra_tokens.get('BSM_TOKEN_RETURN32')
      if not return_value:
        return_value = extra_tokens.get('BSM_TOKEN_RETURN64')
      if not return_value:
        return_value = 'UNKNOWN'

      event_data.return_value = return_value

    event_data.event_type = event_type
    event_data.extra_tokens = extra_tokens
    event_data.offset = record_start_offset
    event_data.record_length = record_length

    # TODO: check why trailer was passed to event in original while
    # event was expecting record length.
    # if extra_tokens:
    #   trailer = extra_tokens.get('BSM_TOKEN_TRAILER', 'unknown')

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_CREATION)
    parser_mediator.ProduceEventWithEventData(event, event_data)

    return True

  def _RawToUTF8(self, byte_stream):
    """Copies a UTF-8 byte stream into a Unicode string.

    Args:
      byte_stream (bytes): byte stream containing an UTF-8 encoded string.

    Returns:
      str: A Unicode string.
    """
    try:
      string = byte_stream.decode('utf-8')
    except UnicodeDecodeError:
      logging.warning(
          'Decode UTF8 failed, the message string may be cut short.')
      string = byte_stream.decode('utf-8', errors='ignore')
    return string.partition(b'\x00')[0]

  def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses a BSM file-like object.

    The file is first checked with VerifyFile, after which BSM entries are
    parsed from the start of the file until _ParseBSMEvent reports that no
    further entry could be parsed.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    try:
      is_bsm = self.VerifyFile(parser_mediator, file_object)
    except (IOError, construct.FieldError) as exception:
      # Use "!s" since "{0:s}".format(exception) raises TypeError on
      # Python 3, which would mask the UnableToParseFile error.
      raise errors.UnableToParseFile(
          'Unable to parse BSM file with error: {0!s}'.format(exception))

    if not is_bsm:
      raise errors.UnableToParseFile('Not a BSM File, unable to parse.')

    # VerifyFile has consumed data; start over from the first entry.
    file_object.seek(0, os.SEEK_SET)

    while self._ParseBSMEvent(parser_mediator, file_object):
      pass

  def VerifyFile(self, parser_mediator, file_object):
    """Check if the file is a BSM file.

    The file must start with a supported header token whose version matches
    AUDIT_HEADER_VERSION and must be followed by at least one more token
    identifier. When the platform is Mac OS X, that token must be a text
    token containing the audit startup or recovery message.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Returns:
      bool: True if this is a valid BSM file, False otherwise.
    """
    # First part of the entry is always a Header.
    try:
      token_type = self._BSM_TOKEN.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False

    if token_type not in self._BSM_HEADER_TOKEN_TYPES:
      return False

    _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None))

    try:
      header = record_structure.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False

    if header.bsm_header.version != self.AUDIT_HEADER_VERSION:
      return False

    try:
      token_identifier = self._BSM_TOKEN.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False

    # If it is a Mac OS X BSM file, the next entry is a text token indicating
    # if it is a normal start or it is a recovery track.
    if parser_mediator.platform == 'MacOSX':
      token_type, record_structure = self._BSM_TOKEN_TYPES.get(
          token_identifier, ('', None))

      if not record_structure:
        return False

      if token_type != 'BSM_TOKEN_TEXT':
        logging.warning('It is not a valid first entry for Mac OS X BSM.')
        return False

      try:
        token = record_structure.parse_stream(file_object)
      except (IOError, construct.FieldError):
        # Return an explicit False, as documented, instead of None.
        return False

      text = self._CopyUtf8ByteArrayToString(token.text)
      if (text != 'launchctl::Audit startup' and
          text != 'launchctl::Audit recovery'):
        logging.warning('It is not a valid first entry for Mac OS X BSM.')
        return False

    return True

  def TryWithUntestedStructures(self, file_object, token_id, pending):
    """Try to parse the pending part of the entry using untested structures.

    Tokens are parsed with the tested and untested structures until the
    pending data was read or a token cannot be handled. If the read position
    does not end exactly at the end of the pending data, the position is
    moved there and the values parsed so far are discarded.

    Args:
      file_object: BSM file.
      token_id: integer with the id that comes from the unknown token.
      pending: pending length of the entry.

    Returns:
      dict: extra tokens data that can be parsed using non-tested
          structures, or an empty dictionary if the pending data could not
          be parsed completely.
    """
    # Data from the unknown structure.
    start_position = file_object.tell()
    start_token_id = token_id
    extra_tokens = {}

    # Read all the "pending" bytes.
    try:
      if token_id in self._bsm_type_list_all:
        token = self._bsm_type_list_all[token_id][1].parse_stream(file_object)
        new_extra_tokens = self.FormatToken(token_id, token, file_object)
        extra_tokens.update(new_extra_tokens)
        while file_object.tell() < (start_position + pending):
          # Check if it is a known token.
          try:
            token_id = self._BSM_TOKEN.parse_stream(file_object)
          except (IOError, construct.FieldError):
            logging.warning(
                'Unable to parse the Token ID at position: {0:d}'.format(
                    file_object.tell()))
            # Callers pass the result to dict.update(), hence return an
            # empty dictionary instead of None.
            return {}
          if token_id not in self._bsm_type_list_all:
            break
          token = self._bsm_type_list_all[token_id][1].parse_stream(file_object)
          new_extra_tokens = self.FormatToken(token_id, token, file_object)
          extra_tokens.update(new_extra_tokens)
    except (IOError, construct.FieldError):
      # 255 is only used as the token identifier in the warning below.
      token_id = 255

    next_entry = (start_position + pending)
    if file_object.tell() != next_entry:
      # Unknown Structure.
      logging.warning('Unknown Token at "0x{0:X}", ID: {1} (0x{2:X})'.format(
          start_position - 1, token_id, token_id))
      # TODO: another way to save this information must be found.
      # Note that this message is currently discarded, since an empty
      # dictionary is returned below.
      extra_tokens.update(
          {'message': self.MESSAGE_CAN_NOT_SAVE.format(
              start_position - 1, start_token_id)})
      # Move to next entry.
      file_object.seek(next_entry - file_object.tell(), os.SEEK_CUR)
      # An empty dictionary is returned because it is not known which
      # structure was the incorrect one that prevented reaching the
      # expected end of the entry.
      return {}
    return extra_tokens

  def FormatToken(self, token_id, token, file_object):
    """Parse the Token depending of the type of the structure.

    Args:
      token_id (int): identification of the token_type.
      token (structure): token struct to parse.
      file_object: BSM file.

    Returns:
      (dict): parsed Token values.

    Keys for returned dictionary are token name like BSM_TOKEN_SUBJECT32.
    Values of this dictionary are key-value pairs like terminal_ip:127.0.0.1.
    """
    if token_id not in self._bsm_type_list_all:
      return {}

    bsm_type, _ = self._bsm_type_list_all.get(token_id, ['', ''])

    if token_id in self._BSM_UTF8_BYTE_ARRAY_TOKEN_TYPES:
      try:
        string = self._CopyUtf8ByteArrayToString(token.text)
      except TypeError:
        string = 'Unknown'
      return {bsm_type: string}

    elif token_id in self._BSM_RETURN_TOKEN_TYPES:
      return {bsm_type: {
          'error': bsmtoken.BSM_ERRORS.get(token.status, 'Unknown'),
          'token_status': token.status,
          'call_status': token.return_value
      }}

    elif token_id in self._BSM_SUBJECT_TOKEN_TYPES:
      return {bsm_type: {
          'aid': token.subject_data.audit_uid,
          'euid': token.subject_data.effective_uid,
          'egid': token.subject_data.effective_gid,
          'uid': token.subject_data.real_uid,
          'gid': token.subject_data.real_gid,
          'pid': token.subject_data.pid,
          'session_id': token.subject_data.session_id,
          'terminal_port': token.terminal_port,
          'terminal_ip': self._IPv4Format(token.ipv4)
      }}

    elif token_id in self._BSM_SUBJECT_EX_TOKEN_TYPES:
      if token.bsm_ip_type_short.net_type == self.AU_IPv6:
        ip = self._IPv6Format(
            token.bsm_ip_type_short.ip_addr.high,
            token.bsm_ip_type_short.ip_addr.low)
      elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
        ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
      else:
        ip = 'unknown'
      return {bsm_type: {
          'aid': token.subject_data.audit_uid,
          'euid': token.subject_data.effective_uid,
          'egid': token.subject_data.effective_gid,
          'uid': token.subject_data.real_uid,
          'gid': token.subject_data.real_gid,
          'pid': token.subject_data.pid,
          'session_id': token.subject_data.session_id,
          'terminal_port': token.terminal_port,
          'terminal_ip': ip
      }}

    elif token_id in self._BSM_ARGUMENT_TOKEN_TYPES:
      string = self._CopyUtf8ByteArrayToString(token.text)
      return {bsm_type: {
          'string': string,
          'num_arg': token.num_arg,
          'is': token.name_arg}}

    elif token_id in self._BSM_EXEV_TOKEN_TYPES:
      arguments = []
      for _ in range(0, token):
        sub_token = self.BSM_TOKEN_EXEC_ARGUMENT.parse_stream(file_object)
        string = self._CopyUtf8ByteArrayToString(sub_token.text)
        arguments.append(string)
      return {bsm_type: ' '.join(arguments)}

    elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET32':
      return {bsm_type: {
          'protocols':
          bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'),
          'net_type': token.net_type,
          'port': token.port_number,
          'address': self._IPv4Format(token.ipv4)
      }}

    elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET128':
      return {bsm_type: {
          'protocols':
          bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'),
          'net_type': token.net_type,
          'port': token.port_number,
          'address': self._IPv6Format(token.ipv6.high, token.ipv6.low)
      }}

    elif bsm_type == 'BSM_TOKEN_ADDR':
      return {bsm_type: self._IPv4Format(token)}

    elif bsm_type == 'BSM_TOKEN_IP':
      return {'IPv4_Header': '0x{0:s}]'.format(token.encode('hex'))}

    elif bsm_type == 'BSM_TOKEN_ADDR_EXT':
      return {bsm_type: {
          'protocols':
          bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'),
          'net_type': token.net_type,
          'address': self._IPv6Format(token.ipv6.high, token.ipv6.low)
      }}

    elif bsm_type == 'BSM_TOKEN_PORT':
      return {bsm_type: token}

    elif bsm_type == 'BSM_TOKEN_TRAILER':
      return {bsm_type: token.record_length}

    elif bsm_type == 'BSM_TOKEN_FILE':
      # TODO: if this timestamp is usefull, it must be extracted as a separate
      #       event object.
      timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
          token.timestamp, token.microseconds)
      date_time = timelib.Timestamp.CopyToDatetime(timestamp, pytz.UTC)
      date_time_string = date_time.strftime('%Y-%m-%d %H:%M:%S')

      string = self._CopyUtf8ByteArrayToString(token.text)
      return {bsm_type: {'string': string, 'timestamp': date_time_string}}

    elif bsm_type == 'BSM_TOKEN_IPC':
      return {bsm_type: {
          'object_type': token.object_type,
          'object_id': token.object_id
      }}

    elif token_id in self._BSM_PROCESS_TOKEN_TYPES:
      return {bsm_type: {
          'aid': token.subject_data.audit_uid,
          'euid': token.subject_data.effective_uid,
          'egid': token.subject_data.effective_gid,
          'uid': token.subject_data.real_uid,
          'gid': token.subject_data.real_gid,
          'pid': token.subject_data.pid,
          'session_id': token.subject_data.session_id,
          'terminal_port': token.terminal_port,
          'terminal_ip': self._IPv4Format(token.ipv4)
      }}

    elif token_id in self._BSM_PROCESS_EX_TOKEN_TYPES:
      if token.bsm_ip_type_short.net_type == self.AU_IPv6:
        ip = self._IPv6Format(
            token.bsm_ip_type_short.ip_addr.high,
            token.bsm_ip_type_short.ip_addr.low)
      elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
        ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
      else:
        ip = 'unknown'
      return {bsm_type: {
          'aid': token.subject_data.audit_uid,
          'euid': token.subject_data.effective_uid,
          'egid': token.subject_data.effective_gid,
          'uid': token.subject_data.real_uid,
          'gid': token.subject_data.real_gid,
          'pid': token.subject_data.pid,
          'session_id': token.subject_data.session_id,
          'terminal_port': token.terminal_port,
          'terminal_ip': ip
      }}

    elif bsm_type == 'BSM_TOKEN_DATA':
      data = []
      data_type = bsmtoken.BSM_TOKEN_DATA_TYPE.get(token.data_type, '')

      if data_type == 'AUR_CHAR':
        for _ in range(token.unit_count):
          data.append(self.BSM_TOKEN_DATA_CHAR.parse_stream(file_object))

      elif data_type == 'AUR_SHORT':
        for _ in range(token.unit_count):
          data.append(self.BSM_TOKEN_DATA_SHORT.parse_stream(file_object))

      elif data_type == 'AUR_INT32':
        for _ in range(token.unit_count):
          data.append(self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object))

      else:
        data.append('Unknown type data')

      # TODO: the data when it is string ends with ".", HW a space is return
      #       after uses the UTF-8 conversion.
      return {bsm_type: {
          'format': bsmtoken.BSM_TOKEN_DATA_PRINT[token.how_to_print],
          'data':
          '{0}'.format(self._RawToUTF8(''.join(map(str, data))))
      }}

    elif token_id in self._BSM_ATTR_TOKEN_TYPES:
      return {bsm_type: {
          'mode': token.file_mode,
          'uid': token.uid,
          'gid': token.gid,
          'system_id': token.file_system_id,
          'node_id': token.file_system_node_id,
          'device': token.device}}

    elif bsm_type == 'BSM_TOKEN_GROUPS':
      arguments = []
      for _ in range(token):
        arguments.append(
            self._RawToUTF8(
                self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object)))
      return {bsm_type: ','.join(arguments)}

    elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET32_EX':
      if bsmtoken.BSM_PROTOCOLS.get(token.socket_domain, '') == 'INET6':
        saddr = self._IPv6Format(
            token.structure_addr_port.saddr_high,
            token.structure_addr_port.saddr_low)
        daddr = self._IPv6Format(
            token.structure_addr_port.daddr_high,
            token.structure_addr_port.daddr_low)
      else:
        saddr = self._IPv4Format(token.structure_addr_port.source_address)
        daddr = self._IPv4Format(token.structure_addr_port.destination_address)

      return {bsm_type:{
          'from': saddr,
          'from_port': token.structure_addr_port.source_port,
          'to': daddr,
          'to_port': token.structure_addr_port.destination_port}}

    elif bsm_type == 'BSM_TOKEN_IPC_PERM':
      return {bsm_type: {
          'user_id': token.user_id,
          'group_id': token.group_id,
          'creator_user_id': token.creator_user_id,
          'creator_group_id': token.creator_group_id,
          'access': token.access_mode}}

    elif bsm_type == 'BSM_TOKEN_SOCKET_UNIX':
      string = self._CopyUtf8ByteArrayToString(token.path)
      return {bsm_type: {'family': token.family, 'path': string}}

    elif bsm_type == 'BSM_TOKEN_OPAQUE':
      string = self._CopyByteArrayToBase16String(token.text)
      return {bsm_type: string}

    elif bsm_type == 'BSM_TOKEN_SEQUENCE':
      return {bsm_type: token}
Example #23
0
    c.UBInt32('event'),
    c.UBInt32('ip_addr'),
    c.UBInt32('key'),
    c.SBInt32('num_want'),
    c.UBInt16('port'),
)

# One entry of the repeated 'peer' list: four unsigned bytes of address
# followed by an unsigned big-endian 16-bit port.
_announce_peer_entry = c.Struct(
    'peer',
    c.Array(4, c.UBInt8('addr')),
    c.UBInt16('port'),
)

# Announce response: five unsigned big-endian 32-bit header fields, then
# 'peer' entries repeated greedily until the input no longer parses.
announce_resp = c.Struct(
    'response',
    c.UBInt32('action'),
    c.UBInt32('transaction_id'),
    c.UBInt32('interval'),
    c.UBInt32('leechers'),
    c.UBInt32('seeders'),
    c.GreedyRange(_announce_peer_entry),
)

# One entry of the repeated 'hashes' list: a raw 20-byte 'info_hash'.
_scrape_hash_entry = c.Struct(
    'hashes',
    c.Bytes('info_hash', 20),
)

# Scrape request: a 64-bit connection id and two 32-bit fields (all unsigned
# big-endian), then 'hashes' entries repeated greedily until the input no
# longer parses.
scrape_req = c.Struct(
    'request',
    c.UBInt64('connection_id'),
    c.UBInt32('action'),
    c.UBInt32('transaction_id'),
    c.GreedyRange(_scrape_hash_entry),
)
Example #24
0
class JavaIDXParser(interface.BaseParser):
    """Parse Java IDX files for download events.

    There are five structures defined. 6.02 files had one generic section
    that retained all data. From 6.03, the file went to a multi-section
    format where later sections were optional and had variable-lengths.
    6.03, 6.04, and 6.05 files all have their main data section (#2)
    begin at offset 128. The short structure is because 6.05 files
    deviate after the 8th byte. So, grab the first 8 bytes to ensure it's
    valid, get the file version, then continue on with the correct
    structures.
    """

    NAME = 'java_idx'
    DESCRIPTION = u'Parser for Java IDX files.'

    IDX_SHORT_STRUCT = construct.Struct('magic', construct.UBInt8('busy'),
                                        construct.UBInt8('incomplete'),
                                        construct.UBInt32('idx_version'))

    IDX_602_STRUCT = construct.Struct(
        'IDX_602_Full', construct.UBInt16('null_space'),
        construct.UBInt8('shortcut'), construct.UBInt32('content_length'),
        construct.UBInt64('last_modified_date'),
        construct.UBInt64('expiration_date'),
        construct.PascalString('version_string',
                               length_field=construct.UBInt16('length')),
        construct.PascalString('url',
                               length_field=construct.UBInt16('length')),
        construct.PascalString('namespace',
                               length_field=construct.UBInt16('length')),
        construct.UBInt32('FieldCount'))

    IDX_605_SECTION_ONE_STRUCT = construct.Struct(
        'IDX_605_Section1', construct.UBInt8('shortcut'),
        construct.UBInt32('content_length'),
        construct.UBInt64('last_modified_date'),
        construct.UBInt64('expiration_date'),
        construct.UBInt64('validation_date'), construct.UBInt8('signed'),
        construct.UBInt32('sec2len'), construct.UBInt32('sec3len'),
        construct.UBInt32('sec4len'))

    IDX_605_SECTION_TWO_STRUCT = construct.Struct(
        'IDX_605_Section2',
        construct.PascalString('version',
                               length_field=construct.UBInt16('length')),
        construct.PascalString('url',
                               length_field=construct.UBInt16('length')),
        construct.PascalString('namespec',
                               length_field=construct.UBInt16('length')),
        construct.PascalString('ip_address',
                               length_field=construct.UBInt16('length')),
        construct.UBInt32('FieldCount'))

    # Java uses Pascal-style strings, but with a 2-byte length field.
    JAVA_READUTF_STRING = construct.Struct(
        'Java.ReadUTF',
        construct.PascalString('string',
                               length_field=construct.UBInt16('length')))

    def Parse(self, parser_context, file_entry):
        """Extract data from a Java cache IDX file.

        This is the main parsing engine for the parser. It determines if
        the selected file is a proper IDX file. It then checks the file
        version to determine the correct structure to apply to extract
        data.

        Args:
          parser_context: A parser context object (instance of ParserContext).
          file_entry: A file entry object (instance of dfvfs.FileEntry).

        Raises:
          UnableToParseFile: when the file is not a supported Java IDX file
              or its structures cannot be read.
        """
        file_object = file_entry.GetFileObject()
        try:
            self._ParseFileObject(parser_context, file_entry, file_object)
        finally:
            # Release the file object on every exit path, including the
            # UnableToParseFile errors raised for non-IDX files.
            file_object.close()

    def _ParseFileObject(self, parser_context, file_entry, file_object):
        """Parse an opened IDX file-like object and produce its events.

        Produces a 'File Hosted Date' event from the last modified date, a
        'File Expiration Date' event when an expiration date is set, and a
        download event when a 'date' HTTP header is present.

        Args:
          parser_context: A parser context object (instance of ParserContext).
          file_entry: A file entry object (instance of dfvfs.FileEntry).
          file_object: The file-like object to read from; not closed here.

        Raises:
          UnableToParseFile: when the file is not a supported Java IDX file
              or its structures cannot be read.
        """
        try:
            magic = self.IDX_SHORT_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            # Use !s: exceptions do not support the 's' format specification.
            raise errors.UnableToParseFile(
                u'Unable to parse Java IDX file with error: {0!s}.'.format(
                    exception))

        # Fields magic.busy and magic.incomplete are normally 0x00. They
        # are set to 0x01 if the file is currently being downloaded. Logic
        # checks for > 1 to avoid a race condition and still reject any
        # file with other data.
        # Field magic.idx_version is the file version, of which only
        # certain versions are supported.
        if magic.busy > 1 or magic.incomplete > 1:
            raise errors.UnableToParseFile(u'Not a valid Java IDX file')

        if magic.idx_version not in (602, 603, 604, 605):
            raise errors.UnableToParseFile(u'Not a valid Java IDX file')

        # Everything read below is variable-length data. A truncated or
        # corrupt file must be reported as unparsable, the same way a bad
        # magic header is, instead of escaping as a raw construct/IO error.
        try:
            # Obtain the relevant values from the file. The last modified
            # date denotes when the file was last modified on the HOST. For
            # example, when the file was uploaded to a web server.
            if magic.idx_version == 602:
                section_one = self.IDX_602_STRUCT.parse_stream(file_object)
                last_modified_date = section_one.last_modified_date
                url = section_one.url
                ip_address = 'Unknown'
                http_header_count = section_one.FieldCount
            else:
                # Only 603, 604 and 605 remain after the version check above.
                # IDX 6.03 and 6.04 have two unused bytes before the
                # structure.
                if magic.idx_version in (603, 604):
                    file_object.read(2)

                # IDX 6.03, 6.04, and 6.05 files use the same structures for
                # the remaining data.
                section_one = self.IDX_605_SECTION_ONE_STRUCT.parse_stream(
                    file_object)
                last_modified_date = section_one.last_modified_date
                if file_object.get_size() > 128:
                    file_object.seek(128)  # Static offset for section 2.
                    section_two = (
                        self.IDX_605_SECTION_TWO_STRUCT.parse_stream(
                            file_object))
                    url = section_two.url
                    ip_address = section_two.ip_address
                    http_header_count = section_two.FieldCount
                else:
                    url = 'Unknown'
                    ip_address = 'Unknown'
                    http_header_count = 0

            # File offset is now just prior to HTTP headers. Each header is
            # a name string followed by a value string; only the 'date'
            # header is used, to retrieve the download date.
            download_date = None
            for _ in range(http_header_count):
                header_name = self.JAVA_READUTF_STRING.parse_stream(
                    file_object)
                header_value = self.JAVA_READUTF_STRING.parse_stream(
                    file_object)
                if header_name.string == 'date':
                    # Time string "should" be in UTC or have an associated
                    # time zone information in the string itself. If that is
                    # not the case then there is no reliable method for plaso
                    # to determine the proper timezone, so the assumption is
                    # that it is UTC.
                    download_date = timelib.Timestamp.FromTimeString(
                        header_value.string, gmt_as_timezone=False)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                u'Unable to parse Java IDX file with error: {0!s}.'.format(
                    exception))

        if not url or not ip_address:
            raise errors.UnableToParseFile(
                u'Unexpected Error: URL or IP address not found in file.')

        last_modified_timestamp = timelib.Timestamp.FromJavaTime(
            last_modified_date)
        # TODO: Move the timestamp description fields into eventdata.
        event_object = JavaIDXEvent(
            last_modified_timestamp, 'File Hosted Date', magic.idx_version,
            url, ip_address)
        parser_context.ProduceEvent(
            event_object, parser_name=self.NAME, file_entry=file_entry)

        # section_one is always set at this point; a zero or missing
        # expiration date produces no event.
        expiration_date = section_one.get('expiration_date', None)
        if expiration_date:
            expiration_timestamp = timelib.Timestamp.FromJavaTime(
                expiration_date)
            event_object = JavaIDXEvent(
                expiration_timestamp, 'File Expiration Date',
                magic.idx_version, url, ip_address)
            parser_context.ProduceEvent(
                event_object, parser_name=self.NAME, file_entry=file_entry)

        if download_date:
            event_object = JavaIDXEvent(
                download_date, eventdata.EventTimestamp.FILE_DOWNLOADED,
                magic.idx_version, url, ip_address)
            parser_context.ProduceEvent(
                event_object, parser_name=self.NAME, file_entry=file_entry)
Example #25
0
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Cups Reading Control Files."""

# IMPORTANT: DIRTY PARSE...

# MSc Project in Royal Holloway, University of London.
__author__ = 'Joaquin Moreno Garijo ([email protected])'

import datetime
import construct
import sys

# Building blocks used to read a control file with construct.
# Skips 11 bytes without interpreting them.
header = construct.Padding(11)
# One unsigned byte, exposed under the name 'type'.
# NOTE(review): presumably a tag identifying the attribute kind -- confirm
# against the code that consumes it.
attr_id = construct.UBInt8('type')
# A NUL-terminated string, exposed under the name 'text'.
attr_text = construct.CString('text')
# An unsigned big-endian 32-bit 'timestamp' followed by an unsigned
# big-endian 16-bit 'other' value.
# NOTE(review): the unit of 'timestamp' and the meaning of 'other' are not
# visible here -- confirm before relying on them.
attr_time = construct.Struct('time', construct.UBInt32('timestamp'),
                             construct.UBInt16('other'))


class ControlFile(object):
    """Container holding three time fields and a list of data items."""

    def __init__(self):
        """Start with every time field at zero and an empty data list."""
        # Chained assignment binds the targets left to right, so the
        # attributes are created in the order crt_time, proc_time, comp_time.
        self.crt_time = self.proc_time = self.comp_time = 0
        # Each instance gets its own list.
        self.data = list()


def printValue(name, value):
    # print u'{}: {}'.format(name, value)