Example #1
    def _ReadFileHeader(self):
        """Reads the file header.

    Raises:
      IOError: if the file header cannot be read.
    """
        if self._debug:
            print(u'Seeking file header offset: 0x{0:08x}'.format(0))

        self._file_object.seek(0, os.SEEK_SET)

        file_header_data = self._file_object.read(self._FILE_HEADER.sizeof())

        if self._debug:
            print(u'File header data:')
            print(hexdump.Hexdump(file_header_data))

        try:
            file_header_struct = self._FILE_HEADER.parse(file_header_data)
        except construct.FieldError as exception:
            raise IOError(
                u'Unable to parse file header with error: {0:s}'.format(
                    exception))

        if self._debug:
            print(u'Signature\t\t\t\t\t\t\t: {0!s}'.format(
                file_header_struct.signature))
            print(u'Number of pages\t\t\t\t\t\t\t: {0:d}'.format(
                file_header_struct.number_of_pages))

            print(u'')

        page_sizes_data_size = file_header_struct.number_of_pages * 4

        page_sizes_data = self._file_object.read(page_sizes_data_size)

        if self._debug:
            print(u'Page sizes data:')
            print(hexdump.Hexdump(page_sizes_data))

        try:
            page_sizes_array = construct.Array(
                file_header_struct.number_of_pages,
                construct.UBInt32(u'page_sizes')).parse(page_sizes_data)

        except construct.FieldError as exception:
            raise IOError(
                u'Unable to parse page sizes array with error: {0:s}'.format(
                    exception))

        self._page_sizes = []
        for page_index in range(file_header_struct.number_of_pages):
            self._page_sizes.append(page_sizes_array[page_index])

            if self._debug:
                print(u'Page: {0:d} size\t\t\t\t\t\t\t: {1:d}'.format(
                    page_index, page_sizes_array[page_index]))

        if self._debug:
            print(u'')
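
The method above illustrates the pattern used throughout these examples: parse a fixed-size header with a construct Struct, then use a count field from that header to size a trailing Array. A minimal self-contained sketch, assuming the legacy construct 2.x API (UBInt32 and friends were removed in construct 2.8+); the 'cook' signature and page sizes are made-up test data:

import construct

FILE_HEADER = construct.Struct(
    'file_header',
    construct.Bytes('signature', 4),
    construct.UBInt32('number_of_pages'))

# Fake header: signature 'cook', 2 pages, then two big-endian 32-bit
# page sizes (16 and 32 bytes).
data = b'cook\x00\x00\x00\x02\x00\x00\x00\x10\x00\x00\x00\x20'

file_header = FILE_HEADER.parse(data)
page_sizes = construct.Array(
    file_header.number_of_pages,
    construct.UBInt32('page_sizes')).parse(data[FILE_HEADER.sizeof():])

assert file_header.number_of_pages == 2
assert list(page_sizes) == [16, 32]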
Example #2
 def recv_cmd(self):
     # Each command is framed as a big-endian 32-bit length prefix followed
     # by that many bytes of payload.
     lentype = c.UBInt32('length')
     L = self.conn.recv(lentype.sizeof())  # read the 4-byte length prefix
     n = lentype.parse(L)
     data = self.conn.recv(n)  # read the payload
     msg = L + data  # parse_command expects the full frame, prefix included
     return parse_command(msg)
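
recv_cmd implements simple length-prefixed framing. The same round trip without a socket, as a sketch assuming the legacy construct 2.x API (parse_command and self.conn belong to the surrounding class and are not reproduced here):

import construct as c

length_field = c.UBInt32('length')

payload = b'hello'
frame = length_field.build(len(payload)) + payload  # 4-byte BE length + data

# Receiver side: read the fixed-size length prefix, then exactly n bytes.
n = length_field.parse(frame[:length_field.sizeof()])
assert frame[length_field.sizeof():length_field.sizeof() + n] == payload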
Example #3
    def configure(self, limit):
        """
    Configures a specific limit.

    :param limit: Limit message type
    """
        if limit.type == LIMIT_TYPE_BANDWIDTH_DOWN:
            # Downstream (client-wise) limit setup
            try:
                bandwidth = cs.UBInt32("bandwidth").parse(limit.data)
            except cs.ConstructError:
                logger.warning(
                    "Invalid bandwidth limit requested on tunnel %d." %
                    self.tunnel.id)
                return

            logger.info("Setting bandwidth limit to %d kbps on tunnel %d." %
                        (bandwidth, self.tunnel.id))

            # Setup bandwidth limit using Linux traffic shaping
            try:
                tc = traffic_control.TrafficControl(
                    self.tunnel.sessions.values()[0].name)
                tc.reset()
                tc.set_fixed_bandwidth(bandwidth)
            except traffic_control.TrafficControlError:
                logger.warning(
                    "Unable to configure traffic shaping rules for tunnel %d."
                    % self.tunnel.id)

            return True
        else:
            return False
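
The bandwidth limit travels as a single big-endian 32-bit integer in limit.data. A sketch of the round trip, assuming the legacy construct 2.x API; note that FieldError, which parse raises on short input, is a subclass of ConstructError, so the except clause above catches it:

import construct as cs

bandwidth_field = cs.UBInt32('bandwidth')

limit_data = bandwidth_field.build(512)  # request 512 kbps
assert bandwidth_field.parse(limit_data) == 512

try:
    bandwidth_field.parse(b'\x00\x00')  # too short: raises a ConstructError
except cs.ConstructError:
    pass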
Example #4
    def handle(self, socket, data, address):
        """
    Handles a single message of the control protocol.

    :param socket: Socket to use for outgoing messages
    :param data: Data that has been received
    :param address: Address where data has been received from
    :return: Message if the message needs further processing, None
      otherwise
    """
        try:
            msg = ControlMessage.parse(data)
        except cs.ConstructError:
            return

        # Parsing successful, check the message type
        if msg.type == CONTROL_TYPE_COOKIE:
            # Cookie request; ensure the payload is at least 8 bytes so this
            # protocol cannot be used as a DoS amplifier by cookie spamming
            if len(msg.data) < 8:
                return

            # Respond with a cookie
            self.send_message(socket, CONTROL_TYPE_COOKIE,
                              self.manager.issue_cookie(address), address)
        elif msg.type == CONTROL_TYPE_PREPARE:
            # Parse the prepare message
            try:
                prepare = PrepareMessage.parse(msg.data)
            except cs.ConstructError:
                return

            # Check for a cookie match
            if not self.manager.verify_cookie(address, prepare.cookie):
                return

            # First check if this tunnel has already been prepared
            tunnel, created = self.manager.setup_tunnel(
                self.port, address, prepare.uuid, prepare.cookie,
                prepare.tunnel_id or 1)
            if tunnel is None:
                self.send_message(socket, CONTROL_TYPE_ERROR, address=address)
                return

            self.send_message(socket, CONTROL_TYPE_TUNNEL,
                              cs.UBInt32("tunnel_id").build(tunnel.id),
                              address)

            if self.tunnel is None and created:
                # Clear conntrack tables so all new packets are evaluated
                # against the netfilter rules and redirected into the tunnel
                tunnel.clear_conntrack()

                # Invoke any session up hooks
                tunnel.call_session_up_hooks()
        else:
            # Return the message for any other message type
            return msg
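
A hedged sketch of the parse-then-dispatch pattern in handle(), using a hypothetical message layout (one type byte plus a UBInt16-length-prefixed payload); the real ControlMessage structure is defined elsewhere in this module and may differ:

import construct as cs

CONTROL_TYPE_COOKIE = 1  # hypothetical constant value

CONTROL_MESSAGE = cs.Struct(
    'control_message',
    cs.UBInt8('type'),
    cs.PascalString('data', length_field=cs.UBInt16('length')))

raw = CONTROL_MESSAGE.build(
    cs.Container(type=CONTROL_TYPE_COOKIE, data=b'12345678'))
msg = CONTROL_MESSAGE.parse(raw)
if msg.type == CONTROL_TYPE_COOKIE and len(msg.data) >= 8:
    print('would respond with a cookie')  # anti-amplification check passed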
Example #5
  def _keep_alive_do(self):
    """
    Periodically transmits keepalives over the tunnel and checks
    if the tunnel has timed out due to inactivity.
    The sequence number is needed because some ISPs (usually cable or mobile
    operators) do some "optimisation" and drop UDP packets containing the
    same content.
    """
    while True:
      self.handler.send_message(self.socket, CONTROL_TYPE_KEEPALIVE,
          cs.UBInt32("size").build(self._next_keepalive_sequence_number()))

      # Check if we are still alive or not; if not, kill the tunnel
      timeout_interval = self.manager.config.getint("broker", "tunnel_timeout")
      if datetime.datetime.now() - self.last_alive > datetime.timedelta(seconds = timeout_interval):
        if self.manager.config.getboolean('log', 'log_ip_addresses'):
          logger.warning("Session with tunnel %d to %s:%d timed out." % (self.id, self.endpoint[0],
            self.endpoint[1]))
        else:
          logger.warning("Session with tunnel %d timed out." % self.id)

        gevent.spawn(self.manager.close_tunnel, self, ERROR_REASON_FROM_SERVER | ERROR_REASON_TIMEOUT)
        return

      gevent.sleep(5.0)
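
The keepalive payload is just a big-endian 32-bit sequence number, so consecutive keepalives differ byte-wise and are not deduplicated upstream. A sketch of a wrapping sequence builder, assuming the legacy construct 2.x API; the wrap-around modulus is an assumption to keep the value within UBInt32 range:

import construct as cs

_sequence = [0]  # mutable cell standing in for instance state

def next_keepalive_payload():
    _sequence[0] = (_sequence[0] + 1) % (2 ** 32)  # wrap before overflowing UBInt32
    return cs.UBInt32('size').build(_sequence[0])

assert next_keepalive_payload() == b'\x00\x00\x00\x01'
assert next_keepalive_payload() == b'\x00\x00\x00\x02'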
Example #6
"""

__author__ = 'yukai'

# to reduce code width
_ARRAY = construct.Array
_POINTER = construct.Pointer
_REPEAT = construct.RepeatUntil
_INT = construct.ULInt32('integer')
_INT64 = construct.ULInt64('integer64')

_BOOKMARK_DATA = construct.Struct(
    'bookmark_data',
    construct.String('magic', 4),
    construct.ULInt32('length'),
    construct.UBInt32('version'),
    construct.ULInt32('offset'),  # offset to "FirstToC Offset"
    _POINTER(
        lambda ctx: ctx.offset,
        construct.Struct(
            'ftoc_offset',
            construct.Anchor('abs_offset'),
            construct.ULInt32('offset'),
            _POINTER(
                lambda ctx: ctx.abs_offset + ctx.offset,
                _REPEAT(
                    lambda obj, ctx: obj.offset == 0x00000000,
                    construct.Struct(
                        'toc',
                        construct.ULInt32('length'),
                        construct.SLInt16('type'),
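
The snippet above is truncated, but the pattern it demonstrates is Pointer/Anchor navigation: Anchor records the current absolute stream position, and Pointer parses a subconstruct at a computed offset without moving the main cursor. A minimal sketch, assuming the legacy construct 2.x API:

import construct

DEMO = construct.Struct(
    'demo',
    construct.ULInt32('offset'),  # absolute offset of the value
    construct.Pointer(
        lambda ctx: ctx.offset,
        construct.ULInt32('value')))

# Value 0x11223344 stored little-endian at offset 8; bytes 4..7 are padding.
data = b'\x08\x00\x00\x00' + b'\x00' * 4 + b'\x44\x33\x22\x11'
assert DEMO.parse(data).value == 0x11223344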
Example #7
def patch_binary(**kwargs):
    """
    PATCHFILE is the path to the binary patch file
    """

    of = kwargs["outfile"]
    data = kwargs["patchfile"].read()
    if kwargs["mask"]:
        mask = kwargs["mask"].read()
    else:
        mask = "\xFF" * len(data)

    offset = 0
    if kwargs["blockidx"]:
        i = kwargs["blockidx"]
        offset = firmwareParsed.DXE.block[i].blockHeader.offset

    if kwargs["offset"]:
        try:
            assert kwargs["offset"][0:2] == "0x"
            offset += int(kwargs["offset"][2:], 16)
        except AssertionError:
            offset += int(kwargs["offset"])

    if not kwargs["already_encrypted"]:
        data = SubstitutionCipher(Bytes("", len(data))).build(data)

    blobFile.seek(0)
    shutil.copyfileobj(blobFile, of)
    of.seek(0)

    blobFile.seek(offset)
    blob = blobFile.read(len(data))
    of.seek(offset)

    maskedOrig = [
        ord(blobbyte) & ~ord(maskbyte)
        for blobbyte, maskbyte in zip(blob, mask)
    ]
    maskedPatch = [
        ord(databyte) & ord(maskbyte)
        for databyte, maskbyte in zip(data, mask)
    ]

    for orig, patch in zip(
            maskedOrig,
            maskedPatch):  # TODO: what if the patch is longer than the file?
        of.write(chr(orig | patch))

    if kwargs["patch_block_hdr_checksum"]:
        assert kwargs["blockidx"]
        of.seek(firmwareParsed.DXE.block[i].blockHeader.offset)

        bytesl = [ord(x) for x in of.read(4)]  # TODO: move to separate function (see also asserts)
        hdr_checkxor = reduce(operator.xor, bytesl)
        of.seek(firmwareParsed.DXE.block[i].blockHeader.offset + 3)
        of.write(construct.UBInt32("").build(hdr_checkxor))

    if kwargs["patch_blob_hdr_checksum"]:
        of.seek(0)
        checksum = sum(
            ord(b) for b in fwUpdateFile.parse(of.read()).decryptedBody)
        print(checksum)
        of.seek(0)
        of.seek(16)
        of.write(construct.ULInt32("").build(checksum))
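
The block header checksum above is a single XOR over the header bytes, written back with an anonymous (empty-named) construct field. A sketch of that arithmetic, assuming the legacy construct 2.x API and Python 2, where reduce is a builtin and iterating a byte string yields one-character strings (hence ord):

import operator
import construct

header = '\x12\x34\x56\x78'
hdr_checkxor = reduce(operator.xor, [ord(x) for x in header])

packed = construct.UBInt32('').build(hdr_checkxor)
assert construct.UBInt32('').parse(packed) == hdr_checkxor == 0x08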
Example #8
class BinaryCookieParser(interface.FileObjectParser):
    """Parser for Safari Binary Cookie files."""

    NAME = 'binary_cookies'
    DESCRIPTION = 'Parser for Safari Binary Cookie files.'

    _FILE_HEADER = construct.Struct(
        'file_header', construct.Bytes('signature', 4),
        construct.UBInt32('number_of_pages'),
        construct.Array(lambda ctx: ctx.number_of_pages,
                        construct.UBInt32('page_sizes')))

    _COOKIE_RECORD = construct.Struct('cookie_record',
                                      construct.ULInt32('size'),
                                      construct.Bytes('unknown_1', 4),
                                      construct.ULInt32('flags'),
                                      construct.Bytes('unknown_2', 4),
                                      construct.ULInt32('url_offset'),
                                      construct.ULInt32('name_offset'),
                                      construct.ULInt32('path_offset'),
                                      construct.ULInt32('value_offset'),
                                      construct.Bytes('end_of_cookie', 8),
                                      construct.LFloat64('expiration_date'),
                                      construct.LFloat64('creation_date'))

    _PAGE_HEADER = construct.Struct(
        'page_header', construct.Bytes('header', 4),
        construct.ULInt32('number_of_records'),
        construct.Array(lambda ctx: ctx.number_of_records,
                        construct.ULInt32('offsets')))

    def __init__(self):
        """Initializes a parser object."""
        super(BinaryCookieParser, self).__init__()
        self._cookie_plugins = (
            cookie_plugins_manager.CookiePluginsManager.GetPlugins())

    def _ParseCookieRecord(self, parser_mediator, page_data, page_offset):
        """Parses a cookie record

    Args:
      parser_mediator (ParserMediator): parser mediator.
      page_data (bytes): page data.
      page_offset (int): offset of the cookie record relative to the start
          of the page.
    """
        try:
            cookie = self._COOKIE_RECORD.parse(page_data[page_offset:])
        except construct.FieldError:
            message = 'Unable to read cookie record at offset: {0:d}'.format(
                page_offset)
            parser_mediator.ProduceExtractionError(message)
            return

        # The data of each field is stored in the range between its offset
        # and the start of the next offset. Thus we need to determine the
        # proper ordering of the offsets, since they are not always stored in
        # the same order.
        offset_dict = {
            cookie.url_offset: 'url',
            cookie.name_offset: 'name',
            cookie.value_offset: 'value',
            cookie.path_offset: 'path'
        }

        offsets = sorted(offset_dict.keys())
        offsets.append(cookie.size + page_offset)

        # TODO: Find a better approach to parsing the data than this.
        data_dict = {}
        for current_offset in range(0, len(offsets) - 1):
            # Get the current offset and the offset for the next entry.
            start, end = offsets[current_offset:current_offset + 2]
            value = offset_dict.get(offsets[current_offset])
            # Read the data.
            data_all = page_data[start + page_offset:end + page_offset]
            data, _, _ = data_all.partition(b'\x00')
            data_dict[value] = data

        event_data = SafariBinaryCookieEventData()
        event_data.cookie_name = data_dict.get('name')
        event_data.cookie_value = data_dict.get('value')
        event_data.flags = cookie.flags
        event_data.path = data_dict.get('path')
        event_data.url = data_dict.get('url')

        if cookie.creation_date:
            date_time = dfdatetime_cocoa_time.CocoaTime(
                timestamp=cookie.creation_date)
            event = time_events.DateTimeValuesEvent(
                date_time, definitions.TIME_DESCRIPTION_CREATION)
            parser_mediator.ProduceEventWithEventData(event, event_data)

        if cookie.expiration_date:
            date_time = dfdatetime_cocoa_time.CocoaTime(
                timestamp=cookie.expiration_date)
        else:
            date_time = dfdatetime_semantic_time.SemanticTime('Not set')

        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_EXPIRATION)
        parser_mediator.ProduceEventWithEventData(event, event_data)

        for plugin in self._cookie_plugins:
            if parser_mediator.abort:
                break

            if event_data.cookie_name != plugin.COOKIE_NAME:
                continue

            try:
                plugin.UpdateChainAndProcess(
                    parser_mediator,
                    cookie_name=event_data.cookie_name,
                    cookie_data=event_data.cookie_value,
                    url=event_data.url)

            except Exception as exception:  # pylint: disable=broad-except
                parser_mediator.ProduceExtractionError(
                    'plugin: {0:s} unable to parse cookie with error: {1!s}'.
                    format(plugin.NAME, exception))

    def _ParsePage(self, parser_mediator, page_number, page_data):
        """Parses a page.

    Args:
      parser_mediator (ParserMediator): parser mediator.
      page_number (int): page number.
      page_data (bytes): page data.
    """
        try:
            page_header = self._PAGE_HEADER.parse(page_data)
        except construct.FieldError:
            # TODO: add offset
            parser_mediator.ProduceExtractionError(
                'unable to read header of page: {0:d} at offset: 0x{1:08x}'.
                format(page_number, 0))
            return

        for page_offset in page_header.offsets:
            if parser_mediator.abort:
                break

            self._ParseCookieRecord(parser_mediator, page_data, page_offset)

        # TODO: check footer.

    @classmethod
    def GetFormatSpecification(cls):
        """Retrieves the format specification for parser selection.

    Returns:
      FormatSpecification: format specification.
    """
        format_specification = specification.FormatSpecification(cls.NAME)
        format_specification.AddNewSignature(b'cook\x00', offset=0)
        return format_specification

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a Safari binary cookie file-like object.

    Args:
      parser_mediator (ParserMediator): parser mediator.
      file_object (dfvfs.FileIO): file-like object to be parsed.

    Raises:
      UnableToParseFile: when the file cannot be parsed, this will signal
          the event extractor to apply other parsers.
    """
        try:
            file_header = self._FILE_HEADER.parse_stream(file_object)
        except (IOError, construct.ArrayError,
                construct.FieldError) as exception:
            parser_mediator.ProduceExtractionError(
                'unable to read file header with error: {0!s}.'.format(
                    exception))
            raise errors.UnableToParseFile()

        if file_header.signature != b'cook':
            parser_mediator.ProduceExtractionError(
                'unsupported file signature.')
            raise errors.UnableToParseFile()

        for index, page_size in enumerate(file_header.page_sizes):
            if parser_mediator.abort:
                break

            page_data = file_object.read(page_size)
            if len(page_data) != page_size:
                parser_mediator.ProduceExtractionError(
                    'unable to read page: {0:d}'.format(index))
                break

            self._ParsePage(parser_mediator, index, page_data)
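
_ParseCookieRecord recovers each field by sorting the four offsets: a field's data runs from its own offset to the next-higher one, so the on-disk ordering does not matter. A sketch of the slicing trick with hypothetical offsets; no construct needed:

page_data = b'XXXXurl\x00name\x00'
offset_dict = {4: 'url', 8: 'name'}  # hypothetical field offsets

offsets = sorted(offset_dict.keys())
offsets.append(len(page_data))  # sentinel: end of the record

data_dict = {}
for start, end in zip(offsets, offsets[1:]):
    value, _, _ = page_data[start:end].partition(b'\x00')
    data_dict[offset_dict[start]] = value

assert data_dict == {'url': b'url', 'name': b'name'}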
Example #9
class BinaryCookiesFile(object):
    """Class that contains a Cookies.binarycookies file."""

    _FILE_HEADER = construct.Struct(u'file_header',
                                    construct.Bytes(u'signature', 4),
                                    construct.UBInt32(u'number_of_pages'))

    _PAGE_HEADER = construct.Struct(
        u'page_header', construct.ULInt32(u'signature'),
        construct.ULInt32(u'number_of_records'),
        construct.Array(lambda ctx: ctx.number_of_records,
                        construct.ULInt32(u'offsets')))

    _RECORD_HEADER = construct.Struct(u'record_header',
                                      construct.ULInt32(u'size'),
                                      construct.ULInt32(u'unknown1'),
                                      construct.ULInt32(u'flags'),
                                      construct.ULInt32(u'unknown2'),
                                      construct.ULInt32(u'url_offset'),
                                      construct.ULInt32(u'name_offset'),
                                      construct.ULInt32(u'path_offset'),
                                      construct.ULInt32(u'value_offset'),
                                      construct.ULInt64(u'unknown3'),
                                      construct.LFloat64(u'expiration_time'),
                                      construct.LFloat64(u'creation_time'))

    _FILE_FOOTER = construct.Struct(u'file_footer',
                                    construct.Bytes(u'unknown1', 8))

    def __init__(self, debug=False):
        """Initializes a file.

    Args:
      debug (Optional[bool]): True if debug information should be printed.
    """
        super(BinaryCookiesFile, self).__init__()
        self._debug = debug
        self._file_object = None
        self._file_object_opened_in_object = False
        self._file_size = 0
        self._page_sizes = []

    def _ReadFileFooter(self):
        """Reads the file footer.

    Raises:
      IOError: if the file footer cannot be read.
    """
        file_footer_data = self._file_object.read(self._FILE_FOOTER.sizeof())

        if self._debug:
            print(u'File footer data:')
            print(hexdump.Hexdump(file_footer_data))

    def _ReadFileHeader(self):
        """Reads the file header.

    Raises:
      IOError: if the file header cannot be read.
    """
        if self._debug:
            print(u'Seeking file header offset: 0x{0:08x}'.format(0))

        self._file_object.seek(0, os.SEEK_SET)

        file_header_data = self._file_object.read(self._FILE_HEADER.sizeof())

        if self._debug:
            print(u'File header data:')
            print(hexdump.Hexdump(file_header_data))

        try:
            file_header_struct = self._FILE_HEADER.parse(file_header_data)
        except construct.FieldError as exception:
            raise IOError(
                u'Unable to parse file header with error: {0:s}'.format(
                    exception))

        if self._debug:
            print(u'Signature\t\t\t\t\t\t\t: {0!s}'.format(
                file_header_struct.signature))
            print(u'Number of pages\t\t\t\t\t\t\t: {0:d}'.format(
                file_header_struct.number_of_pages))

            print(u'')

        page_sizes_data_size = file_header_struct.number_of_pages * 4

        page_sizes_data = self._file_object.read(page_sizes_data_size)

        if self._debug:
            print(u'Page sizes data:')
            print(hexdump.Hexdump(page_sizes_data))

        try:
            page_sizes_array = construct.Array(
                file_header_struct.number_of_pages,
                construct.UBInt32(u'page_sizes')).parse(page_sizes_data)

        except construct.FieldError as exception:
            raise IOError(
                u'Unable to parse page sizes array with error: {0:s}'.format(
                    exception))

        self._page_sizes = []
        for page_index in range(file_header_struct.number_of_pages):
            self._page_sizes.append(page_sizes_array[page_index])

            if self._debug:
                print(u'Page: {0:d} size\t\t\t\t\t\t\t: {1:d}'.format(
                    page_index, page_sizes_array[page_index]))

        if self._debug:
            print(u'')

    def _ReadPages(self):
        """Reads the pages."""
        for page_size in iter(self._page_sizes):
            self._ReadPage(page_size)

    def _ReadPage(self, page_size):
        """Reads the page.

    Args:
      page_size (int): page size.
    """
        page_data = self._file_object.read(page_size)

        try:
            page_header_struct = self._PAGE_HEADER.parse(page_data)
        except construct.FieldError as exception:
            raise IOError(
                u'Unable to parse page header with error: {0:s}'.format(
                    exception))

        page_header_data_size = 8 + (4 * page_header_struct.number_of_records)

        if self._debug:
            print(u'Page header data:')
            print(hexdump.Hexdump(page_data[:page_header_data_size]))

        if self._debug:
            print(u'Signature\t\t\t\t\t\t\t: 0x{0:08x}'.format(
                page_header_struct.signature))
            print(u'Number of records\t\t\t\t\t\t: {0:d}'.format(
                page_header_struct.number_of_records))

        record_offsets = []
        for record_index in range(page_header_struct.number_of_records):
            record_offsets.append(page_header_struct.offsets[record_index])

            if self._debug:
                print(u'Record: {0:d} offset\t\t\t\t\t\t: {1:d}'.format(
                    record_index, page_header_struct.offsets[record_index]))

        if self._debug:
            print(u'')

        for record_offset in iter(record_offsets):
            self._ParseRecord(page_data, record_offset)

    def _ParseRecord(self, page_data, record_offset):
        """Reads a record from the page data.

    Args:
      page_data (bytes): page data.
      record_offset (int): record offset.
    """
        try:
            record_header_struct = self._RECORD_HEADER.parse(
                page_data[record_offset:])
        except construct.FieldError as exception:
            raise IOError(
                u'Unable to parse record header with error: {0:s}'.format(
                    exception))

        record_data_size = record_offset + record_header_struct.size

        if self._debug:
            print(u'Record data:')
            print(hexdump.Hexdump(page_data[record_offset:record_data_size]))

        if self._debug:
            print(u'Size\t\t\t\t\t\t\t\t: {0:d}'.format(
                record_header_struct.size))
            print(u'Unknown1\t\t\t\t\t\t\t: 0x{0:08x}'.format(
                record_header_struct.unknown1))
            print(u'Flags\t\t\t\t\t\t\t\t: 0x{0:08x}'.format(
                record_header_struct.flags))
            print(u'Unknown2\t\t\t\t\t\t\t: 0x{0:08x}'.format(
                record_header_struct.unknown2))
            print(u'URL offset\t\t\t\t\t\t\t: {0:d}'.format(
                record_header_struct.url_offset))
            print(u'name offset\t\t\t\t\t\t\t: {0:d}'.format(
                record_header_struct.name_offset))
            print(u'path offset\t\t\t\t\t\t\t: {0:d}'.format(
                record_header_struct.path_offset))
            print(u'value offset\t\t\t\t\t\t\t: {0:d}'.format(
                record_header_struct.value_offset))
            print(u'Unknown3\t\t\t\t\t\t\t: 0x{0:08x}'.format(
                record_header_struct.unknown3))

            date_time = (datetime.datetime(2001, 1, 1) + datetime.timedelta(
                seconds=int(record_header_struct.expiration_time)))
            print(u'expiration time\t\t\t\t\t\t\t: {0!s} ({1:f})'.format(
                date_time, record_header_struct.expiration_time))

            date_time = (datetime.datetime(2001, 1, 1) + datetime.timedelta(
                seconds=int(record_header_struct.creation_time)))
            print(u'creation time\t\t\t\t\t\t\t: {0!s} ({1:f})'.format(
                date_time, record_header_struct.creation_time))

            print(u'')

            if record_header_struct.url_offset:
                data_offset = record_offset + record_header_struct.url_offset
                string = construct.CString(u'string').parse(
                    page_data[data_offset:record_data_size])
            else:
                string = u''

            print(u'URL\t\t\t\t\t\t\t\t: {0:s}'.format(string))

            if record_header_struct.name_offset:
                data_offset = record_offset + record_header_struct.name_offset
                string = construct.CString(u'string').parse(
                    page_data[data_offset:record_data_size])
            else:
                string = u''

            print(u'Name\t\t\t\t\t\t\t\t: {0:s}'.format(string))

            if record_header_struct.path_offset:
                data_offset = record_offset + record_header_struct.path_offset
                string = construct.CString(u'string').parse(
                    page_data[data_offset:record_data_size])
            else:
                string = u''

            print(u'Path\t\t\t\t\t\t\t\t: {0:s}'.format(string))

            if record_header_struct.value_offset:
                data_offset = record_offset + record_header_struct.value_offset
                string = construct.CString(u'string').parse(
                    page_data[data_offset:record_data_size])
            else:
                string = u''

            print(u'Value\t\t\t\t\t\t\t\t: {0:s}'.format(string))

            print(u'')

    def Close(self):
        """Closes a file."""
        if self._file_object_opened_in_object:
            self._file_object.close()
        self._file_object = None

    def Open(self, filename):
        """Opens a file.

    Args:
      filename (str): filename.
    """
        stat_object = os.stat(filename)
        self._file_size = stat_object.st_size

        self._file_object = open(filename, 'rb')
        self._file_object_opened_in_object = True

        self._ReadFileHeader()
        self._ReadPages()
        self._ReadFileFooter()
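
_ParseRecord reads each cookie field as a NUL-terminated string located at record_offset plus the field's offset within the record. A minimal sketch of that CString extraction, assuming the legacy construct 2.x API and a hypothetical record layout:

import construct

record = b'\x10\x00\x00\x00' + b'name\x00value\x00'  # size field, then strings
name_offset = 4  # hypothetical field offset
print(construct.CString('string').parse(record[name_offset:]))  # -> name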
Example #10
class KeychainParser(interface.FileObjectParser):
  """Parser for Keychain files."""

  NAME = 'mac_keychain'
  DESCRIPTION = 'Parser for MacOS Keychain files.'

  KEYCHAIN_SIGNATURE = b'kych'
  KEYCHAIN_MAJOR_VERSION = 1
  KEYCHAIN_MINOR_VERSION = 0

  RECORD_TYPE_APPLICATION = 0x80000000
  RECORD_TYPE_INTERNET = 0x80000001

  # DB HEADER.
  KEYCHAIN_DB_HEADER = construct.Struct(
      'db_header',
      construct.Bytes('signature', 4),
      construct.UBInt16('major_version'),
      construct.UBInt16('minor_version'),
      construct.UBInt32('header_size'),
      construct.UBInt32('schema_offset'),
      construct.Padding(4))

  # DB SCHEMA.
  KEYCHAIN_DB_SCHEMA = construct.Struct(
      'db_schema',
      construct.UBInt32('size'),
      construct.UBInt32('number_of_tables'))

  # For each number_of_tables, the schema has a TABLE_OFFSET with the
  # offset starting in the DB_SCHEMA.
  TABLE_OFFSET = construct.UBInt32('table_offset')

  TABLE_HEADER = construct.Struct(
      'table_header',
      construct.UBInt32('table_size'),
      construct.UBInt32('record_type'),
      construct.UBInt32('number_of_records'),
      construct.UBInt32('first_record'),
      construct.UBInt32('index_offset'),
      construct.Padding(4),
      construct.UBInt32('recordnumbercount'))

  RECORD_HEADER = construct.Struct(
      'record_header',
      construct.UBInt32('entry_length'),
      construct.Padding(12),
      construct.UBInt32('ssgp_length'),
      construct.Padding(4),
      construct.UBInt32('creation_time'),
      construct.UBInt32('last_modification_time'),
      construct.UBInt32('text_description'),
      construct.Padding(4),
      construct.UBInt32('comments'),
      construct.Padding(8),
      construct.UBInt32('entry_name'),
      construct.Padding(20),
      construct.UBInt32('account_name'),
      construct.Padding(4))

  RECORD_HEADER_APP = construct.Struct(
      'record_entry_app',
      RECORD_HEADER,
      construct.Padding(4))

  RECORD_HEADER_INET = construct.Struct(
      'record_entry_inet',
      RECORD_HEADER,
      construct.UBInt32('where'),
      construct.UBInt32('protocol'),
      construct.UBInt32('type'),
      construct.Padding(4),
      construct.UBInt32('url'))

  TEXT = construct.PascalString(
      'text', length_field=construct.UBInt32('length'))

  TIME = construct.Struct(
      'timestamp',
      construct.String('year', 4),
      construct.String('month', 2),
      construct.String('day', 2),
      construct.String('hour', 2),
      construct.String('minute', 2),
      construct.String('second', 2),
      construct.Padding(2))

  TYPE_TEXT = construct.String('type', 4)

  # TODO: add more protocols.
  _PROTOCOL_TRANSLATION_DICT = {
      'htps': 'https',
      'smtp': 'smtp',
      'imap': 'imap',
      'http': 'http'}

  def _ReadEntryApplication(self, parser_mediator, file_object):
    """Extracts the information from an application password entry.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.
    """
    record_offset = file_object.tell()
    try:
      record_struct = self.RECORD_HEADER_APP.parse_stream(file_object)
    except (IOError, construct.FieldError):
      parser_mediator.ProduceExtractionError(
          'unable to parse record structure at offset: 0x{0:08x}'.format(
              record_offset))
      return

    (ssgp_hash, creation_time, last_modification_time, text_description,
     comments, entry_name, account_name) = self._ReadEntryHeader(
         parser_mediator, file_object, record_struct.record_header,
         record_offset)

    # Move to the end of the record.
    next_record_offset = (
        record_offset + record_struct.record_header.entry_length)
    file_object.seek(next_record_offset, os.SEEK_SET)

    event_data = KeychainApplicationRecordEventData()
    event_data.account_name = account_name
    event_data.comments = comments
    event_data.entry_name = entry_name
    event_data.ssgp_hash = ssgp_hash
    event_data.text_description = text_description

    if creation_time:
      event = time_events.DateTimeValuesEvent(
          creation_time, definitions.TIME_DESCRIPTION_CREATION)
      parser_mediator.ProduceEventWithEventData(event, event_data)

    if last_modification_time:
      event = time_events.DateTimeValuesEvent(
          last_modification_time, definitions.TIME_DESCRIPTION_MODIFICATION)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def _ReadEntryHeader(
      self, parser_mediator, file_object, record, record_offset):
    """Read the common record attributes.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.
      record (construct.Struct): record header structure.
      record_offset (int): offset of the start of the record.

    Returns:
      A tuple containing:
        ssgp_hash: Hash of the encrypted data (passwd, cert, note).
        creation_time (dfdatetime.TimeElements): entry creation time or None.
        last_modification_time (dfdatetime.TimeElements): entry last
            modification time or None.
        text_description: A brief description of the entry.
        comments: The comments of the entry.
        entry_name: Name of the entry.
        account_name: Name of the account.
    """
    # TODO: reduce number of seeks and/or offset calculations needed
    # for parsing.

    # Info: The hash header always starts with the string "ssgp" followed
    #       by the hash. Furthermore, the fields are always a multiple of
    #       four bytes; if not, the value is padded with 0x00.
    ssgp_hash = binascii.hexlify(file_object.read(record.ssgp_length)[4:])

    creation_time = None

    structure_offset = record_offset + record.creation_time - 1
    file_object.seek(structure_offset, os.SEEK_SET)

    try:
      time_structure = self.TIME.parse_stream(file_object)
    except construct.FieldError as exception:
      time_structure = None
      parser_mediator.ProduceExtractionError(
          'unable to parse creation time with error: {0!s}'.format(exception))

    if time_structure:
      time_elements_tuple = (
          time_structure.year, time_structure.month, time_structure.day,
          time_structure.hour, time_structure.minute, time_structure.second)

      creation_time = dfdatetime_time_elements.TimeElements()
      try:
        creation_time.CopyFromStringTuple(
            time_elements_tuple=time_elements_tuple)
      except ValueError:
        creation_time = None
        parser_mediator.ProduceExtractionError(
            'invalid creation time value: {0!s}'.format(time_elements_tuple))

    last_modification_time = None

    structure_offset = record_offset + record.last_modification_time - 1
    file_object.seek(structure_offset, os.SEEK_SET)

    try:
      time_structure = self.TIME.parse_stream(file_object)
    except construct.FieldError as exception:
      time_structure = None
      parser_mediator.ProduceExtractionError(
          'unable to parse last modification time with error: {0!s}'.format(
              exception))

    if time_structure:
      time_elements_tuple = (
          time_structure.year, time_structure.month, time_structure.day,
          time_structure.hour, time_structure.minute, time_structure.second)

      last_modification_time = dfdatetime_time_elements.TimeElements()
      try:
        last_modification_time.CopyFromStringTuple(
            time_elements_tuple=time_elements_tuple)
      except ValueError:
        last_modification_time = None
        parser_mediator.ProduceExtractionError(
            'invalid last modification time value: {0!s}'.format(
                time_elements_tuple))

    text_description = 'N/A'
    if record.text_description:
      structure_offset = record_offset + record.text_description - 1
      file_object.seek(structure_offset, os.SEEK_SET)

      try:
        text_description = self.TEXT.parse_stream(file_object)
      except construct.FieldError as exception:
        parser_mediator.ProduceExtractionError(
            'unable to parse text description with error: {0!s}'.format(
                exception))

    comments = 'N/A'
    if record.comments:
      structure_offset = record_offset + record.comments - 1
      file_object.seek(structure_offset, os.SEEK_SET)

      try:
        comments = self.TEXT.parse_stream(file_object)
      except construct.FieldError as exception:
        parser_mediator.ProduceExtractionError(
            'unable to parse comments with error: {0!s}'.format(exception))

    structure_offset = record_offset + record.entry_name - 1
    file_object.seek(structure_offset, os.SEEK_SET)

    try:
      entry_name = self.TEXT.parse_stream(file_object)
    except construct.FieldError as exception:
      entry_name = 'N/A'
      parser_mediator.ProduceExtractionError(
          'unable to parse entry name with error: {0!s}'.format(exception))

    structure_offset = record_offset + record.account_name - 1
    file_object.seek(structure_offset, os.SEEK_SET)

    try:
      account_name = self.TEXT.parse_stream(file_object)
    except construct.FieldError as exception:
      account_name = 'N/A'
      parser_mediator.ProduceExtractionError(
          'unable to parse account name with error: {0!s}'.format(exception))

    return (
        ssgp_hash, creation_time, last_modification_time,
        text_description, comments, entry_name, account_name)

  def _ReadEntryInternet(self, parser_mediator, file_object):
    """Extracts the information from an Internet password entry.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.
    """
    record_offset = file_object.tell()
    try:
      record_header_struct = self.RECORD_HEADER_INET.parse_stream(file_object)
    except (IOError, construct.FieldError):
      parser_mediator.ProduceExtractionError((
          'unable to parse record header structure at offset: '
          '0x{0:08x}').format(record_offset))
      return

    (ssgp_hash, creation_time, last_modification_time, text_description,
     comments, entry_name, account_name) = self._ReadEntryHeader(
         parser_mediator, file_object, record_header_struct.record_header,
         record_offset)

    if not record_header_struct.where:
      where = 'N/A'
      protocol = 'N/A'
      type_protocol = 'N/A'

    else:
      offset = record_offset + record_header_struct.where - 1
      file_object.seek(offset, os.SEEK_SET)
      where = self.TEXT.parse_stream(file_object)

      offset = record_offset + record_header_struct.protocol - 1
      file_object.seek(offset, os.SEEK_SET)
      protocol = self.TYPE_TEXT.parse_stream(file_object)

      offset = record_offset + record_header_struct.type - 1
      file_object.seek(offset, os.SEEK_SET)
      type_protocol = self.TEXT.parse_stream(file_object)
      type_protocol = self._PROTOCOL_TRANSLATION_DICT.get(
          type_protocol, type_protocol)

      if record_header_struct.url:
        offset = record_offset + record_header_struct.url - 1
        file_object.seek(offset, os.SEEK_SET)
        url = self.TEXT.parse_stream(file_object)
        where = '{0:s}{1:s}'.format(where, url)

    # Move to the end of the record.
    next_record_offset = (
        record_offset + record_header_struct.record_header.entry_length)
    file_object.seek(next_record_offset, os.SEEK_SET)

    event_data = KeychainInternetRecordEventData()
    event_data.account_name = account_name
    event_data.comments = comments
    event_data.entry_name = entry_name
    event_data.protocol = protocol
    event_data.ssgp_hash = ssgp_hash
    event_data.text_description = text_description
    event_data.type_protocol = type_protocol
    event_data.where = where

    if creation_time:
      event = time_events.DateTimeValuesEvent(
          creation_time, definitions.TIME_DESCRIPTION_CREATION)
      parser_mediator.ProduceEventWithEventData(event, event_data)

    if last_modification_time:
      event = time_events.DateTimeValuesEvent(
          last_modification_time, definitions.TIME_DESCRIPTION_MODIFICATION)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def _ReadTableOffsets(self, parser_mediator, file_object):
    """Reads the table offsets.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Returns:
      list[int]: table offsets.
    """
    # INFO: The HEADER KEYCHAIN:
    # [DBHEADER] + [DBSCHEMA] + [OFFSET TABLE A] + ... + [OFFSET TABLE Z]
    # Where the table offset is relative to the first byte of the DB Schema,
    # then we must add to this offset the size of the [DBHEADER].
    # Read the database schema and extract the offset for all the tables.
    # They are ordered by file position from the top to the bottom of the file.
    table_offsets = []

    try:
      db_schema_struct = self.KEYCHAIN_DB_SCHEMA.parse_stream(file_object)
    except (IOError, construct.FieldError):
      parser_mediator.ProduceExtractionError(
          'unable to parse database schema structure')
      return []

    for index in range(db_schema_struct.number_of_tables):
      try:
        table_offset = self.TABLE_OFFSET.parse_stream(file_object)
      except (IOError, construct.FieldError):
        parser_mediator.ProduceExtractionError(
            'unable to parse table offsets: {0:d}'.format(index))
        return

      table_offsets.append(table_offset + self.KEYCHAIN_DB_HEADER.sizeof())

    return table_offsets

  @classmethod
  def GetFormatSpecification(cls):
    """Retrieves the format specification.

    Returns:
      FormatSpecification: format specification.
    """
    format_specification = specification.FormatSpecification(cls.NAME)
    format_specification.AddNewSignature(
        cls.KEYCHAIN_SIGNATURE, offset=0)
    return format_specification

  def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses a MacOS keychain file-like object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    try:
      db_header = self.KEYCHAIN_DB_HEADER.parse_stream(file_object)
    except (IOError, construct.FieldError):
      raise errors.UnableToParseFile('Unable to parse file header.')

    if db_header.signature != self.KEYCHAIN_SIGNATURE:
      raise errors.UnableToParseFile('Not a MacOS keychain file.')

    if (db_header.major_version != self.KEYCHAIN_MAJOR_VERSION or
        db_header.minor_version != self.KEYCHAIN_MINOR_VERSION):
      parser_mediator.ProduceExtractionError(
          'unsupported format version: {0:d}.{1:d}'.format(
              db_header.major_version, db_header.minor_version))
      return

    # TODO: document format and determine if -1 offset correction is needed.
    table_offsets = self._ReadTableOffsets(parser_mediator, file_object)
    for table_offset in table_offsets:
      # Skipping X bytes, unknown data at this point.
      file_object.seek(table_offset, os.SEEK_SET)

      try:
        table = self.TABLE_HEADER.parse_stream(file_object)
      except (IOError, construct.FieldError):
        parser_mediator.ProduceExtractionError(
            'unable to parse table structure at offset: 0x{0:08x}'.format(
                table_offset))
        continue

      # Table_offset: absolute byte in the file where the table starts.
      # table.first_record: first record in the table, relative to the
      #                     first byte of the table.
      file_object.seek(table_offset + table.first_record, os.SEEK_SET)

      if table.record_type == self.RECORD_TYPE_INTERNET:
        for _ in range(table.number_of_records):
          self._ReadEntryInternet(parser_mediator, file_object)

      elif table.record_type == self.RECORD_TYPE_APPLICATION:
        for _ in range(table.number_of_records):
          self._ReadEntryApplication(parser_mediator, file_object)
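
Keychain timestamps are stored as ASCII digits rather than binary integers, which is why _ReadEntryHeader copies them into dfdatetime TimeElements from a string tuple. A sketch of the TIME structure round trip, assuming the legacy construct 2.x API; the timestamp value is made-up test data:

import construct

TIME = construct.Struct(
    'timestamp',
    construct.String('year', 4),
    construct.String('month', 2),
    construct.String('day', 2),
    construct.String('hour', 2),
    construct.String('minute', 2),
    construct.String('second', 2),
    construct.Padding(2))

parsed = TIME.parse(b'20240131235959\x00\x00')
assert (parsed.year, parsed.month, parsed.day) == ('2024', '01', '31')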
Example #11
class BSMParser(interface.FileObjectParser):
  """Parser for BSM files."""

  NAME = 'bsm_log'
  DESCRIPTION = 'Parser for BSM log files.'

  # BSM supported version (0x0b = 11).
  AUDIT_HEADER_VERSION = 11

  # Magic Trail Header.
  BSM_TOKEN_TRAILER_MAGIC = b'b105'

  # IP Version constants.
  AU_IPv4 = 4
  AU_IPv6 = 16

  IPV4_STRUCT = construct.UBInt32('ipv4')

  IPV6_STRUCT = construct.Struct(
      'ipv6',
      construct.UBInt64('high'),
      construct.UBInt64('low'))

  # Tested structures.
  # INFO: I have omitted the ID in the structure declarations.
  #       I use _BSM_TOKEN first to read the ID, and then the structure.
  # Tokens always start with an ID value that identifies their token
  # type and subsequent structure.
  _BSM_TOKEN = construct.UBInt8('token_id')

  # Data type structures.
  BSM_TOKEN_DATA_CHAR = construct.String('value', 1)
  BSM_TOKEN_DATA_SHORT = construct.UBInt16('value')
  BSM_TOKEN_DATA_INTEGER = construct.UBInt32('value')

  # Common structure used by other structures.
  # audit_uid: integer, uid that generates the entry.
  # effective_uid: integer, the effective user id used.
  # effective_gid: integer, the effective group id used.
  # real_uid: integer, user id of the user that executes the process.
  # real_gid: integer, group id of the group that executes the process.
  # pid: integer, identification number of the process.
  # session_id: unknown, needs research.
  BSM_TOKEN_SUBJECT_SHORT = construct.Struct(
      'subject_data',
      construct.UBInt32('audit_uid'),
      construct.UBInt32('effective_uid'),
      construct.UBInt32('effective_gid'),
      construct.UBInt32('real_uid'),
      construct.UBInt32('real_gid'),
      construct.UBInt32('pid'),
      construct.UBInt32('session_id'))

  # Common structure used by other structures.
  # Identifies the kind of inet address (IPv4 or IPv6).
  # TODO: instead of 16, AU_IPv6 must be used.
  BSM_IP_TYPE_SHORT = construct.Struct(
      'bsm_ip_type_short',
      construct.UBInt32('net_type'),
      construct.Switch(
          'ip_addr',
          _BSMTokenGetNetType,
          {16: IPV6_STRUCT},
          default=IPV4_STRUCT))

  # Initial fields structure used by header structures.
  # length: integer, the length of the entry, equal to trailer (doc: length).
  # version: integer, version of BSM (AUDIT_HEADER_VERSION).
  # event_type: integer, the type of event (/etc/security/audit_event).
  # modifier: integer, unknown, needs research (it is always 0).
  BSM_HEADER = construct.Struct(
      'bsm_header',
      construct.UBInt32('length'),
      construct.UBInt8('version'),
      construct.UBInt16('event_type'),
      construct.UBInt16('modifier'))

  # First token of one entry.
  # timestamp: unsigned integer, number of seconds since
  #            January 1, 1970 00:00:00 UTC.
  # microseconds: unsigned integer, number of microseconds.
  BSM_HEADER32 = construct.Struct(
      'bsm_header32',
      BSM_HEADER,
      construct.UBInt32('timestamp'),
      construct.UBInt32('microseconds'))

  BSM_HEADER64 = construct.Struct(
      'bsm_header64',
      BSM_HEADER,
      construct.UBInt64('timestamp'),
      construct.UBInt64('microseconds'))

  BSM_HEADER32_EX = construct.Struct(
      'bsm_header32_ex',
      BSM_HEADER,
      BSM_IP_TYPE_SHORT,
      construct.UBInt32('timestamp'),
      construct.UBInt32('microseconds'))

  # Token TEXT, provides extra information.
  BSM_TOKEN_TEXT = construct.Struct(
      'bsm_token_text',
      construct.UBInt16('length'),
      construct.Array(_BSMTokenGetLength, construct.UBInt8('text')))

  # Path of the executable.
  BSM_TOKEN_PATH = BSM_TOKEN_TEXT

  # Identifies the end of the record (followed by TRAILER).
  # status: integer that identifies the status of the exit (BSM_ERRORS).
  # return: returned value from the operation.
  BSM_TOKEN_RETURN32 = construct.Struct(
      'bsm_token_return32',
      construct.UBInt8('status'),
      construct.UBInt32('return_value'))

  BSM_TOKEN_RETURN64 = construct.Struct(
      'bsm_token_return64',
      construct.UBInt8('status'),
      construct.UBInt64('return_value'))

  # Identifies the number of bytes that were written.
  # magic: 2 bytes that identify the TRAILER (BSM_TOKEN_TRAILER_MAGIC).
  # length: integer with the size of the entry in bytes.
  BSM_TOKEN_TRAILER = construct.Struct(
      'bsm_token_trailer',
      construct.UBInt16('magic'),
      construct.UBInt32('record_length'))

  # A 32-bits argument.
  # num_arg: the number of the argument.
  # name_arg: the argument's name.
  # text: the string value of the argument.
  BSM_TOKEN_ARGUMENT32 = construct.Struct(
      'bsm_token_argument32',
      construct.UBInt8('num_arg'),
      construct.UBInt32('name_arg'),
      construct.UBInt16('length'),
      construct.Array(_BSMTokenGetLength, construct.UBInt8('text')))

  # A 64-bits argument.
  # num_arg: integer, the number of the argument.
  # name_arg: text, the argument's name.
  # text: the string value of the argument.
  BSM_TOKEN_ARGUMENT64 = construct.Struct(
      'bsm_token_argument64',
      construct.UBInt8('num_arg'),
      construct.UBInt64('name_arg'),
      construct.UBInt16('length'),
      construct.Array(_BSMTokenGetLength, construct.UBInt8('text')))

  # Identifies a user.
  # terminal_id: unknown, research needed.
  # terminal_addr: unknown, research needed.
  BSM_TOKEN_SUBJECT32 = construct.Struct(
      'bsm_token_subject32',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      IPV4_STRUCT)

  # Identifies a user using an extended token.
  # terminal_port: unknown, research needed.
  # net_type: unknown, research needed.
  BSM_TOKEN_SUBJECT32_EX = construct.Struct(
      'bsm_token_subject32_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_opaque // AUT_OPAQUE
  BSM_TOKEN_OPAQUE = BSM_TOKEN_TEXT

  # au_to_seq // AUT_SEQ
  BSM_TOKEN_SEQUENCE = BSM_TOKEN_DATA_INTEGER

  # Program execution with options.
  # For each argument there is a string followed by "\x00".
  # Example: [00 00 00 02][41 42 43 00 42 42 00]
  #          2 Arguments, Arg1: [414243] Arg2: [4242].
  BSM_TOKEN_EXEC_ARGUMENTS = construct.UBInt32('number_arguments')

  BSM_TOKEN_EXEC_ARGUMENT = construct.Struct(
      'bsm_token_exec_argument',
      construct.RepeatUntil(
          _BSMTokenIsEndOfString, construct.StaticField("text", 1)))

  # au_to_in_addr // AUT_IN_ADDR:
  BSM_TOKEN_ADDR = IPV4_STRUCT

  # au_to_in_addr_ext // AUT_IN_ADDR_EX:
  BSM_TOKEN_ADDR_EXT = construct.Struct(
      'bsm_token_addr_ext',
      construct.UBInt32('net_type'),
      IPV6_STRUCT)

  # au_to_ip // AUT_IP:
  # TODO: parse this header in the correct way.
  BSM_TOKEN_IP = construct.String('binary_ipv4_add', 20)

  # au_to_ipc // AUT_IPC:
  BSM_TOKEN_IPC = construct.Struct(
      'bsm_token_ipc',
      construct.UBInt8('object_type'),
      construct.UBInt32('object_id'))

  # au_to_ipc_perm // au_to_ipc_perm
  BSM_TOKEN_IPC_PERM = construct.Struct(
      'bsm_token_ipc_perm',
      construct.UBInt32('user_id'),
      construct.UBInt32('group_id'),
      construct.UBInt32('creator_user_id'),
      construct.UBInt32('creator_group_id'),
      construct.UBInt32('access_mode'),
      construct.UBInt32('slot_seq'),
      construct.UBInt32('key'))

  # au_to_iport // AUT_IPORT:
  BSM_TOKEN_PORT = construct.UBInt16('port_number')

  # au_to_file // AUT_OTHER_FILE32:
  BSM_TOKEN_FILE = construct.Struct(
      'bsm_token_file',
      construct.UBInt32('timestamp'),
      construct.UBInt32('microseconds'),
      construct.UBInt16('length'),
      construct.Array(_BSMTokenGetLength, construct.UBInt8('text')))

  # au_to_subject64 // AUT_SUBJECT64:
  BSM_TOKEN_SUBJECT64 = construct.Struct(
      'bsm_token_subject64',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64('terminal_port'),
      IPV4_STRUCT)

  # au_to_subject64_ex // AUT_SUBJECT64_EX:
  BSM_TOKEN_SUBJECT64_EX = construct.Struct(
      'bsm_token_subject64_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      construct.UBInt32('terminal_type'),
      BSM_IP_TYPE_SHORT)

  # au_to_process32 // AUT_PROCESS32:
  BSM_TOKEN_PROCESS32 = construct.Struct(
      'bsm_token_process32',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      IPV4_STRUCT)

  # au_to_process64 // AUT_PROCESS64:
  BSM_TOKEN_PROCESS64 = construct.Struct(
      'bsm_token_process64',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64('terminal_port'),
      IPV4_STRUCT)

  # au_to_process32_ex // AUT_PROCESS32_EX:
  BSM_TOKEN_PROCESS32_EX = construct.Struct(
      'bsm_token_process32_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt32('terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_process64_ex // AUT_PROCESS64_EX:
  BSM_TOKEN_PROCESS64_EX = construct.Struct(
      'bsm_token_process64_ex',
      BSM_TOKEN_SUBJECT_SHORT,
      construct.UBInt64('terminal_port'),
      BSM_IP_TYPE_SHORT)

  # au_to_sock_inet32 // AUT_SOCKINET32:
  BSM_TOKEN_AUT_SOCKINET32 = construct.Struct(
      'bsm_token_aut_sockinet32',
      construct.UBInt16('net_type'),
      construct.UBInt16('port_number'),
      IPV4_STRUCT)

  # Info: checked against the XNU source code, but not against a
  #       real BSM file.
  BSM_TOKEN_AUT_SOCKINET128 = construct.Struct(
      'bsm_token_aut_sockinet128',
      construct.UBInt16('net_type'),
      construct.UBInt16('port_number'),
      IPV6_STRUCT)

  INET6_ADDR_TYPE = construct.Struct(
      'addr_type',
      construct.UBInt16('ip_type'),
      construct.UBInt16('source_port'),
      construct.UBInt64('saddr_high'),
      construct.UBInt64('saddr_low'),
      construct.UBInt16('destination_port'),
      construct.UBInt64('daddr_high'),
      construct.UBInt64('daddr_low'))

  INET4_ADDR_TYPE = construct.Struct(
      'addr_type',
      construct.UBInt16('ip_type'),
      construct.UBInt16('source_port'),
      construct.UBInt32('source_address'),
      construct.UBInt16('destination_port'),
      construct.UBInt32('destination_address'))

  # au_to_socket_ex // AUT_SOCKET_EX
  # TODO: Change the 26 for unixbsm.BSM_PROTOCOLS.INET6.
  BSM_TOKEN_AUT_SOCKINET32_EX = construct.Struct(
      'bsm_token_aut_sockinet32_ex',
      construct.UBInt16('socket_domain'),
      construct.UBInt16('socket_type'),
      construct.Switch(
          'structure_addr_port',
          _BSMTokenGetSocketDomain,
          {26: INET6_ADDR_TYPE},
          default=INET4_ADDR_TYPE))

  # au_to_sock_unix // AUT_SOCKUNIX
  BSM_TOKEN_SOCKET_UNIX = construct.Struct(
      'bsm_token_au_to_sock_unix',
      construct.UBInt16('family'),
      construct.RepeatUntil(
          _BSMTokenIsEndOfString,
          construct.StaticField("path", 1)))

  # au_to_data // au_to_data
  # how to print: bsmtoken.BSM_TOKEN_DATA_PRINT.
  # type: bsmtoken.BSM_TOKEN_DATA_TYPE.
  # unit_count: number of type values.
  # BSM_TOKEN_DATA has a end field = type * unit_count
  BSM_TOKEN_DATA = construct.Struct(
      'bsm_token_data',
      construct.UBInt8('how_to_print'),
      construct.UBInt8('data_type'),
      construct.UBInt8('unit_count'))

  # au_to_attr32 // AUT_ATTR32
  BSM_TOKEN_ATTR32 = construct.Struct(
      'bsm_token_attr32',
      construct.UBInt32('file_mode'),
      construct.UBInt32('uid'),
      construct.UBInt32('gid'),
      construct.UBInt32('file_system_id'),
      construct.UBInt64('file_system_node_id'),
      construct.UBInt32('device'))

  # au_to_attr64 // AUT_ATTR64
  BSM_TOKEN_ATTR64 = construct.Struct(
      'bsm_token_attr64',
      construct.UBInt32('file_mode'),
      construct.UBInt32('uid'),
      construct.UBInt32('gid'),
      construct.UBInt32('file_system_id'),
      construct.UBInt64('file_system_node_id'),
      construct.UBInt64('device'))

  # au_to_exit // AUT_EXIT
  BSM_TOKEN_EXIT = construct.Struct(
      'bsm_token_exit',
      construct.UBInt32('status'),
      construct.UBInt32('return_value'))

  # au_to_newgroups // AUT_NEWGROUPS
  # INFO: we must read BSM_TOKEN_DATA_INTEGER for each group.
  BSM_TOKEN_GROUPS = construct.UBInt16('group_number')

  # au_to_exec_env == au_to_exec_args
  BSM_TOKEN_EXEC_ENV = BSM_TOKEN_EXEC_ARGUMENTS

  # au_to_zonename //AUT_ZONENAME
  BSM_TOKEN_ZONENAME = BSM_TOKEN_TEXT

  # Token ID.
  # List of valid Token_ID.
  # Token_ID -> (NAME_STRUCTURE, STRUCTURE)
  # Only the verified structures have been added to the valid structure list.
  _BSM_TOKEN_TYPES = {
      17: ('BSM_TOKEN_FILE', BSM_TOKEN_FILE),
      19: ('BSM_TOKEN_TRAILER', BSM_TOKEN_TRAILER),
      20: ('BSM_HEADER32', BSM_HEADER32),
      21: ('BSM_HEADER64', BSM_HEADER64),
      33: ('BSM_TOKEN_DATA', BSM_TOKEN_DATA),
      34: ('BSM_TOKEN_IPC', BSM_TOKEN_IPC),
      35: ('BSM_TOKEN_PATH', BSM_TOKEN_PATH),
      36: ('BSM_TOKEN_SUBJECT32', BSM_TOKEN_SUBJECT32),
      38: ('BSM_TOKEN_PROCESS32', BSM_TOKEN_PROCESS32),
      39: ('BSM_TOKEN_RETURN32', BSM_TOKEN_RETURN32),
      40: ('BSM_TOKEN_TEXT', BSM_TOKEN_TEXT),
      41: ('BSM_TOKEN_OPAQUE', BSM_TOKEN_OPAQUE),
      42: ('BSM_TOKEN_ADDR', BSM_TOKEN_ADDR),
      43: ('BSM_TOKEN_IP', BSM_TOKEN_IP),
      44: ('BSM_TOKEN_PORT', BSM_TOKEN_PORT),
      45: ('BSM_TOKEN_ARGUMENT32', BSM_TOKEN_ARGUMENT32),
      47: ('BSM_TOKEN_SEQUENCE', BSM_TOKEN_SEQUENCE),
      96: ('BSM_TOKEN_ZONENAME', BSM_TOKEN_ZONENAME),
      113: ('BSM_TOKEN_ARGUMENT64', BSM_TOKEN_ARGUMENT64),
      114: ('BSM_TOKEN_RETURN64', BSM_TOKEN_RETURN64),
      116: ('BSM_HEADER32_EX', BSM_HEADER32_EX),
      119: ('BSM_TOKEN_PROCESS64', BSM_TOKEN_PROCESS64),
      122: ('BSM_TOKEN_SUBJECT32_EX', BSM_TOKEN_SUBJECT32_EX),
      127: ('BSM_TOKEN_AUT_SOCKINET32_EX', BSM_TOKEN_AUT_SOCKINET32_EX),
      128: ('BSM_TOKEN_AUT_SOCKINET32', BSM_TOKEN_AUT_SOCKINET32)}

  # Untested structures.
  # When an untested structure is encountered, we also try to parse it
  # using these structures.
  BSM_TYPE_LIST_NOT_TESTED = {
      49: ('BSM_TOKEN_ATTR', BSM_TOKEN_ATTR32),
      50: ('BSM_TOKEN_IPC_PERM', BSM_TOKEN_IPC_PERM),
      52: ('BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS),
      59: ('BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS),
      60: ('BSM_TOKEN_EXEC_ARGUMENTS', BSM_TOKEN_EXEC_ARGUMENTS),
      61: ('BSM_TOKEN_EXEC_ENV', BSM_TOKEN_EXEC_ENV),
      62: ('BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32),
      82: ('BSM_TOKEN_EXIT', BSM_TOKEN_EXIT),
      115: ('BSM_TOKEN_ATTR64', BSM_TOKEN_ATTR64),
      117: ('BSM_TOKEN_SUBJECT64', BSM_TOKEN_SUBJECT64),
      123: ('BSM_TOKEN_PROCESS32_EX', BSM_TOKEN_PROCESS32_EX),
      124: ('BSM_TOKEN_PROCESS64_EX', BSM_TOKEN_PROCESS64_EX),
      125: ('BSM_TOKEN_SUBJECT64_EX', BSM_TOKEN_SUBJECT64_EX),
      126: ('BSM_TOKEN_ADDR_EXT', BSM_TOKEN_ADDR_EXT),
      129: ('BSM_TOKEN_AUT_SOCKINET128', BSM_TOKEN_AUT_SOCKINET128),
      130: ('BSM_TOKEN_SOCKET_UNIX', BSM_TOKEN_SOCKET_UNIX)}

  MESSAGE_CAN_NOT_SAVE = (
      'Plaso: some tokens from this entry cannot be saved. Entry at 0x{0:X} '
      'with unknown token id "0x{1:X}".')

  # BSM token types:
  # https://github.com/openbsm/openbsm/blob/master/sys/bsm/audit_record.h
  _BSM_TOKEN_TYPE_ARGUMENT32 = 45
  _BSM_TOKEN_TYPE_ARGUMENT64 = 113
  _BSM_TOKEN_TYPE_ATTR = 49
  _BSM_TOKEN_TYPE_ATTR32 = 62
  _BSM_TOKEN_TYPE_ATTR64 = 115
  _BSM_TOKEN_TYPE_EXEC_ARGUMENTS = 60
  _BSM_TOKEN_TYPE_EXEC_ENV = 61
  _BSM_TOKEN_TYPE_EXIT = 82
  _BSM_TOKEN_TYPE_HEADER32 = 20
  _BSM_TOKEN_TYPE_HEADER32_EX = 116
  _BSM_TOKEN_TYPE_HEADER64 = 21
  _BSM_TOKEN_TYPE_PATH = 35
  _BSM_TOKEN_TYPE_PROCESS32 = 38
  _BSM_TOKEN_TYPE_PROCESS32_EX = 123
  _BSM_TOKEN_TYPE_PROCESS64 = 119
  _BSM_TOKEN_TYPE_PROCESS64_EX = 124
  _BSM_TOKEN_TYPE_RETURN32 = 39
  _BSM_TOKEN_TYPE_RETURN64 = 114
  _BSM_TOKEN_TYPE_SUBJECT32 = 36
  _BSM_TOKEN_TYPE_SUBJECT32_EX = 122
  _BSM_TOKEN_TYPE_SUBJECT64 = 117
  _BSM_TOKEN_TYPE_SUBJECT64_EX = 125
  _BSM_TOKEN_TYPE_TEXT = 40
  _BSM_TOKEN_TYPE_ZONENAME = 96

  _BSM_ARGUMENT_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_ARGUMENT32,
      _BSM_TOKEN_TYPE_ARGUMENT64)

  _BSM_ATTR_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_ATTR,
      _BSM_TOKEN_TYPE_ATTR32,
      _BSM_TOKEN_TYPE_ATTR64)

  _BSM_EXEV_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_EXEC_ARGUMENTS,
      _BSM_TOKEN_TYPE_EXEC_ENV)

  _BSM_HEADER_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_HEADER32,
      _BSM_TOKEN_TYPE_HEADER32_EX,
      _BSM_TOKEN_TYPE_HEADER64)

  _BSM_PROCESS_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_PROCESS32,
      _BSM_TOKEN_TYPE_PROCESS64)

  _BSM_PROCESS_EX_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_PROCESS32_EX,
      _BSM_TOKEN_TYPE_PROCESS64_EX)

  _BSM_RETURN_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_EXIT,
      _BSM_TOKEN_TYPE_RETURN32,
      _BSM_TOKEN_TYPE_RETURN64)

  _BSM_SUBJECT_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_SUBJECT32,
      _BSM_TOKEN_TYPE_SUBJECT64)

  _BSM_SUBJECT_EX_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_SUBJECT32_EX,
      _BSM_TOKEN_TYPE_SUBJECT64_EX)

  _BSM_UTF8_BYTE_ARRAY_TOKEN_TYPES = (
      _BSM_TOKEN_TYPE_PATH,
      _BSM_TOKEN_TYPE_TEXT,
      _BSM_TOKEN_TYPE_ZONENAME)

  def __init__(self):
    """Initializes a parser object."""
    super(BSMParser, self).__init__()
    # Create the dictionary with all token IDs: tested and untested.
    self._bsm_type_list_all = self._BSM_TOKEN_TYPES.copy()
    self._bsm_type_list_all.update(self.BSM_TYPE_LIST_NOT_TESTED)

  def _CopyByteArrayToBase16String(self, byte_array):
    """Copies a byte array into a base-16 encoded Unicode string.

    Args:
      byte_array (bytes): A byte array.

    Returns:
      str: a base-16 encoded Unicode string.
    """
    return ''.join(['{0:02x}'.format(byte) for byte in byte_array])

  def _CopyUtf8ByteArrayToString(self, byte_array):
    """Copies a UTF-8 encoded byte array into a Unicode string.

    Args:
      byte_array (bytes): A byte array containing a UTF-8 encoded string.

    Returns:
      str: A Unicode string.
    """
    # Convert the array of integer byte values into a byte string.
    byte_stream = bytes(bytearray(byte_array))

    try:
      string = byte_stream.decode('utf-8')
    except UnicodeDecodeError:
      logging.warning('Unable to decode UTF-8 formatted byte array.')
      string = byte_stream.decode('utf-8', errors='ignore')

    # The string is NUL-terminated; discard everything after the first NUL.
    string, _, _ = string.partition('\x00')
    return string

  def _IPv4Format(self, address):
    """Formats an IPv4 address as a human readable string.

    Args:
      address (int): IPv4 address.

    Returns:
      str: human readable string of IPv4 address in 4 octet representation:
          "1.2.3.4".
    """
    ipv4_string = self.IPV4_STRUCT.build(address)
    return socket.inet_ntoa(ipv4_string)

  def _IPv6Format(self, high, low):
    """Formats an IPv6 address as a human readable string.

    Args:
      high (int): upper 64-bit part of the IPv6 address.
      low (int): lower 64-bit part of the IPv6 address.

    Returns:
      str: human readable string of IPv6 address.
    """
    ipv6_string = self.IPV6_STRUCT.build(
        construct.Container(high=high, low=low))
    # socket.inet_ntop not supported in Windows.
    if hasattr(socket, 'inet_ntop'):
      return socket.inet_ntop(socket.AF_INET6, ipv6_string)

    # TODO: this approach can return a double "::", which is an illegal
    # IPv6 address.
    str_address = binascii.hexlify(ipv6_string).decode('ascii')
    address = []
    blank = False
    for pos in range(0, len(str_address), 4):
      if str_address[pos:pos + 4] == '0000':
        if not blank:
          address.append('')
          blank = True
      else:
        blank = False
        address.append(str_address[pos:pos + 4].lstrip('0'))
    return ':'.join(address)

  def _ParseBSMEvent(self, parser_mediator, file_object):
    """Parses a BSM entry (BSMEvent) from the file-like object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Returns:
      bool: True if the BSM entry was parsed.
    """
    record_start_offset = file_object.tell()

    try:
      token_type = self._BSM_TOKEN.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceExtractionError((
          'unable to parse BSM token type at offset: 0x{0:08x} with error: '
          '{1:s}.').format(record_start_offset, exception))
      return False

    if token_type not in self._BSM_HEADER_TOKEN_TYPES:
      parser_mediator.ProduceExtractionError(
          'unsupported token type: {0:d} at offset: 0x{1:08x}.'.format(
              token_type, record_start_offset))
      # TODO: on Mac OS X, searching for the trailer magic value that marks
      #       the end of the entry could allow parsing to continue.
      return False

    _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None))

    try:
      token = record_structure.parse_stream(file_object)
    except (IOError, construct.FieldError) as exception:
      parser_mediator.ProduceExtractionError((
          'unable to parse BSM record at offset: 0x{0:08x} with error: '
          '{1:s}.').format(record_start_offset, exception))
      return False

    event_type = bsmtoken.BSM_AUDIT_EVENT.get(
        token.bsm_header.event_type, 'UNKNOWN')
    event_type = '{0:s} ({1:d})'.format(
        event_type, token.bsm_header.event_type)

    timestamp = (token.timestamp * 1000000) + token.microseconds
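    # For example, timestamp=1325376000 and microseconds=500000 yield
    # 1325376000500000, which is 2012-01-01 00:00:00.500000 UTC.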
    date_time = dfdatetime_posix_time.PosixTimeInMicroseconds(
        timestamp=timestamp)

    record_length = token.bsm_header.length
    record_end_offset = record_start_offset + record_length

    # A dict of the tokens contained in this entry.
    extra_tokens = {}

    # Read until we reach the end of the record.
    while file_object.tell() < record_end_offset:
      # Check if it is a known token.
      try:
        token_type = self._BSM_TOKEN.parse_stream(file_object)
      except (IOError, construct.FieldError):
        logging.warning(
            'Unable to parse the Token ID at position: {0:d}'.format(
                file_object.tell()))
        return False

      _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None))

      if not record_structure:
        pending = record_end_offset - file_object.tell()
        new_extra_tokens = self.TryWithUntestedStructures(
            file_object, token_type, pending)
        extra_tokens.update(new_extra_tokens)
      else:
        token = record_structure.parse_stream(file_object)
        new_extra_tokens = self.FormatToken(token_type, token, file_object)
        extra_tokens.update(new_extra_tokens)

    if file_object.tell() > record_end_offset:
      logging.warning(
          'Token ID {0:d} not expected at position 0x{1:08x}. '
          'Jumping to the next entry.'.format(
              token_type, file_object.tell()))
      try:
        file_object.seek(
            record_end_offset - file_object.tell(), os.SEEK_CUR)
      except (IOError, construct.FieldError) as exception:
        logging.warning(
            'Unable to jump to next entry with error: {0:s}'.format(exception))
        return False

    # BSM can be in more than one OS: BSD, Solaris and Mac OS X.
    if parser_mediator.platform != 'MacOSX':
      event_data = BSMEventData()
    else:
      event_data = MacBSMEventData()

      # In Mac OS X the last two tokens are the return status and the trailer.
      return_value = extra_tokens.get('BSM_TOKEN_RETURN32')
      if not return_value:
        return_value = extra_tokens.get('BSM_TOKEN_RETURN64')
      if not return_value:
        return_value = 'UNKNOWN'

      event_data.return_value = return_value

    event_data.event_type = event_type
    event_data.extra_tokens = extra_tokens
    event_data.offset = record_start_offset
    event_data.record_length = record_length

    # TODO: check why trailer was passed to event in original while
    # event was expecting record length.
    # if extra_tokens:
    #   trailer = extra_tokens.get('BSM_TOKEN_TRAILER', 'unknown')

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_CREATION)
    parser_mediator.ProduceEventWithEventData(event, event_data)

    return True

  def _RawToUTF8(self, byte_stream):
    """Copies a UTF-8 byte stream into a Unicode string.

    Args:
      byte_stream (bytes): byte stream containing a UTF-8 encoded string.

    Returns:
      str: A Unicode string.
    """
    try:
      string = byte_stream.decode('utf-8')
    except UnicodeDecodeError:
      logging.warning(
          'Decode UTF8 failed, the message string may be cut short.')
      string = byte_stream.decode('utf-8', errors='ignore')
    return string.partition('\x00')[0]

  def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses a BSM file-like object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    try:
      is_bsm = self.VerifyFile(parser_mediator, file_object)
    except (IOError, construct.FieldError) as exception:
      raise errors.UnableToParseFile(
          'Unable to parse BSM file with error: {0:s}'.format(exception))

    if not is_bsm:
      raise errors.UnableToParseFile('Not a BSM File, unable to parse.')

    file_object.seek(0, os.SEEK_SET)

    while self._ParseBSMEvent(parser_mediator, file_object):
      pass

  def VerifyFile(self, parser_mediator, file_object):
    """Check if the file is a BSM file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Returns:
      bool: True if this is a valid BSM file, False otherwise.
    """
    # First part of the entry is always a Header.
    try:
      token_type = self._BSM_TOKEN.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False

    if token_type not in self._BSM_HEADER_TOKEN_TYPES:
      return False

    _, record_structure = self._BSM_TOKEN_TYPES.get(token_type, ('', None))

    try:
      header = record_structure.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False

    if header.bsm_header.version != self.AUDIT_HEADER_VERSION:
      return False

    try:
      token_identifier = self._BSM_TOKEN.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return False

    # If it is a Mac OS X BSM file, the next entry is a text token
    # indicating whether it is a normal start or a recovery track.
    if parser_mediator.platform == 'MacOSX':
      token_type, record_structure = self._BSM_TOKEN_TYPES.get(
          token_identifier, ('', None))

      if not record_structure:
        return False

      if token_type != 'BSM_TOKEN_TEXT':
        logging.warning('It is not a valid first entry for Mac OS X BSM.')
        return False

      try:
        token = record_structure.parse_stream(file_object)
      except (IOError, construct.FieldError):
        return False

      text = self._CopyUtf8ByteArrayToString(token.text)
      if (text != 'launchctl::Audit startup' and
          text != 'launchctl::Audit recovery'):
        logging.warning('It is not a valid first entry for Mac OS X BSM.')
        return False

    return True

  def TryWithUntestedStructures(self, file_object, token_id, pending):
    """Try to parse the pending part of the entry using untested structures.

    Args:
      file_object: BSM file.
      token_id: integer with the id that comes from the unknown token.
      pending: pending length of the entry.

    Returns:
      A list of extra tokens data that can be parsed using non-tested
      structures. A message indicating that a structure cannot be parsed
      is added for unparsed structures.
    """
    # Data from the unknown structure.
    start_position = file_object.tell()
    start_token_id = token_id
    extra_tokens = {}

    # Read all the "pending" bytes.
    try:
      if token_id in self._bsm_type_list_all:
        token = self._bsm_type_list_all[token_id][1].parse_stream(file_object)
        new_extra_tokens = self.FormatToken(token_id, token, file_object)
        extra_tokens.update(new_extra_tokens)
        while file_object.tell() < (start_position + pending):
          # Check if it is a known token.
          try:
            token_id = self._BSM_TOKEN.parse_stream(file_object)
          except (IOError, construct.FieldError):
            logging.warning(
                'Unable to parse the Token ID at position: {0:d}'.format(
                    file_object.tell()))
            return extra_tokens
          if token_id not in self._bsm_type_list_all:
            break
          token = self._bsm_type_list_all[token_id][1].parse_stream(file_object)
          new_extra_tokens = self.FormatToken(token_id, token, file_object)
          extra_tokens.update(new_extra_tokens)
    except (IOError, construct.FieldError):
      token_id = 255

    next_entry = (start_position + pending)
    if file_object.tell() != next_entry:
      # Unknown Structure.
      logging.warning('Unknown Token at "0x{0:X}", ID: {1} (0x{2:X})'.format(
          start_position - 1, token_id, token_id))
      # TODO: another way to save this information must be found.
      extra_tokens.update(
          {'message': self.MESSAGE_CAN_NOT_SAVE.format(
              start_position - 1, start_token_id)})
      # Move to next entry.
      file_object.seek(next_entry - file_object.tell(), os.SEEK_CUR)
      # Return an empty dict because we cannot tell which structure was
      # parsed incorrectly and prevented us from reaching the expected
      # end of the entry.
      return {}
    return extra_tokens

  def FormatToken(self, token_id, token, file_object):
    """Parse the Token depending of the type of the structure.

    Args:
      token_id (int): identification of the token_type.
      token (structure): token struct to parse.
      file_object: BSM file.

    Returns:
      (dict): parsed Token values.

    Keys for returned dictionary are token name like BSM_TOKEN_SUBJECT32.
    Values of this dictionary are key-value pairs like terminal_ip:127.0.0.1.
    """
    if token_id not in self._bsm_type_list_all:
      return {}

    bsm_type, _ = self._bsm_type_list_all.get(token_id, ['', ''])

    if token_id in self._BSM_UTF8_BYTE_ARRAY_TOKEN_TYPES:
      try:
        string = self._CopyUtf8ByteArrayToString(token.text)
      except TypeError:
        string = 'Unknown'
      return {bsm_type: string}

    elif token_id in self._BSM_RETURN_TOKEN_TYPES:
      return {bsm_type: {
          'error': bsmtoken.BSM_ERRORS.get(token.status, 'Unknown'),
          'token_status': token.status,
          'call_status': token.return_value
      }}

    elif token_id in self._BSM_SUBJECT_TOKEN_TYPES:
      return {bsm_type: {
          'aid': token.subject_data.audit_uid,
          'euid': token.subject_data.effective_uid,
          'egid': token.subject_data.effective_gid,
          'uid': token.subject_data.real_uid,
          'gid': token.subject_data.real_gid,
          'pid': token.subject_data.pid,
          'session_id': token.subject_data.session_id,
          'terminal_port': token.terminal_port,
          'terminal_ip': self._IPv4Format(token.ipv4)
      }}

    elif token_id in self._BSM_SUBJECT_EX_TOKEN_TYPES:
      if token.bsm_ip_type_short.net_type == self.AU_IPv6:
        ip = self._IPv6Format(
            token.bsm_ip_type_short.ip_addr.high,
            token.bsm_ip_type_short.ip_addr.low)
      elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
        ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
      else:
        ip = 'unknown'
      return {bsm_type: {
          'aid': token.subject_data.audit_uid,
          'euid': token.subject_data.effective_uid,
          'egid': token.subject_data.effective_gid,
          'uid': token.subject_data.real_uid,
          'gid': token.subject_data.real_gid,
          'pid': token.subject_data.pid,
          'session_id': token.subject_data.session_id,
          'terminal_port': token.terminal_port,
          'terminal_ip': ip
      }}

    elif token_id in self._BSM_ARGUMENT_TOKEN_TYPES:
      string = self._CopyUtf8ByteArrayToString(token.text)
      return {bsm_type: {
          'string': string,
          'num_arg': token.num_arg,
          'is': token.name_arg}}

    elif token_id in self._BSM_EXEV_TOKEN_TYPES:
      arguments = []
      for _ in range(0, token):
        sub_token = self.BSM_TOKEN_EXEC_ARGUMENT.parse_stream(file_object)
        string = self._CopyUtf8ByteArrayToString(sub_token.text)
        arguments.append(string)
      return {bsm_type: ' '.join(arguments)}

    elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET32':
      return {bsm_type: {
          'protocols':
          bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'),
          'net_type': token.net_type,
          'port': token.port_number,
          'address': self._IPv4Format(token.ipv4)
      }}

    elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET128':
      return {bsm_type: {
          'protocols':
          bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'),
          'net_type': token.net_type,
          'port': token.port_number,
          'address': self._IPv6Format(token.ipv6.high, token.ipv6.low)
      }}

    elif bsm_type == 'BSM_TOKEN_ADDR':
      return {bsm_type: self._IPv4Format(token)}

    elif bsm_type == 'BSM_TOKEN_IP':
      return {'IPv4_Header': '0x{0:s}'.format(token.encode('hex'))}

    elif bsm_type == 'BSM_TOKEN_ADDR_EXT':
      return {bsm_type: {
          'protocols':
          bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'),
          'net_type': token.net_type,
          'address': self._IPv6Format(token.ipv6.high, token.ipv6.low)
      }}

    elif bsm_type == 'BSM_TOKEN_PORT':
      return {bsm_type: token}

    elif bsm_type == 'BSM_TOKEN_TRAILER':
      return {bsm_type: token.record_length}

    elif bsm_type == 'BSM_TOKEN_FILE':
      # TODO: if this timestamp is useful, it must be extracted as a separate
      #       event object.
      timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
          token.timestamp, token.microseconds)
      date_time = timelib.Timestamp.CopyToDatetime(timestamp, pytz.UTC)
      date_time_string = date_time.strftime('%Y-%m-%d %H:%M:%S')

      string = self._CopyUtf8ByteArrayToString(token.text)
      return {bsm_type: {'string': string, 'timestamp': date_time_string}}

    elif bsm_type == 'BSM_TOKEN_IPC':
      return {bsm_type: {
          'object_type': token.object_type,
          'object_id': token.object_id
      }}

    elif token_id in self._BSM_PROCESS_TOKEN_TYPES:
      return {bsm_type: {
          'aid': token.subject_data.audit_uid,
          'euid': token.subject_data.effective_uid,
          'egid': token.subject_data.effective_gid,
          'uid': token.subject_data.real_uid,
          'gid': token.subject_data.real_gid,
          'pid': token.subject_data.pid,
          'session_id': token.subject_data.session_id,
          'terminal_port': token.terminal_port,
          'terminal_ip': self._IPv4Format(token.ipv4)
      }}

    elif token_id in self._BSM_PROCESS_EX_TOKEN_TYPES:
      if token.bsm_ip_type_short.net_type == self.AU_IPv6:
        ip = self._IPv6Format(
            token.bsm_ip_type_short.ip_addr.high,
            token.bsm_ip_type_short.ip_addr.low)
      elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
        ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
      else:
        ip = 'unknown'
      return {bsm_type: {
          'aid': token.subject_data.audit_uid,
          'euid': token.subject_data.effective_uid,
          'egid': token.subject_data.effective_gid,
          'uid': token.subject_data.real_uid,
          'gid': token.subject_data.real_gid,
          'pid': token.subject_data.pid,
          'session_id': token.subject_data.session_id,
          'terminal_port': token.terminal_port,
          'terminal_ip': ip
      }}

    elif bsm_type == 'BSM_TOKEN_DATA':
      data = []
      data_type = bsmtoken.BSM_TOKEN_DATA_TYPE.get(token.data_type, '')

      if data_type == 'AUR_CHAR':
        for _ in range(token.unit_count):
          data.append(self.BSM_TOKEN_DATA_CHAR.parse_stream(file_object))

      elif data_type == 'AUR_SHORT':
        for _ in range(token.unit_count):
          data.append(self.BSM_TOKEN_DATA_SHORT.parse_stream(file_object))

      elif data_type == 'AUR_INT32':
        for _ in range(token.unit_count):
          data.append(self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object))

      else:
        data.append('Unknown type data')

      # TODO: when the data is a string it ends with "."; however, a space
      #       is returned after the UTF-8 conversion.
      return {bsm_type: {
          'format': bsmtoken.BSM_TOKEN_DATA_PRINT[token.how_to_print],
          'data':
          '{0}'.format(self._RawToUTF8(''.join(map(str, data))))
      }}

    elif token_id in self._BSM_ATTR_TOKEN_TYPES:
      return {bsm_type: {
          'mode': token.file_mode,
          'uid': token.uid,
          'gid': token.gid,
          'system_id': token.file_system_id,
          'node_id': token.file_system_node_id,
          'device': token.device}}

    elif bsm_type == 'BSM_TOKEN_GROUPS':
      arguments = []
      for _ in range(token):
        # Each group is a 4-byte integer identifier.
        group_identifier = self.BSM_TOKEN_DATA_INTEGER.parse_stream(
            file_object)
        arguments.append('{0:d}'.format(group_identifier))
      return {bsm_type: ','.join(arguments)}

    elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET32_EX':
      if bsmtoken.BSM_PROTOCOLS.get(token.socket_domain, '') == 'INET6':
        saddr = self._IPv6Format(
            token.structure_addr_port.saddr_high,
            token.structure_addr_port.saddr_low)
        daddr = self._IPv6Format(
            token.structure_addr_port.daddr_high,
            token.structure_addr_port.daddr_low)
      else:
        saddr = self._IPv4Format(token.structure_addr_port.source_address)
        daddr = self._IPv4Format(token.structure_addr_port.destination_address)

      return {bsm_type: {
          'from': saddr,
          'from_port': token.structure_addr_port.source_port,
          'to': daddr,
          'to_port': token.structure_addr_port.destination_port}}

    elif bsm_type == 'BSM_TOKEN_IPC_PERM':
      return {bsm_type: {
          'user_id': token.user_id,
          'group_id': token.group_id,
          'creator_user_id': token.creator_user_id,
          'creator_group_id': token.creator_group_id,
          'access': token.access_mode}}

    elif bsm_type == 'BSM_TOKEN_SOCKET_UNIX':
      string = self._CopyUtf8ByteArrayToString(token.path)
      return {bsm_type: {'family': token.family, 'path': string}}

    elif bsm_type == 'BSM_TOKEN_OPAQUE':
      string = self._CopyByteArrayToBase16String(token.text)
      return {bsm_type: string}

    elif bsm_type == 'BSM_TOKEN_SEQUENCE':
      return {bsm_type: token}
Beispiel #12
0
class FirefoxCache2Parser(BaseFirefoxCacheParser):
    """Parses Firefox cache version 2 files (Firefox 32 or later)."""

    NAME = 'firefox_cache2'
    DESCRIPTION = (
        'Parser for Firefox Cache version 2 files (Firefox 32 or later).')

    _CACHE_VERSION = 2

    # Cache 2 filenames are SHA-1 hex digests.
    # TODO: change into regexp.
    _CACHE_FILENAME = pyparsing.Word(pyparsing.hexnums, exact=40)

    # The last four bytes of a file give the size of the cached content.
    _LENGTH = construct.UBInt32('length')

    _CACHE_RECORD_HEADER_STRUCT = construct.Struct(
        'record_header', construct.UBInt32('major'),
        construct.UBInt32('fetch_count'), construct.UBInt32('last_fetched'),
        construct.UBInt32('last_modified'), construct.UBInt32('frequency'),
        construct.UBInt32('expire_time'), construct.UBInt32('request_size'))

    _CHUNK_SIZE = 512 * 1024

    def _GetStartOfMetadata(self, file_object):
        """Determine the byte offset of the cache record metadata in cache file.

     This method is inspired by the work of James Habben:
     https://github.com/JamesHabben/FirefoxCache2

    Args:
      file_object: The file containing the cache record.
    """
        file_object.seek(-4, os.SEEK_END)

        try:
            length = self._LENGTH.parse_stream(file_object)
        except (IOError, construct.FieldError):
            raise IOError(
                'Could not find metadata offset in Firefox cache file.')

        # Firefox splits the content into chunks.
        hash_chunks, remainder = divmod(length, self._CHUNK_SIZE)
        if remainder != 0:
            hash_chunks += 1

        # Each chunk in the cached record is padded with two bytes.
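        # Worked example: a 614400 byte (600 KiB) payload spans two 512 KiB
        # chunks, so the metadata starts at 614400 + (2 * 2) = 614404.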
        return length + (hash_chunks * 2)

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a Firefox cache file-like object.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        # TODO: determine if the minimum file size is really 4 bytes.
        if file_object.get_size() < 4:
            raise errors.UnableToParseFile('Not a Firefox cache2 file.')

        filename = parser_mediator.GetFilename()
        try:
            # Match cache2 filename (SHA-1 hex of cache record key).
            self._CACHE_FILENAME.parseString(filename)
        except pyparsing.ParseException:
            raise errors.UnableToParseFile('Not a Firefox cache2 file.')

        if file_object.get_size() == 0:
            raise errors.UnableToParseFile('Empty file.')

        meta_start = self._GetStartOfMetadata(file_object)

        file_object.seek(meta_start, os.SEEK_SET)

        # Skip the first 4 bytes of metadata, which contain a hash value of
        # the cached content.
        file_object.seek(4, os.SEEK_CUR)

        try:
            cache_record_header = self._CACHE_RECORD_HEADER_STRUCT.parse_stream(
                file_object)
        except (IOError, construct.FieldError):
            raise errors.UnableToParseFile('Not a Firefox cache2 file.')

        if not self._ValidateCacheRecordHeader(cache_record_header):
            raise errors.UnableToParseFile(
                'Not a valid Firefox cache2 record.')

        url = file_object.read(cache_record_header.request_size)

        header_data = file_object.read()

        display_name = parser_mediator.GetDisplayName()
        request_method, response_code = self._ParseHTTPHeaders(
            header_data, meta_start, display_name)

        event_data = FirefoxCacheEventData()
        event_data.fetch_count = cache_record_header.fetch_count
        event_data.frequency = cache_record_header.frequency
        event_data.major = cache_record_header.major
        event_data.request_method = request_method
        event_data.request_size = cache_record_header.request_size
        event_data.response_code = response_code
        event_data.version = self._CACHE_VERSION
        event_data.url = url

        date_time = dfdatetime_posix_time.PosixTime(
            timestamp=cache_record_header.last_fetched)
        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_LAST_VISITED)
        parser_mediator.ProduceEventWithEventData(event, event_data)

        if cache_record_header.last_modified:
            date_time = dfdatetime_posix_time.PosixTime(
                timestamp=cache_record_header.last_modified)
            event = time_events.DateTimeValuesEvent(
                date_time, definitions.TIME_DESCRIPTION_WRITTEN)
            parser_mediator.ProduceEventWithEventData(event, event_data)

        if cache_record_header.expire_time:
            date_time = dfdatetime_posix_time.PosixTime(
                timestamp=cache_record_header.expire_time)
            event = time_events.DateTimeValuesEvent(
                date_time, definitions.TIME_DESCRIPTION_EXPIRATION)
            parser_mediator.ProduceEventWithEventData(event, event_data)
Beispiel #13
0
class CupsIppParser(interface.FileObjectParser):
    """Parser for CUPS IPP files. """

    NAME = 'cups_ipp'
    DESCRIPTION = 'Parser for CUPS IPP files.'

    # INFO:
    # For each file, we have only one document with three different timestamps:
    # created, processed and finished.
    # Format:
    # [HEADER: MAGIC + KNOWN_TYPE][GROUP A]...[GROUP Z][GROUP_END: 0x03]
    # GROUP: [GROUP ID][PAIR A]...[PAIR Z] where [PAIR: NAME + VALUE]
    #   GROUP ID: [1byte ID]
    #   PAIR: [TagID][\x00][Name][Value])
    #     TagID: 1 byte integer with the type of "Value".
    #     Name: [Length][Text][\00]
    #       Name can be empty when the attribute has more than one value.
    #       Example: the family name "lopez mata" consists of two surnames.
    #       Type_Text + [0x06, family, 0x00] + [0x05, lopez, 0x00] +
    #       Type_Text + [0x00, 0x00] + [0x04, mata, 0x00]
    #     Value: can be integer, boolean, or text provided by TagID.
    #       If boolean, Value: [\x01][0x00(False)] or [\x01(True)]
    #       If integer, Value: [\x04][Integer]
    #       If text,    Value: [Length text][Text][\00]
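    # Worked example (hand-built bytes following the layout above): the
    # sequence
    #   0x21 0x00 | 0x06 'copies' 0x00 | 0x04 0x00000002
    # is an integer pair (TagID 0x21 = TYPE_INTEGER) named 'copies' with
    # value 2, as read back by _ReadPair below.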

    # Magic number that identifies the supported CUPS IPP version.
    IPP_MAJOR_VERSION = 2
    IPP_MINOR_VERSION = 0
    # Supported Operation ID.
    IPP_OP_ID = 5

    # CUPS IPP File header.
    CUPS_IPP_HEADER = construct.Struct('cups_ipp_header_struct',
                                       construct.UBInt8('major_version'),
                                       construct.UBInt8('minor_version'),
                                       construct.UBInt16('operation_id'),
                                       construct.UBInt32('request_id'))

    # Group ID that indicates the end of the IPP Control file.
    GROUP_END = 3
    # Identification Groups.
    GROUP_LIST = [1, 2, 4, 5, 6, 7]

    # Type ID, per cups source file ipp-support.c.
    TYPE_GENERAL_INTEGER = 0x20
    TYPE_INTEGER = 0x21
    TYPE_BOOL = 0x22
    TYPE_ENUMERATION = 0x23
    TYPE_DATETIME = 0x31

    # Types of values that can be extracted.
    INTEGER_8 = construct.UBInt8('integer')
    INTEGER_32 = construct.UBInt32('integer')
    TEXT = construct.PascalString('text',
                                  length_field=construct.UBInt8('length'))
    BOOLEAN = construct.Struct('boolean_value', construct.Padding(1),
                               INTEGER_8)
    INTEGER = construct.Struct('integer_value', construct.Padding(1),
                               INTEGER_32)

    # This is an RFC2579 datetime.
    DATETIME = construct.Struct(
        'datetime',
        construct.Padding(1),
        construct.UBInt16('year'),
        construct.UBInt8('month'),
        construct.UBInt8('day'),
        construct.UBInt8('hour'),
        construct.UBInt8('minutes'),
        construct.UBInt8('seconds'),
        construct.UBInt8('deciseconds'),
        construct.String('direction_from_utc', length=1, encoding='ascii'),
        construct.UBInt8('hours_from_utc'),
        construct.UBInt8('minutes_from_utc'),
    )
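    # Example: 2012-01-01 00:00:00.0 UTC is encoded as year=2012, month=1,
    # day=1, hour=0, minutes=0, seconds=0, deciseconds=0,
    # direction_from_utc='+', hours_from_utc=0 and minutes_from_utc=0.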

    # Name of the pair.
    PAIR_NAME = construct.Struct('pair_name', TEXT, construct.Padding(1))

    # Maps specific CUPS IPP names to generic names.
    _NAME_PAIR_TRANSLATION = {
        'com.apple.print.JobInfo.PMApplicationName': 'application',
        'com.apple.print.JobInfo.PMJobOwner': 'owner',
        'DestinationPrinterID': 'printer_id',
        'document-format': 'doc_type',
        'job-name': 'job_name',
        'job-originating-host-name': 'computer_name',
        'job-originating-user-name': 'user',
        'job-uuid': 'job_id',
        'printer-uri': 'uri'
    }

    _DATE_TIME_VALUES = {
        'date-time-at-creation': definitions.TIME_DESCRIPTION_CREATION,
        'date-time-at-processing': definitions.TIME_DESCRIPTION_START,
        'date-time-at-completed': definitions.TIME_DESCRIPTION_END
    }

    _POSIX_TIME_VALUES = {
        'time-at-creation': definitions.TIME_DESCRIPTION_CREATION,
        'time-at-processing': definitions.TIME_DESCRIPTION_START,
        'time-at-completed': definitions.TIME_DESCRIPTION_END
    }

    _DATE_TIME_VALUE_NAMES = list(_DATE_TIME_VALUES.keys())
    _DATE_TIME_VALUE_NAMES.extend(list(_POSIX_TIME_VALUES.keys()))

    def _GetStringValue(self, data_dict, name, default_value=None):
        """Retrieves a specific string value from the data dict.

    Args:
      data_dict (dict[str, list[str]]): values per name.
      name (str): name of the value to retrieve.
      default_value (Optional[object]): value to return when the name has no
          values in data_dict.

    Returns:
      str: value represented as a string.
    """
        values = data_dict.get(name, None)
        if not values:
            return default_value

        for index, value in enumerate(values):
            if ',' in value:
                values[index] = '"{0:s}"'.format(value)

        return ', '.join(values)

    def _ReadPair(self, parser_mediator, file_object):
        """Reads an attribute name and value pair from a CUPS IPP event.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): file-like object.

    Returns:
      tuple: contains:

        str: name or None.
        str: value or None.
    """
        # Pair = Type ID + Name + Value.
        try:
            # Can be:
            #   Group ID + IDtag = Group ID (1byte) + Tag ID (1byte) + '0x00'.
            #   IDtag = Tag ID (1byte) + '0x00'.
            type_id = self.INTEGER_8.parse_stream(file_object)
            if type_id == self.GROUP_END:
                return None, None

            elif type_id in self.GROUP_LIST:
                # If it is a group ID we must read the next byte that contains
                # the first TagID.
                type_id = self.INTEGER_8.parse_stream(file_object)

            # 0x00 separator character.
            self.INTEGER_8.parse_stream(file_object)

        except (IOError, construct.FieldError) as exception:
            parser_mediator.ProduceExtractionError(
                'unable to parse pair identifier with error: {0!s}'.format(
                    exception))
            return None, None

        # Name = Length name + name + 0x00
        try:
            pair_name = self.PAIR_NAME.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            parser_mediator.ProduceExtractionError(
                'unable to parse pair name with error: {0!s}'.format(
                    exception))
            return None, None

        try:
            name = pair_name.text.decode('utf-8')
        except UnicodeDecodeError as exception:
            parser_mediator.ProduceExtractionError(
                'unable to decode pair name with error: {0!s}'.format(
                    exception))
            return None, None

        # Value: can be integer, boolean or text, selected by Type ID.
        if type_id in (self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER,
                       self.TYPE_ENUMERATION):
            value_structure = self.INTEGER
        elif type_id == self.TYPE_BOOL:
            value_structure = self.BOOLEAN
        elif type_id == self.TYPE_DATETIME:
            value_structure = self.DATETIME
        else:
            value_structure = self.TEXT

        try:
            value = value_structure.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            parser_mediator.ProduceExtractionError(
                'unable to parse value with error: {0!s}'.format(exception))
            return None, None

        if type_id in (self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER,
                       self.TYPE_ENUMERATION):
            value = value.integer

        elif type_id == self.TYPE_BOOL:
            value = bool(value.integer)

        elif type_id == self.TYPE_DATETIME:
            rfc2579_date_time_tuple = (value.year, value.month, value.day,
                                       value.hour, value.minutes,
                                       value.seconds, value.deciseconds,
                                       value.direction_from_utc,
                                       value.hours_from_utc,
                                       value.minutes_from_utc)
            value = dfdatetime_rfc2579_date_time.RFC2579DateTime(
                rfc2579_date_time_tuple=rfc2579_date_time_tuple)

        else:
            try:
                value = value.decode('utf-8')
            except UnicodeDecodeError as exception:
                parser_mediator.ProduceExtractionError(
                    'unable to decode value with error: {0!s}'.format(
                        exception))
                return None, None

        return name, value

    def _ReadPairs(self, parser_mediator, file_object):
        """Reads the attribute name and value pairs from a CUPS IPP event.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): file-like object.

    Returns:
      dict[str, list[str]]: values per name.
    """
        data_dict = {}

        name, value = self._ReadPair(parser_mediator, file_object)
        while name or value:
            # Translate the known "name" CUPS IPP to a generic name value.
            pretty_name = self._NAME_PAIR_TRANSLATION.get(name, name)
            data_dict.setdefault(pretty_name, []).append(value)
            name, value = self._ReadPair(parser_mediator, file_object)

        return data_dict

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a CUPS IPP file-like object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        try:
            header = self.CUPS_IPP_HEADER.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                'Unable to parse CUPS IPP Header with error: {0!s}'.format(
                    exception))

        if (header.major_version != self.IPP_MAJOR_VERSION
                or header.minor_version != self.IPP_MINOR_VERSION):
            raise errors.UnableToParseFile(
                '[{0:s}] Unsupported version number.'.format(self.NAME))

        if header.operation_id != self.IPP_OP_ID:
            # Warn if the operation ID differs from the standard one. We should be
            # able to parse the file nonetheless.
            logger.debug(
                '[{0:s}] Unsupported operation identifier in file: {1:s}.'.
                format(self.NAME, parser_mediator.GetDisplayName()))

        data_dict = self._ReadPairs(parser_mediator, file_object)

        time_dict = {}

        for name in self._DATE_TIME_VALUE_NAMES:
            value = data_dict.get(name, None)
            if value is not None:
                time_dict[name] = value
                del data_dict[name]

        event_data = CupsIppEventData()
        event_data.application = self._GetStringValue(data_dict, 'application')
        event_data.computer_name = self._GetStringValue(
            data_dict, 'computer_name')
        event_data.copies = data_dict.get('copies', [0])[0]
        event_data.data_dict = data_dict
        event_data.doc_type = self._GetStringValue(data_dict, 'doc_type')
        event_data.job_id = self._GetStringValue(data_dict, 'job_id')
        event_data.job_name = self._GetStringValue(data_dict, 'job_name')
        event_data.user = self._GetStringValue(data_dict, 'user')
        event_data.owner = self._GetStringValue(data_dict, 'owner')
        event_data.printer_id = self._GetStringValue(data_dict, 'printer_id')
        event_data.uri = self._GetStringValue(data_dict, 'uri')

        for name, usage in iter(self._DATE_TIME_VALUES.items()):
            time_values = time_dict.get(name, [])
            for date_time in time_values:
                event = time_events.DateTimeValuesEvent(date_time, usage)
                parser_mediator.ProduceEventWithEventData(event, event_data)

        for name, usage in iter(self._POSIX_TIME_VALUES.items()):
            time_values = time_dict.get(name, [])
            for time_value in time_values:
                date_time = dfdatetime_posix_time.PosixTime(
                    timestamp=time_value)
                event = time_events.DateTimeValuesEvent(date_time, usage)
                parser_mediator.ProduceEventWithEventData(event, event_data)
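
A minimal sketch checking the CUPS_IPP_HEADER declaration above against hand-built bytes; the sample byte string is constructed here purely for illustration and is not taken from a real spool file:

# Hand-built header: version 2.0, operation_id 5, request_id 1.
sample_header = b'\x02\x00\x00\x05\x00\x00\x00\x01'
header = CupsIppParser.CUPS_IPP_HEADER.parse(sample_header)
assert header.major_version == CupsIppParser.IPP_MAJOR_VERSION
assert header.minor_version == CupsIppParser.IPP_MINOR_VERSION
assert header.operation_id == CupsIppParser.IPP_OP_ID
assert header.request_id == 1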
Beispiel #14
0
    def seek(self, offset, whence=0):
        # Standard file-like seek semantics: absolute offset, relative to the
        # current position, or relative to the end.
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.size() + offset
        else:
            assert False
        self.offset = min(self.offset, self.size())
        self.offset = max(self.offset, 0)

    def tell(self):
        return self.offset


FileVaultV2Header = construct.Struct(
    "FileVaultV2Header", construct.Magic("encrcdsa"),
    construct.UBInt32("version"), construct.UBInt32("encIVSize"),
    construct.UBInt32("_unk1"), construct.UBInt32("_unk2"),
    construct.UBInt32("keyBits"), construct.UBInt32("_unk4"),
    construct.UBInt32("_unk5"), construct.Array(4,
                                                construct.UBInt32("UDIFID")),
    construct.UBInt32("blockSize"), construct.UBInt64("dataSize"),
    construct.UBInt64("dataOffset"), construct.Padding(0x260),
    construct.UBInt32("kdfAlgorithm"), construct.UBInt32("kdfPRNGAlgorithm"),
    construct.UBInt32("kdfIterationCount"), construct.UBInt32("kdfSaltLen"),
    construct.String("kdfSalt", 0x20), construct.UBInt32("blobEncIVSize"),
    construct.String("blobEncIV", 0x20), construct.UBInt32("blobEncKeyBits"),
    construct.UBInt32("blobEncAlgorithm"), construct.UBInt32("blobEncPadding"),
    construct.UBInt32("blobEncMode"),
    construct.UBInt32("encryptedKeyblobSize"),
    construct.String("encryptedKeyblob", 0x30))
Beispiel #15
0
class BsmParser(interface.FileObjectParser):
    """Parser for BSM files."""

    _INITIAL_FILE_OFFSET = None

    NAME = u'bsm_log'
    DESCRIPTION = u'Parser for BSM log files.'

    # BSM supported version (0x0b = 11).
    AUDIT_HEADER_VERSION = 11

    # Magic Trail Header.
    BSM_TOKEN_TRAILER_MAGIC = b'b105'

    # IP Version constants.
    AU_IPv4 = 4
    AU_IPv6 = 16

    IPV4_STRUCT = construct.UBInt32(u'ipv4')

    IPV6_STRUCT = construct.Struct(u'ipv6', construct.UBInt64(u'high'),
                                   construct.UBInt64(u'low'))

    # Tested structures.
    # INFO: I have omitted the ID in the structure declarations.
    #       I read BSM_TYPE first to get the ID, and then the structure.
    # Tokens always start with an ID value that identifies their token
    # type and subsequent structure.
    BSM_TYPE = construct.UBInt8(u'token_id')
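    # Example: a leading byte of 0x14 yields token_id 20, which maps to
    # BSM_HEADER32 in BSM_TYPE_LIST below.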

    # Data type structures.
    BSM_TOKEN_DATA_CHAR = construct.String(u'value', 1)
    BSM_TOKEN_DATA_SHORT = construct.UBInt16(u'value')
    BSM_TOKEN_DATA_INTEGER = construct.UBInt32(u'value')

    # Common structure used by other structures.
    # audit_uid: integer, uid that generates the entry.
    # effective_uid: integer, the permission user used.
    # effective_gid: integer, the permission group used.
    # real_uid: integer, user id of the user that execute the process.
    # real_gid: integer, group id of the group that execute the process.
    # pid: integer, identification number of the process.
    # session_id: unknown, need research.
    BSM_TOKEN_SUBJECT_SHORT = construct.Struct(
        u'subject_data', construct.UBInt32(u'audit_uid'),
        construct.UBInt32(u'effective_uid'),
        construct.UBInt32(u'effective_gid'), construct.UBInt32(u'real_uid'),
        construct.UBInt32(u'real_gid'), construct.UBInt32(u'pid'),
        construct.UBInt32(u'session_id'))

    # Common structure used by other structures.
    # Identifies the kind of inet address (IPv4 or IPv6).
    # TODO: instead of 16, AU_IPv6 must be used.
    BSM_IP_TYPE_SHORT = construct.Struct(
        u'bsm_ip_type_short', construct.UBInt32(u'net_type'),
        construct.Switch(u'ip_addr',
                         _BsmTokenGetNetType, {16: IPV6_STRUCT},
                         default=IPV4_STRUCT))
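    # Example: net_type=16 (AU_IPv6) selects IPV6_STRUCT; any other value,
    # such as 4 (AU_IPv4), falls back to IPV4_STRUCT.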

    # Initial fields structure used by header structures.
    # length: integer, the length of the entry, equal to trailer (doc: length).
    # version: integer, version of BSM (AUDIT_HEADER_VERSION).
    # event_type: integer, the type of event (/etc/security/audit_event).
    # modifier: integer, unknown, need research (It is always 0).
    BSM_HEADER = construct.Struct(u'bsm_header', construct.UBInt32(u'length'),
                                  construct.UBInt8(u'version'),
                                  construct.UBInt16(u'event_type'),
                                  construct.UBInt16(u'modifier'))

    # First token of one entry.
    # timestamp: unsigned integer, number of seconds since
    #            January 1, 1970 00:00:00 UTC.
    # microsecond: unsigned integer, number of microseconds.
    BSM_HEADER32 = construct.Struct(u'bsm_header32', BSM_HEADER,
                                    construct.UBInt32(u'timestamp'),
                                    construct.UBInt32(u'microsecond'))

    BSM_HEADER64 = construct.Struct(u'bsm_header64', BSM_HEADER,
                                    construct.UBInt64(u'timestamp'),
                                    construct.UBInt64(u'microsecond'))

    BSM_HEADER32_EX = construct.Struct(u'bsm_header32_ex', BSM_HEADER,
                                       BSM_IP_TYPE_SHORT,
                                       construct.UBInt32(u'timestamp'),
                                       construct.UBInt32(u'microsecond'))

    # Token TEXT, provides extra information.
    BSM_TOKEN_TEXT = construct.Struct(
        u'bsm_token_text', construct.UBInt16(u'length'),
        construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))
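    # Example: b'\x00\x04' followed by b'OK!\x00' parses as length=4 and the
    # four text bytes that follow (including the trailing 0x00).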

    # Path of the executable.
    BSM_TOKEN_PATH = BSM_TOKEN_TEXT

    # Identifies the end of the record (followed by TRAILER).
    # status: integer that identifies the status of the exit (BSM_ERRORS).
    # return: returned value from the operation.
    BSM_TOKEN_RETURN32 = construct.Struct(u'bsm_token_return32',
                                          construct.UBInt8(u'status'),
                                          construct.UBInt32(u'return_value'))

    BSM_TOKEN_RETURN64 = construct.Struct(u'bsm_token_return64',
                                          construct.UBInt8(u'status'),
                                          construct.UBInt64(u'return_value'))

    # Identifies the number of bytes that were written.
    # magic: 2 bytes that identify the TRAILER (BSM_TOKEN_TRAILER_MAGIC).
    # length: integer with the size of the entry in bytes.
    BSM_TOKEN_TRAILER = construct.Struct(u'bsm_token_trailer',
                                         construct.UBInt16(u'magic'),
                                         construct.UBInt32(u'record_length'))

    # A 32-bit argument.
    # num_arg: the number of the argument.
    # name_arg: the argument's name.
    # text: the string value of the argument.
    BSM_TOKEN_ARGUMENT32 = construct.Struct(
        u'bsm_token_argument32', construct.UBInt8(u'num_arg'),
        construct.UBInt32(u'name_arg'), construct.UBInt16(u'length'),
        construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

    # A 64-bit argument.
    # num_arg: integer, the number of the argument.
    # name_arg: text, the argument's name.
    # text: the string value of the argument.
    BSM_TOKEN_ARGUMENT64 = construct.Struct(
        u'bsm_token_argument64', construct.UBInt8(u'num_arg'),
        construct.UBInt64(u'name_arg'), construct.UBInt16(u'length'),
        construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

    # Identifies a user.
    # terminal_id: unknown, research needed.
    # terminal_addr: unknown, research needed.
    BSM_TOKEN_SUBJECT32 = construct.Struct(u'bsm_token_subject32',
                                           BSM_TOKEN_SUBJECT_SHORT,
                                           construct.UBInt32(u'terminal_port'),
                                           IPV4_STRUCT)

    # Identifies a user using an extended token.
    # terminal_port: unknown, need research.
    # net_type: unknown, need research.
    BSM_TOKEN_SUBJECT32_EX = construct.Struct(
        u'bsm_token_subject32_ex', BSM_TOKEN_SUBJECT_SHORT,
        construct.UBInt32(u'terminal_port'), BSM_IP_TYPE_SHORT)

    # au_to_opaque // AUT_OPAQUE
    BSM_TOKEN_OPAQUE = BSM_TOKEN_TEXT

    # au_to_seq // AUT_SEQ
    BSM_TOKEN_SEQUENCE = BSM_TOKEN_DATA_INTEGER

    # Program execution with options.
    # For each argument there is a string followed by "\x00".
    # Example: [00 00 00 02][41 42 43 00 42 42 00]
    #          2 Arguments, Arg1: [414243] Arg2: [4242].
    BSM_TOKEN_EXEC_ARGUMENTS = construct.UBInt32(u'number_arguments')

    BSM_TOKEN_EXEC_ARGUMENT = construct.Struct(
        u'bsm_token_exec_argument',
        construct.RepeatUntil(_BsmTokenIsEndOfString,
                              construct.StaticField("text", 1)))

    # au_to_in_addr // AUT_IN_ADDR:
    BSM_TOKEN_ADDR = IPV4_STRUCT

    # au_to_in_addr_ext // AUT_IN_ADDR_EX:
    BSM_TOKEN_ADDR_EXT = construct.Struct(u'bsm_token_addr_ext',
                                          construct.UBInt32(u'net_type'),
                                          IPV6_STRUCT)

    # au_to_ip // AUT_IP:
    # TODO: parse this header in the correct way.
    BSM_TOKEN_IP = construct.String(u'binary_ipv4_add', 20)

    # au_to_ipc // AUT_IPC:
    BSM_TOKEN_IPC = construct.Struct(u'bsm_token_ipc',
                                     construct.UBInt8(u'object_type'),
                                     construct.UBInt32(u'object_id'))

    # au_to_ipc_perm // au_to_ipc_perm
    BSM_TOKEN_IPC_PERM = construct.Struct(
        u'bsm_token_ipc_perm', construct.UBInt32(u'user_id'),
        construct.UBInt32(u'group_id'), construct.UBInt32(u'creator_user_id'),
        construct.UBInt32(u'creator_group_id'),
        construct.UBInt32(u'access_mode'), construct.UBInt32(u'slot_seq'),
        construct.UBInt32(u'key'))

    # au_to_iport // AUT_IPORT:
    BSM_TOKEN_PORT = construct.UBInt16(u'port_number')

    # au_to_file // AUT_OTHER_FILE32:
    BSM_TOKEN_FILE = construct.Struct(
        u'bsm_token_file', construct.UBInt32(u'timestamp'),
        construct.UBInt32(u'microsecond'), construct.UBInt16(u'length'),
        construct.Array(_BsmTokenGetLength, construct.UBInt8(u'text')))

    # au_to_subject64 // AUT_SUBJECT64:
    BSM_TOKEN_SUBJECT64 = construct.Struct(u'bsm_token_subject64',
                                           BSM_TOKEN_SUBJECT_SHORT,
                                           construct.UBInt64(u'terminal_port'),
                                           IPV4_STRUCT)

    # au_to_subject64_ex // AUT_SUBJECT64_EX:
    BSM_TOKEN_SUBJECT64_EX = construct.Struct(
        u'bsm_token_subject64_ex', BSM_TOKEN_SUBJECT_SHORT,
        construct.UBInt32(u'terminal_port'),
        construct.UBInt32(u'terminal_type'), BSM_IP_TYPE_SHORT)

    # au_to_process32 // AUT_PROCESS32:
    BSM_TOKEN_PROCESS32 = construct.Struct(u'bsm_token_process32',
                                           BSM_TOKEN_SUBJECT_SHORT,
                                           construct.UBInt32(u'terminal_port'),
                                           IPV4_STRUCT)

    # au_to_process64 // AUT_PROCESS64:
    BSM_TOKEN_PROCESS64 = construct.Struct(u'bsm_token_process64',
                                           BSM_TOKEN_SUBJECT_SHORT,
                                           construct.UBInt64(u'terminal_port'),
                                           IPV4_STRUCT)

    # au_to_process32_ex // AUT_PROCESS32_EX:
    BSM_TOKEN_PROCESS32_EX = construct.Struct(
        u'bsm_token_process32_ex', BSM_TOKEN_SUBJECT_SHORT,
        construct.UBInt32(u'terminal_port'), BSM_IP_TYPE_SHORT)

    # au_to_process64_ex // AUT_PROCESS64_EX:
    BSM_TOKEN_PROCESS64_EX = construct.Struct(
        u'bsm_token_process64_ex', BSM_TOKEN_SUBJECT_SHORT,
        construct.UBInt64(u'terminal_port'), BSM_IP_TYPE_SHORT)

    # au_to_sock_inet32 // AUT_SOCKINET32:
    BSM_TOKEN_AUT_SOCKINET32 = construct.Struct(
        u'bsm_token_aut_sockinet32', construct.UBInt16(u'net_type'),
        construct.UBInt16(u'port_number'), IPV4_STRUCT)

    # Info: checked against the source code of XNU, but not against
    #       a real BSM file.
    BSM_TOKEN_AUT_SOCKINET128 = construct.Struct(
        u'bsm_token_aut_sockinet128', construct.UBInt16(u'net_type'),
        construct.UBInt16(u'port_number'), IPV6_STRUCT)

    INET6_ADDR_TYPE = construct.Struct(u'addr_type',
                                       construct.UBInt16(u'ip_type'),
                                       construct.UBInt16(u'source_port'),
                                       construct.UBInt64(u'saddr_high'),
                                       construct.UBInt64(u'saddr_low'),
                                       construct.UBInt16(u'destination_port'),
                                       construct.UBInt64(u'daddr_high'),
                                       construct.UBInt64(u'daddr_low'))

    INET4_ADDR_TYPE = construct.Struct(
        u'addr_type', construct.UBInt16(u'ip_type'),
        construct.UBInt16(u'source_port'),
        construct.UBInt32(u'source_address'),
        construct.UBInt16(u'destination_port'),
        construct.UBInt32(u'destination_address'))

    # au_to_socket_ex // AUT_SOCKET_EX
    # TODO: change the 26 to unixbsm.BSM_PROTOCOLS.INET6.
    BSM_TOKEN_AUT_SOCKINET32_EX = construct.Struct(
        u'bsm_token_aut_sockinet32_ex', construct.UBInt16(u'socket_domain'),
        construct.UBInt16(u'socket_type'),
        construct.Switch(u'structure_addr_port',
                         _BsmTokenGetSocketDomain, {26: INET6_ADDR_TYPE},
                         default=INET4_ADDR_TYPE))

    # au_to_sock_unix // AUT_SOCKUNIX
    BSM_TOKEN_SOCKET_UNIX = construct.Struct(
        u'bsm_token_au_to_sock_unix', construct.UBInt16(u'family'),
        construct.RepeatUntil(_BsmTokenIsEndOfString,
                              construct.StaticField("path", 1)))

    # au_to_data // AUT_DATA
    # how_to_print: bsmtoken.BSM_TOKEN_DATA_PRINT.
    # data_type: bsmtoken.BSM_TOKEN_DATA_TYPE.
    # unit_count: number of data_type values.
    # The token is followed by unit_count values of data_type, so its
    # trailing size is sizeof(data_type) * unit_count.
    BSM_TOKEN_DATA = construct.Struct(u'bsm_token_data',
                                      construct.UBInt8(u'how_to_print'),
                                      construct.UBInt8(u'data_type'),
                                      construct.UBInt8(u'unit_count'))
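    # Worked example (hypothetical values): a three-byte header of [01 02 02]
    # would decode as how_to_print = 1, data_type = 2 and unit_count = 2,
    # followed by two values of data_type that FormatToken below reads one
    # at a time.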

    # au_to_attr32 // AUT_ATTR32
    BSM_TOKEN_ATTR32 = construct.Struct(
        u'bsm_token_attr32', construct.UBInt32(u'file_mode'),
        construct.UBInt32(u'uid'), construct.UBInt32(u'gid'),
        construct.UBInt32(u'file_system_id'),
        construct.UBInt64(u'file_system_node_id'),
        construct.UBInt32(u'device'))

    # au_to_attr64 // AUT_ATTR64
    BSM_TOKEN_ATTR64 = construct.Struct(
        u'bsm_token_attr64', construct.UBInt32(u'file_mode'),
        construct.UBInt32(u'uid'), construct.UBInt32(u'gid'),
        construct.UBInt32(u'file_system_id'),
        construct.UBInt64(u'file_system_node_id'),
        construct.UBInt64(u'device'))

    # au_to_exit // AUT_EXIT
    BSM_TOKEN_EXIT = construct.Struct(u'bsm_token_exit',
                                      construct.UBInt32(u'status'),
                                      construct.UBInt32(u'return_value'))

    # au_to_newgroups // AUT_NEWGROUPS
    # INFO: we must read BSM_TOKEN_DATA_INTEGER for each group.
    BSM_TOKEN_GROUPS = construct.UBInt16(u'group_number')
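    # Sketch of how the group values are consumed (mirrors FormatToken below):
    #   group_count = BSM_TOKEN_GROUPS.parse_stream(file_object)
    #   groups = [BSM_TOKEN_DATA_INTEGER.parse_stream(file_object)
    #             for _ in range(group_count)]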

    # au_to_exec_env == au_to_exec_args
    BSM_TOKEN_EXEC_ENV = BSM_TOKEN_EXEC_ARGUMENTS

    # au_to_zonename // AUT_ZONENAME
    BSM_TOKEN_ZONENAME = BSM_TOKEN_TEXT

    # Token ID.
    # List of valid Token_ID.
    # Token_ID -> [NAME_STRUCTURE, STRUCTURE]
    # Only structures that have been verified are added to this list.
    BSM_TYPE_LIST = {
        17: [u'BSM_TOKEN_FILE', BSM_TOKEN_FILE],
        19: [u'BSM_TOKEN_TRAILER', BSM_TOKEN_TRAILER],
        20: [u'BSM_HEADER32', BSM_HEADER32],
        21: [u'BSM_HEADER64', BSM_HEADER64],
        33: [u'BSM_TOKEN_DATA', BSM_TOKEN_DATA],
        34: [u'BSM_TOKEN_IPC', BSM_TOKEN_IPC],
        35: [u'BSM_TOKEN_PATH', BSM_TOKEN_PATH],
        36: [u'BSM_TOKEN_SUBJECT32', BSM_TOKEN_SUBJECT32],
        38: [u'BSM_TOKEN_PROCESS32', BSM_TOKEN_PROCESS32],
        39: [u'BSM_TOKEN_RETURN32', BSM_TOKEN_RETURN32],
        40: [u'BSM_TOKEN_TEXT', BSM_TOKEN_TEXT],
        41: [u'BSM_TOKEN_OPAQUE', BSM_TOKEN_OPAQUE],
        42: [u'BSM_TOKEN_ADDR', BSM_TOKEN_ADDR],
        43: [u'BSM_TOKEN_IP', BSM_TOKEN_IP],
        44: [u'BSM_TOKEN_PORT', BSM_TOKEN_PORT],
        45: [u'BSM_TOKEN_ARGUMENT32', BSM_TOKEN_ARGUMENT32],
        47: [u'BSM_TOKEN_SEQUENCE', BSM_TOKEN_SEQUENCE],
        96: [u'BSM_TOKEN_ZONENAME', BSM_TOKEN_ZONENAME],
        113: [u'BSM_TOKEN_ARGUMENT64', BSM_TOKEN_ARGUMENT64],
        114: [u'BSM_TOKEN_RETURN64', BSM_TOKEN_RETURN64],
        116: [u'BSM_HEADER32_EX', BSM_HEADER32_EX],
        119: [u'BSM_TOKEN_PROCESS64', BSM_TOKEN_PROCESS64],
        122: [u'BSM_TOKEN_SUBJECT32_EX', BSM_TOKEN_SUBJECT32_EX],
        127: [u'BSM_TOKEN_AUT_SOCKINET32_EX', BSM_TOKEN_AUT_SOCKINET32_EX],
        128: [u'BSM_TOKEN_AUT_SOCKINET32', BSM_TOKEN_AUT_SOCKINET32]
    }

    # Untested structures.
    # When an untested structure is found, we also try to parse it using
    # these structures.
    BSM_TYPE_LIST_NOT_TESTED = {
        49: [u'BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32],
        50: [u'BSM_TOKEN_IPC_PERM', BSM_TOKEN_IPC_PERM],
        52: [u'BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS],
        59: [u'BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS],
        60: [u'BSM_TOKEN_EXEC_ARGUMENTS', BSM_TOKEN_EXEC_ARGUMENTS],
        61: [u'BSM_TOKEN_EXEC_ENV', BSM_TOKEN_EXEC_ENV],
        62: [u'BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32],
        82: [u'BSM_TOKEN_EXIT', BSM_TOKEN_EXIT],
        115: [u'BSM_TOKEN_ATTR64', BSM_TOKEN_ATTR64],
        117: [u'BSM_TOKEN_SUBJECT64', BSM_TOKEN_SUBJECT64],
        123: [u'BSM_TOKEN_PROCESS32_EX', BSM_TOKEN_PROCESS32_EX],
        124: [u'BSM_TOKEN_PROCESS64_EX', BSM_TOKEN_PROCESS64_EX],
        125: [u'BSM_TOKEN_SUBJECT64_EX', BSM_TOKEN_SUBJECT64_EX],
        126: [u'BSM_TOKEN_ADDR_EXT', BSM_TOKEN_ADDR_EXT],
        129: [u'BSM_TOKEN_AUT_SOCKINET128', BSM_TOKEN_AUT_SOCKINET128],
        130: [u'BSM_TOKEN_SOCKET_UNIX', BSM_TOKEN_SOCKET_UNIX]
    }
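    # Illustrative dispatch sketch (mirrors ReadBSMEvent below): a token ID is
    # read from the stream and, if known, the matching structure is parsed:
    #   token_id = self.BSM_TYPE.parse_stream(file_object)
    #   bsm_type, structure = self.BSM_TYPE_LIST.get(token_id, [u'', u''])
    #   if structure:
    #       token = structure.parse_stream(file_object)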

    def __init__(self):
        """Initializes a parser object."""
        super(BsmParser, self).__init__()
        # Create the dictionary with all token IDs: tested and untested.
        self.bsm_type_list_all = self.BSM_TYPE_LIST.copy()
        self.bsm_type_list_all.update(self.BSM_TYPE_LIST_NOT_TESTED)

    def _CopyByteArrayToBase16String(self, byte_array):
        """Copies a byte array into a base-16 encoded Unicode string.

    Args:
      byte_array: A byte array.

    Returns:
      A base-16 encoded Unicode string.
    """
        return u''.join([u'{0:02x}'.format(byte) for byte in byte_array])

    def _CopyUtf8ByteArrayToString(self, byte_array):
        """Copies a UTF-8 encoded byte array into a Unicode string.

    Args:
      byte_array: A byte array containing a UTF-8 encoded string.

    Returns:
      A Unicode string.
    """
        byte_stream = b''.join(map(chr, byte_array))

        try:
            string = byte_stream.decode(u'utf-8')
        except UnicodeDecodeError:
            logging.warning(u'Unable to decode UTF-8 formatted byte array.')
            string = byte_stream.decode(u'utf-8', errors=u'ignore')

        string, _, _ = string.partition(u'\x00')
        return string

    def _IPv4Format(self, address):
        """Change an integer IPv4 address value for its 4 octets representation.

    Args:
      address: integer with the IPv4 address.

    Returns:
      IPv4 address in dotted-quad notation.
    """
        ipv4_string = self.IPV4_STRUCT.build(address)
        return socket.inet_ntoa(ipv4_string)

    def _IPv6Format(self, high, low):
        """Provide a readable IPv6 IP having the high and low part in 2 integers.

    Args:
      high: 64 bits integers number with the high part of the IPv6.
      low: 64 bits integers number with the low part of the IPv6.

    Returns:
      String with a well represented IPv6.
    """
        ipv6_string = self.IPV6_STRUCT.build(
            construct.Container(high=high, low=low))
        # socket.inet_ntop not supported in Windows.
        if hasattr(socket, u'inet_ntop'):
            return socket.inet_ntop(socket.AF_INET6, ipv6_string)

        # TODO: this approach returns double "::", illegal IPv6 addr.
        str_address = binascii.hexlify(ipv6_string)
        address = []
        blank = False
        for pos in range(0, len(str_address), 4):
            if str_address[pos:pos + 4] == u'0000':
                if not blank:
                    address.append(u'')
                    blank = True
            else:
                blank = False
                address.append(str_address[pos:pos + 4].lstrip(u'0'))
        return u':'.join(address)
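
    # A portable alternative for the fallback above (a sketch, assuming the
    # standard ipaddress module of Python 3.3+ or its PyPI backport):
    #   import ipaddress
    #   return str(ipaddress.IPv6Address((high << 64) | low))
    # This also avoids the double "::" issue noted in the TODO above.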

    def _RawToUTF8(self, byte_stream):
        """Copies a UTF-8 byte stream into a Unicode string.

    Args:
      byte_stream: A byte stream containing a UTF-8 encoded string.

    Returns:
      A Unicode string.
    """
        try:
            string = byte_stream.decode(u'utf-8')
        except UnicodeDecodeError:
            logging.warning(
                u'Decode UTF8 failed, the message string may be cut short.')
            string = byte_stream.decode(u'utf-8', errors=u'ignore')
        return string.partition(u'\x00')[0]

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a BSM file-like object.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        file_object.seek(0, os.SEEK_SET)

        try:
            is_bsm = self.VerifyFile(parser_mediator, file_object)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                u'Unable to parse BSM file with error: {0:s}'.format(
                    exception))

        if not is_bsm:
            raise errors.UnableToParseFile(u'Not a BSM File, unable to parse.')

        event_object = self.ReadBSMEvent(parser_mediator, file_object)
        while event_object:
            parser_mediator.ProduceEvent(event_object)

            event_object = self.ReadBSMEvent(parser_mediator, file_object)

    def ReadBSMEvent(self, parser_mediator, file_object):
        """Returns a BsmEvent from a single BSM entry.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Returns:
      An event object.
    """
        # A list of the tokens contained in the entry.
        extra_tokens = []

        offset = file_object.tell()

        # Token header, first token for each entry.
        try:
            token_id = self.BSM_TYPE.parse_stream(file_object)
        except (IOError, construct.FieldError):
            return

        bsm_type, structure = self.BSM_TYPE_LIST.get(token_id, [u'', u''])
        if bsm_type in (u'BSM_HEADER32', u'BSM_HEADER64', u'BSM_HEADER32_EX'):
            token = structure.parse_stream(file_object)
        else:
            logging.warning(
                u'Token ID Header {0} not expected at position 0x{1:X}. '
                u'The parsing of the file cannot continue.'.format(
                    token_id, file_object.tell()))
            # TODO: for Mac OS X files, searching for the trailer magic value
            #       as the end of the entry could allow parsing to continue.
            return

        length = token.bsm_header.length
        event_type = u'{0} ({1})'.format(
            bsmtoken.BSM_AUDIT_EVENT.get(token.bsm_header.event_type,
                                         u'UNKNOWN'),
            token.bsm_header.event_type)
        timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
            token.timestamp, token.microsecond)

        # Read until we reach the end of the record.
        while file_object.tell() < (offset + length):
            # Check if it is a known token.
            try:
                token_id = self.BSM_TYPE.parse_stream(file_object)
            except (IOError, construct.FieldError):
                logging.warning(
                    u'Unable to parse the Token ID at position: {0:d}'.format(
                        file_object.tell()))
                return
            if token_id not in self.BSM_TYPE_LIST:
                pending = (offset + length) - file_object.tell()
                extra_tokens.extend(
                    self.TryWithUntestedStructures(file_object, token_id,
                                                   pending))
            else:
                token = self.BSM_TYPE_LIST[token_id][1].parse_stream(
                    file_object)
                extra_tokens.append(
                    self.FormatToken(token_id, token, file_object))

        if file_object.tell() > (offset + length):
            logging.warning(u'Token ID {0} not expected at position 0x{1:X}. '
                            u'Jumping to the next entry.'.format(
                                token_id, file_object.tell()))
            try:
                file_object.seek((offset + length) - file_object.tell(),
                                 os.SEEK_CUR)
            except (IOError, construct.FieldError) as exception:
                logging.warning(
                    u'Unable to jump to next entry with error: {0:s}'.format(
                        exception))
                return

        # BSM can be in more than one OS: BSD, Solaris and Mac OS X.
        if parser_mediator.platform == u'MacOSX':
            # In Mac OS X the last two tokens are the return status and the trailer.
            if len(extra_tokens) >= 2:
                return_value = extra_tokens[-2]
                if (return_value.startswith(u'[BSM_TOKEN_RETURN32')
                        or return_value.startswith(u'[BSM_TOKEN_RETURN64')):
                    extra_tokens.pop(-2)
                else:
                    return_value = u'Return unknown'
            else:
                return_value = u'Return unknown'
            if extra_tokens:
                trailer = extra_tokens[-1]
                if trailer.startswith(u'[BSM_TOKEN_TRAILER'):
                    extra_tokens.pop()
                else:
                    trailer = u'Trailer unknown'
            else:
                trailer = u'Trailer unknown'
            return MacBsmEvent(event_type, timestamp, u'. '.join(extra_tokens),
                               return_value, trailer, offset)
        else:
            # Generic BSM format.
            if extra_tokens:
                trailer = extra_tokens[-1]
                if trailer.startswith(u'[BSM_TOKEN_TRAILER'):
                    extra_tokens.pop()
                else:
                    trailer = u'Trailer unknown'
            else:
                trailer = u'Trailer unknown'
            return BsmEvent(event_type, timestamp, u'. '.join(extra_tokens),
                            trailer, offset)

    def VerifyFile(self, parser_mediator, file_object):
        """Check if the file is a BSM file.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_event: file that we want to check.

    Returns:
      True if this is a valid BSM file, otherwise False.
    """
        if file_object.tell() != 0:
            file_object.seek(0)

        # First part of the entry is always a Header.
        try:
            token_id = self.BSM_TYPE.parse_stream(file_object)
        except (IOError, construct.FieldError):
            return False
        if token_id not in self.BSM_TYPE_LIST:
            return False

        bsm_type, structure = self.BSM_TYPE_LIST.get(token_id, [u'', u''])
        try:
            if bsm_type in (u'BSM_HEADER32', u'BSM_HEADER64',
                            u'BSM_HEADER32_EX'):
                header = structure.parse_stream(file_object)
            else:
                return False
        except (IOError, construct.FieldError):
            return False
        if header.bsm_header.version != self.AUDIT_HEADER_VERSION:
            return False

        try:
            token_id = self.BSM_TYPE.parse_stream(file_object)
        except (IOError, construct.FieldError):
            return False

        # If it is a Mac OS X BSM file, the next entry is a text token
        # indicating whether it is a normal start or a recovery track.
        if parser_mediator.platform == u'MacOSX':
            bsm_type_list = self.BSM_TYPE_LIST.get(token_id)
            if not bsm_type_list:
                return False

            if bsm_type_list[0] != u'BSM_TOKEN_TEXT':
                logging.warning(
                    u'It is not a valid first entry for Mac OS X BSM.')
                return False
            try:
                token = self.BSM_TOKEN_TEXT.parse_stream(file_object)
            except (IOError, construct.FieldError):
                return False

            text = self._CopyUtf8ByteArrayToString(token.text)
            if (text != u'launchctl::Audit startup'
                    and text != u'launchctl::Audit recovery'):
                logging.warning(
                    u'It is not a valid first entry for Mac OS X BSM.')
                return False

        file_object.seek(0)
        return True

    def TryWithUntestedStructures(self, file_object, token_id, pending):
        """Try to parse the pending part of the entry using untested structures.

    Args:
      file_object: BSM file.
      token_id: integer with the id that comes from the unknown token.
      pending: pending length of the entry.

    Returns:
      A list of extra tokens data that can be parsed using non-tested
      structures. A message indicating that a structure cannot be parsed
      is added for unparsed structures.
    """
        # Data from the unknown structure.
        start_position = file_object.tell()
        start_token_id = token_id
        extra_tokens = []

        # Read all the "pending" bytes.
        try:
            if token_id in self.bsm_type_list_all:
                token = self.bsm_type_list_all[token_id][1].parse_stream(
                    file_object)
                extra_tokens.append(
                    self.FormatToken(token_id, token, file_object))
                while file_object.tell() < (start_position + pending):
                    # Check if it is a known token.
                    try:
                        token_id = self.BSM_TYPE.parse_stream(file_object)
                    except (IOError, construct.FieldError):
                        logging.warning(
                            u'Unable to parse the Token ID at position: {0:d}'.
                            format(file_object.tell()))
                        return
                    if token_id not in self.bsm_type_list_all:
                        break
                    token = self.bsm_type_list_all[token_id][1].parse_stream(
                        file_object)
                    extra_tokens.append(
                        self.FormatToken(token_id, token, file_object))
        except (IOError, construct.FieldError):
            token_id = 255

        next_entry = (start_position + pending)
        if file_object.tell() != next_entry:
            # Unknown Structure.
            logging.warning(
                u'Unknown Token at "0x{0:X}", ID: {1} (0x{2:X})'.format(
                    start_position - 1, token_id, token_id))
            # TODO: another way to save this information must be found.
            extra_tokens.append(u'Plaso: some tokens from this entry can '
                                u'not be saved. Entry at 0x{0:X} with unknown '
                                u'token id "0x{1:X}".'.format(
                                    start_position - 1, start_token_id))
            # Move to next entry.
            file_object.seek(next_entry - file_object.tell(), os.SEEK_CUR)
            # Return an empty list because we cannot tell which structure
            # failed to parse and prevented reaching the expected end of
            # the entry.
            return []
        return extra_tokens

    # TODO: instead of comparing the text to know which structure was parsed,
    #       it would be better to compare the numeric value (token_id)
    #       directly; less readable, but better performance.
    def FormatToken(self, token_id, token, file_object):
        """Parse the Token depending of the type of the structure.

    Args:
      token_id: Identification integer of the token_type.
      token: Token struct to parse.
      file_object: BSM file.

    Returns:
      String with the parsed Token values.
    """
        if token_id not in self.bsm_type_list_all:
            return u'Type Unknown: {0:d} (0x{0:X})'.format(token_id)

        bsm_type, _ = self.bsm_type_list_all.get(token_id, [u'', u''])

        if bsm_type in [
                u'BSM_TOKEN_TEXT', u'BSM_TOKEN_PATH', u'BSM_TOKEN_ZONENAME'
        ]:
            try:
                string = self._CopyUtf8ByteArrayToString(token.text)
            except TypeError:
                string = u'Unknown'
            return u'[{0}: {1:s}]'.format(bsm_type, string)

        elif bsm_type in [
                u'BSM_TOKEN_RETURN32', u'BSM_TOKEN_RETURN64', u'BSM_TOKEN_EXIT'
        ]:
            return u'[{0}: {1} ({2}), System call status: {3}]'.format(
                bsm_type, bsmtoken.BSM_ERRORS.get(token.status, u'Unknown'),
                token.status, token.return_value)

        elif bsm_type in [u'BSM_TOKEN_SUBJECT32', u'BSM_TOKEN_SUBJECT64']:
            return (
                u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
                u'pid({6}), session_id({7}), terminal_port({8}), '
                u'terminal_ip({9})]').format(
                    bsm_type, token.subject_data.audit_uid,
                    token.subject_data.effective_uid,
                    token.subject_data.effective_gid,
                    token.subject_data.real_uid, token.subject_data.real_gid,
                    token.subject_data.pid, token.subject_data.session_id,
                    token.terminal_port, self._IPv4Format(token.ipv4))

        elif bsm_type in [
                u'BSM_TOKEN_SUBJECT32_EX', u'BSM_TOKEN_SUBJECT64_EX'
        ]:
            if token.bsm_ip_type_short.net_type == self.AU_IPv6:
                ip = self._IPv6Format(token.bsm_ip_type_short.ip_addr.high,
                                      token.bsm_ip_type_short.ip_addr.low)
            elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
                ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
            else:
                ip = u'unknown'
            return (
                u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
                u'pid({6}), session_id({7}), terminal_port({8}), '
                u'terminal_ip({9})]').format(
                    bsm_type, token.subject_data.audit_uid,
                    token.subject_data.effective_uid,
                    token.subject_data.effective_gid,
                    token.subject_data.real_uid, token.subject_data.real_gid,
                    token.subject_data.pid, token.subject_data.session_id,
                    token.terminal_port, ip)

        elif bsm_type in [u'BSM_TOKEN_ARGUMENT32', u'BSM_TOKEN_ARGUMENT64']:
            string = self._CopyUtf8ByteArrayToString(token.text)
            return u'[{0}: {1:s}({2}) is 0x{3:X}]'.format(
                bsm_type, string, token.num_arg, token.name_arg)

        elif bsm_type in [u'BSM_TOKEN_EXEC_ARGUMENTS', u'BSM_TOKEN_EXEC_ENV']:
            arguments = []
            for _ in range(token):
                sub_token = self.BSM_TOKEN_EXEC_ARGUMENT.parse_stream(
                    file_object)
                string = self._CopyUtf8ByteArrayToString(sub_token.text)
                arguments.append(string)
            return u'[{0}: {1:s}]'.format(bsm_type, u' '.join(arguments))

        elif bsm_type == u'BSM_TOKEN_AUT_SOCKINET32':
            return (u'[{0}: {1} ({2}) open in port {3}. Address {4}]'.format(
                bsm_type, bsmtoken.BSM_PROTOCOLS.get(token.net_type,
                                                     u'UNKNOWN'),
                token.net_type, token.port_number,
                self._IPv4Format(token.ipv4)))

        elif bsm_type == u'BSM_TOKEN_AUT_SOCKINET128':
            return u'[{0}: {1} ({2}) open in port {3}. Address {4}]'.format(
                bsm_type, bsmtoken.BSM_PROTOCOLS.get(token.net_type,
                                                     u'UNKNOWN'),
                token.net_type, token.port_number,
                self._IPv6Format(token.ipv6.high, token.ipv6.low))

        elif bsm_type == u'BSM_TOKEN_ADDR':
            return u'[{0}: {1}]'.format(bsm_type, self._IPv4Format(token))

        elif bsm_type == u'BSM_TOKEN_IP':
            return u'[IPv4_Header: 0x{0:s}]'.format(token.encode(u'hex'))

        elif bsm_type == u'BSM_TOKEN_ADDR_EXT':
            return u'[{0}: {1} ({2}). Address {3}]'.format(
                bsm_type, bsmtoken.BSM_PROTOCOLS.get(token.net_type,
                                                     u'UNKNOWN'),
                token.net_type,
                self._IPv6Format(token.ipv6.high, token.ipv6.low))

        elif bsm_type == u'BSM_TOKEN_PORT':
            return u'[{0}: {1}]'.format(bsm_type, token)

        elif bsm_type == u'BSM_TOKEN_TRAILER':
            return u'[{0}: {1}]'.format(bsm_type, token.record_length)

        elif bsm_type == u'BSM_TOKEN_FILE':
            # TODO: if this timestamp is useful, it should be extracted as a
            #       separate event object.
            timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
                token.timestamp, token.microsecond)
            date_time = timelib.Timestamp.CopyToDatetime(timestamp, pytz.UTC)
            date_time_string = date_time.strftime(u'%Y-%m-%d %H:%M:%S')

            string = self._CopyUtf8ByteArrayToString(token.text)
            return u'[{0}: {1:s}, timestamp: {2:s}]'.format(
                bsm_type, string, date_time_string)

        elif bsm_type == u'BSM_TOKEN_IPC':
            return u'[{0}: object type {1}, object id {2}]'.format(
                bsm_type, token.object_type, token.object_id)

        elif bsm_type in [u'BSM_TOKEN_PROCESS32', u'BSM_TOKEN_PROCESS64']:
            return (
                u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
                u'pid({6}), session_id({7}), terminal_port({8}), '
                u'terminal_ip({9})]').format(
                    bsm_type, token.subject_data.audit_uid,
                    token.subject_data.effective_uid,
                    token.subject_data.effective_gid,
                    token.subject_data.real_uid, token.subject_data.real_gid,
                    token.subject_data.pid, token.subject_data.session_id,
                    token.terminal_port, self._IPv4Format(token.ipv4))

        elif bsm_type in [
                u'BSM_TOKEN_PROCESS32_EX', u'BSM_TOKEN_PROCESS64_EX'
        ]:
            if token.bsm_ip_type_short.net_type == self.AU_IPv6:
                ip = self._IPv6Format(token.bsm_ip_type_short.ip_addr.high,
                                      token.bsm_ip_type_short.ip_addr.low)
            elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
                ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
            else:
                ip = u'unknown'
            return (
                u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
                u'pid({6}), session_id({7}), terminal_port({8}), '
                u'terminal_ip({9})]').format(
                    bsm_type, token.subject_data.audit_uid,
                    token.subject_data.effective_uid,
                    token.subject_data.effective_gid,
                    token.subject_data.real_uid, token.subject_data.real_gid,
                    token.subject_data.pid, token.subject_data.session_id,
                    token.terminal_port, ip)

        elif bsm_type == u'BSM_TOKEN_DATA':
            data = []
            data_type = bsmtoken.BSM_TOKEN_DATA_TYPE.get(token.data_type, u'')
            if data_type == u'AUR_CHAR':
                for _ in range(token.unit_count):
                    data.append(
                        self.BSM_TOKEN_DATA_CHAR.parse_stream(file_object))
            elif data_type == u'AUR_SHORT':
                for _ in range(token.unit_count):
                    data.append(
                        self.BSM_TOKEN_DATA_SHORT.parse_stream(file_object))
            elif data_type == u'AUR_INT32':
                for _ in range(token.unit_count):
                    data.append(
                        self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object))
            else:
                data.append(u'Unknown type data')
            # TODO: when the data is a string it ends with a ".", however a
            #       space is returned after the UTF-8 conversion.
            return u'[{0}: Format data: {1}, Data: {2}]'.format(
                bsm_type, bsmtoken.BSM_TOKEN_DATA_PRINT[token.how_to_print],
                self._RawToUTF8(u''.join(data)))

        elif bsm_type in [u'BSM_TOKEN_ATTR32', u'BSM_TOKEN_ATTR64']:
            return (u'[{0}: Mode: {1}, UID: {2}, GID: {3}, '
                    u'File system ID: {4}, Node ID: {5}, Device: {6}]').format(
                        bsm_type, token.file_mode, token.uid, token.gid,
                        token.file_system_id, token.file_system_node_id,
                        token.device)

        elif bsm_type == u'BSM_TOKEN_GROUPS':
            arguments = []
            for _ in range(token):
                arguments.append(
                    self._RawToUTF8(
                        self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object)))
            return u'[{0}: {1:s}]'.format(bsm_type, u','.join(arguments))

        elif bsm_type == u'BSM_TOKEN_AUT_SOCKINET32_EX':
            if bsmtoken.BSM_PROTOCOLS.get(token.socket_domain,
                                          u'') == u'INET6':
                saddr = self._IPv6Format(token.structure_addr_port.saddr_high,
                                         token.structure_addr_port.saddr_low)
                daddr = self._IPv6Format(token.structure_addr_port.daddr_high,
                                         token.structure_addr_port.daddr_low)
            else:
                saddr = self._IPv4Format(
                    token.structure_addr_port.source_address)
                daddr = self._IPv4Format(
                    token.structure_addr_port.destination_address)

            return u'[{0}: from {1} port {2} to {3} port {4}]'.format(
                bsm_type, saddr, token.structure_addr_port.source_port, daddr,
                token.structure_addr_port.destination_port)

        elif bsm_type == u'BSM_TOKEN_IPC_PERM':
            return (u'[{0}: user id {1}, group id {2}, create user id {3}, '
                    u'create group id {4}, access {5}]').format(
                        bsm_type, token.user_id, token.group_id,
                        token.creator_user_id, token.creator_group_id,
                        token.access_mode)

        elif bsm_type == u'BSM_TOKEN_SOCKET_UNIX':
            string = self._CopyUtf8ByteArrayToString(token.path)
            return u'[{0}: Family {1}, Path {2:s}]'.format(
                bsm_type, token.family, string)

        elif bsm_type == u'BSM_TOKEN_OPAQUE':
            string = self._CopyByteArrayToBase16String(token.text)
            return u'[{0}: {1:s}]'.format(bsm_type, string)

        elif bsm_type == u'BSM_TOKEN_SEQUENCE':
            return u'[{0}: {1}]'.format(bsm_type, token)
Beispiel #16
0
import construct as c
import logging

from gevent import socket

class Error(Exception):
    pass

MAX_PACKET_SIZE = 2**16

log = logging.getLogger('tracker')

INIT_ID = 0x41727101980

connect_req = c.Struct('request', 
    c.UBInt64('connection_id'),
    c.UBInt32('action'),
    c.UBInt32('transaction_id')
)

connect_resp = c.Struct('response',
    c.UBInt32('action'),
    c.UBInt32('transaction_id'),
    c.UBInt64('connection_id'),
)
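
# A hedged usage sketch (not part of the original snippet): round-trip the
# BEP 15 "connect" request through connect_req above to show the wire layout;
# the transaction ID value is arbitrary.
def _connect_roundtrip_demo():
    payload = connect_req.build(c.Container(
        connection_id=INIT_ID, action=0, transaction_id=0x1234))
    assert len(payload) == 16  # 8 + 4 + 4 bytes, big-endian
    assert connect_req.parse(payload).transaction_id == 0x1234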

announce_req = c.Struct('request',
    c.UBInt64('connection_id'),
    c.UBInt32('action'),
    c.UBInt32('transaction_id'),
    c.Bytes('info_hash', 20),
    c.Bytes('peer_id', 20),
Beispiel #17
0
import construct
import datetime
import os
import sys

KEYCHAIN_MAGIC_HEADER = 'kych'
KEYCHAIN_MAJOR_VERSION = 1
KEYCHAIN_MINOR_VERSION = 0

# DB HEADER
KEYCHAIN_DB_HEADER = construct.Struct('db_header',
                                      construct.String('magic', 4),
                                      construct.UBInt16('major_version'),
                                      construct.UBInt16('minor_version'),
                                      construct.UBInt32('header_size'),
                                      construct.UBInt32('schema_offset'),
                                      construct.Padding(4))

# DB SCHEMA
KEYCHAIN_DB_SCHEMA = construct.Struct('db_schema', construct.UBInt32('size'),
                                      construct.UBInt32('number_of_tables'))
# For each of the number_of_tables, the schema has a TABLE_OFFSET with the
# offset relative to the start of the DB_SCHEMA.
TABLE_OFFSET = construct.UBInt32('table_offset')
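
# Sketch of consuming the offsets (assumes a file-like object positioned right
# after the parsed KEYCHAIN_DB_SCHEMA):
#   table_offsets = [TABLE_OFFSET.parse_stream(file_object)
#                    for _ in range(db_schema.number_of_tables)]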

# TABLE
TABLE_RECORD_TYPE = {
    0: u'Schema information',
    1: u'Schema indexes',
    2: u'Schema attributes',
Beispiel #18
0
class FirefoxCacheParser(BaseFirefoxCacheParser):
    """Parses Firefox cache version 1 files (Firefox 31 or earlier)."""

    NAME = u'firefox_cache'
    DESCRIPTION = (
        u'Parser for Firefox Cache version 1 files (Firefox 31 or earlier).')

    _CACHE_VERSION = 1

    # Initial size of Firefox 4 and later cache files.
    _INITIAL_CACHE_FILE_SIZE = 4 * 1024 * 1024

    # Smallest possible block size in Firefox cache files.
    _MINIMUM_BLOCK_SIZE = 256

    _CACHE_RECORD_HEADER_STRUCT = construct.Struct(
        u'record_header', construct.UBInt16(u'major'),
        construct.UBInt16(u'minor'), construct.UBInt32(u'location'),
        construct.UBInt32(u'fetch_count'), construct.UBInt32(u'last_fetched'),
        construct.UBInt32(u'last_modified'), construct.UBInt32(u'expire_time'),
        construct.UBInt32(u'data_size'), construct.UBInt32(u'request_size'),
        construct.UBInt32(u'info_size'))

    _CACHE_RECORD_HEADER_SIZE = _CACHE_RECORD_HEADER_STRUCT.sizeof()
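    # The record header above is 36 bytes: 2 x UBInt16 + 8 x UBInt32.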

    # TODO: change into regexp.
    _CACHE_FILENAME = (pyparsing.Word(pyparsing.hexnums, exact=5) +
                       pyparsing.Word(u'm', exact=1) +
                       pyparsing.Word(pyparsing.nums, exact=2))

    FIREFOX_CACHE_CONFIG = collections.namedtuple(
        u'firefox_cache_config', u'block_size first_record_offset')

    def _GetFirefoxConfig(self, file_object, display_name):
        """Determine cache file block size.

    Args:
      file_object: A file-like object.
      display_name: the display name.

    Raises:
      UnableToParseFile: if no valid cache record could be found.
    """
        # There ought to be a valid record within the first 4 MiB. We use this
        # limit to prevent reading large invalid files.
        to_read = min(file_object.get_size(), self._INITIAL_CACHE_FILE_SIZE)

        while file_object.get_offset() < to_read:
            offset = file_object.get_offset()

            try:
                # We have not yet determined the block size, so we use the smallest
                # possible size.
                fetched, _, _ = self._NextRecord(file_object, display_name,
                                                 self._MINIMUM_BLOCK_SIZE)

                record_size = (self._CACHE_RECORD_HEADER_SIZE +
                               fetched.request_size + fetched.info_size)

                if record_size >= 4096:
                    # _CACHE_003_
                    block_size = 4096
                elif record_size >= 1024:
                    # _CACHE_002_
                    block_size = 1024
                else:
                    # _CACHE_001_
                    block_size = 256

                return self.FIREFOX_CACHE_CONFIG(block_size, offset)

            except IOError:
                logging.debug(u'[{0:s}] {1:s}:{2:d}: Invalid record.'.format(
                    self.NAME, display_name, offset))

        raise errors.UnableToParseFile(
            u'Could not find a valid cache record. Not a Firefox cache file.')

    def _NextRecord(self, file_object, display_name, block_size):
        """Provide the next cache record.

    Args:
      file_object: A file-like object.
      display_name: the display name.
      block_size: the block size.

    Returns:
      A tuple containing the fetched, modified and expire event objects
      (instances of EventObject) or None.
    """
        offset = file_object.get_offset()

        try:
            cache_record_header = self._CACHE_RECORD_HEADER_STRUCT.parse_stream(
                file_object)
        except (IOError, construct.FieldError):
            raise IOError(u'Unable to parse stream.')

        if not self._ValidateCacheRecordHeader(cache_record_header):
            # Move reader to next candidate block.
            file_offset = block_size - self._CACHE_RECORD_HEADER_SIZE
            file_object.seek(file_offset, os.SEEK_CUR)
            raise IOError(u'Not a valid Firefox cache record.')

        # The last byte in a request is null.
        url = file_object.read(cache_record_header.request_size)[:-1]

        # HTTP response header, even elements are keys, odd elements values.
        header_data = file_object.read(cache_record_header.info_size)

        request_method, response_code = self._ParseHTTPHeaders(
            header_data, offset, display_name)

        # A request can span multiple blocks, so we use modulo.
        file_offset = file_object.get_offset() - offset
        _, remainder = divmod(file_offset, block_size)

        # Move reader to next candidate block. Include the null-byte skipped above.
        file_object.seek(block_size - remainder, os.SEEK_CUR)

        cache_record_values = {
            u'data_size': cache_record_header.data_size,
            u'fetch_count': cache_record_header.fetch_count,
            u'info_size': cache_record_header.info_size,
            u'location': cache_record_header.location,
            u'major': cache_record_header.major,
            u'minor': cache_record_header.minor,
            u'request_method': request_method,
            u'request_size': cache_record_header.request_size,
            u'response_code': response_code,
            u'version': self._CACHE_VERSION,
            u'url': url
        }

        fetched = FirefoxCacheEvent(cache_record_header.last_fetched,
                                    eventdata.EventTimestamp.LAST_VISITED_TIME,
                                    cache_record_values)

        if cache_record_header.last_modified:
            modified = FirefoxCacheEvent(cache_record_header.last_modified,
                                         eventdata.EventTimestamp.WRITTEN_TIME,
                                         cache_record_values)
        else:
            modified = None

        if cache_record_header.expire_time:
            expire = FirefoxCacheEvent(
                cache_record_header.expire_time,
                eventdata.EventTimestamp.EXPIRATION_TIME, cache_record_values)
        else:
            expire = None

        return fetched, modified, expire

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a Firefox cache file-like object.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        filename = parser_mediator.GetFilename()
        display_name = parser_mediator.GetDisplayName()

        try:
            # Match cache filename. Five hex characters + 'm' + two digit
            # number, e.g. '01ABCm02'. 'm' is for metadata. Cache files with 'd'
            # instead contain data only.
            self._CACHE_FILENAME.parseString(filename)
        except pyparsing.ParseException:
            if not filename.startswith(u'_CACHE_00'):
                raise errors.UnableToParseFile(u'Not a Firefox cache1 file.')

        firefox_config = self._GetFirefoxConfig(file_object, display_name)

        file_object.seek(firefox_config.first_record_offset)

        while file_object.get_offset() < file_object.get_size():
            try:
                fetched, modified, expire = self._NextRecord(
                    file_object, display_name, firefox_config.block_size)
                parser_mediator.ProduceEvent(fetched)

                if modified:
                    parser_mediator.ProduceEvent(modified)

                if expire:
                    parser_mediator.ProduceEvent(expire)
            except IOError:
                file_offset = (file_object.get_offset() -
                               self._MINIMUM_BLOCK_SIZE)
                logging.debug(
                    (u'[{0:s}] Invalid cache record in file: {1:s} at offset: '
                     u'{2:d}.').format(self.NAME, display_name, file_offset))
Beispiel #19
0
class AslParser(interface.BaseParser):
    """Parser for ASL log files."""

    NAME = 'asl_log'
    DESCRIPTION = u'Parser for ASL log files.'

    ASL_MAGIC = 'ASL DB\x00\x00\x00\x00\x00\x00'

    # If not right assigned, the value is "-1".
    ASL_NO_RIGHTS = 'ffffffff'

    # Priority level (criticality).
    ASL_MESSAGE_PRIORITY = {
        0: 'EMERGENCY',
        1: 'ALERT',
        2: 'CRITICAL',
        3: 'ERROR',
        4: 'WARNING',
        5: 'NOTICE',
        6: 'INFO',
        7: 'DEBUG'
    }

    # ASL File header.
    # magic: magic number that identifies ASL files.
    # version: version of the file.
    # offset: first record in the file.
    # timestamp: epoch time when the first entry was written.
    # last_offset: last record in the file.
    ASL_HEADER_STRUCT = construct.Struct('asl_header_struct',
                                         construct.String('magic', 12),
                                         construct.UBInt32('version'),
                                         construct.UBInt64('offset'),
                                         construct.UBInt64('timestamp'),
                                         construct.UBInt32('cache_size'),
                                         construct.UBInt64('last_offset'),
                                         construct.Padding(36))

    # The record structure is:
    # [HEAP][STRUCTURE][4xExtraField][2xExtraField]*[PreviousEntry]
    # Record static structure.
    # tam_entry: the number of bytes from this file position until the end
    #            of the record, not counting the field itself.
    # next_offset: offset of the next record. If it is 0x00, this is the
    #              last record.
    # asl_message_id: integer with the numeric identification of the event.
    # timestamp: Epoch integer with the time the entry was created.
    # nanosecond: nanoseconds to add to the timestamp.
    # level: priority level.
    # pid: identifier of the process that asked to save the record.
    # uid: identifier of the user that launched the process.
    # gid: identifier of the group that launched the process.
    # read_uid: identifier of a user. Only applies if it is not -1 (all FF).
    #           Only root and this user can read the entry.
    # read_gid: the same as read_uid, but for the group.
    ASL_RECORD_STRUCT = construct.Struct('asl_record_struct',
                                         construct.Padding(2),
                                         construct.UBInt32('tam_entry'),
                                         construct.UBInt64('next_offset'),
                                         construct.UBInt64('asl_message_id'),
                                         construct.UBInt64('timestamp'),
                                         construct.UBInt32('nanosec'),
                                         construct.UBInt16('level'),
                                         construct.UBInt16('flags'),
                                         construct.UBInt32('pid'),
                                         construct.UBInt32('uid'),
                                         construct.UBInt32('gid'),
                                         construct.UBInt32('read_uid'),
                                         construct.UBInt32('read_gid'),
                                         construct.UBInt64('ref_pid'))

    ASL_RECORD_STRUCT_SIZE = ASL_RECORD_STRUCT.sizeof()

    # 8-byte fields, they can be:
    # - String: [Nibble = 1000 (8)][Nibble = Length][7 Bytes = String].
    # - Integer: integer with the byte position in the file that points
    #            to an ASL_RECORD_DYN_VALUE struct. If the value of the
    #            integer is equal to 0, it means that it has no data (skip).

    # If the field is a String, we use this structure to decode each
    # integer byte into the corresponding character (ASCII char).
    ASL_OCTET_STRING = construct.ExprAdapter(construct.Octet('string'),
                                             encoder=lambda obj, ctx: ord(obj),
                                             decoder=lambda obj, ctx: chr(obj))

    # Field string structure. If the first bit is 1, the field is a String
    # (first nibble = 1000 = 8) and the next nibble holds the number of
    # characters. The remaining 7 bytes contain the string itself.
    ASL_STRING = construct.BitStruct(
        'string', construct.Flag('type'), construct.Bits('filler', 3),
        construct.If(lambda ctx: ctx.type, construct.Nibble('string_length')),
        construct.If(lambda ctx: ctx.type,
                     construct.Array(7, ASL_OCTET_STRING)))

    # 8-byte pointer to a byte position in the file.
    ASL_POINTER = construct.UBInt64('pointer')

    # Dynamic data structure, pointed to by a pointer, that contains a String:
    # [2 bytes padding][4-byte length of String][String].
    ASL_RECORD_DYN_VALUE = construct.Struct(
        'asl_record_dyn_value', construct.Padding(2),
        construct.PascalString('value',
                               length_field=construct.UBInt32('length')))
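    # Example (hypothetical bytes): parsing
    #   b'\x00\x00\x00\x00\x00\x04hidd'
    # yields value = 'hidd': 2 bytes of padding, a 4-byte length (4) and the
    # string data itself.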

    def Parse(self, parser_context, file_entry):
        """Extract entries from an ASL file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
    """
        file_object = file_entry.GetFileObject()
        file_object.seek(0, os.SEEK_SET)

        try:
            header = self.ASL_HEADER_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            file_object.close()
            raise errors.UnableToParseFile(
                u'Unable to parse ASL Header with error: {0:s}.'.format(
                    exception))

        if header.magic != self.ASL_MAGIC:
            file_object.close()
            raise errors.UnableToParseFile(
                u'Not an ASL Header, unable to parse.')

        # Get the first and the last entry.
        offset = header.offset
        old_offset = header.offset
        last_offset_header = header.last_offset

        # If the ASL file has entries.
        if offset:
            event_object, offset = self.ReadAslEvent(file_object, offset)
            while event_object:
                parser_context.ProduceEvent(event_object,
                                            parser_name=self.NAME,
                                            file_entry=file_entry)

                # TODO: an anomaly object must be emitted once that is implemented.
                # Sanity check, the last read element must be the same as
                # indicated by the header.
                if offset == 0 and old_offset != last_offset_header:
                    logging.warning(u'Parsing ended before the header ends.')
                old_offset = offset
                event_object, offset = self.ReadAslEvent(file_object, offset)

        file_object.close()

    def ReadAslEvent(self, file_object, offset):
        """Returns an AslEvent from a single ASL entry.

    Args:
      file_object: a file-like object that points to an ASL file.
      offset: offset where the static part of the entry starts.

    Returns:
      An event object constructed from a single ASL record.
    """
        if not offset:
            return None, None

        # The heap of the entry is saved to try to avoid seeks (performance).
        # dynamic_start has the real start position of the entry.
        dynamic_start = file_object.tell()
        dynamic_part = file_object.read(offset - file_object.tell())

        try:
            record_header = self.ASL_RECORD_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            logging.warning(
                u'Unable to parse ASL event with error: {0:s}'.format(
                    exception))
            return None, None

        # The variable tam_fields is the real length of the dynamic fields.
        # The record layout is:
        #   [Record_Struct] + [Dynamic_Fields] + [Pointer_Entry_Before]
        # Record_Struct contains a field called tam_entry holding the number
        # of bytes from the position of that field until the end of the entry.
        # tam_entry is stored between the 2nd and the 6th byte of
        # [Record_Struct], hence:
        #   tam_entry = ([Record_Struct] - 6) + [Dynamic_Fields]
        #               + [Pointer_Entry_Before]
        # We do not need [Pointer_Entry_Before], so we subtract its size,
        # which is 8 bytes:
        #   [Dynamic_Fields] = tam_entry - [Record_Struct] + 6 - 8
        #   [Dynamic_Fields] = tam_entry - [Record_Struct] - 2
        tam_fields = record_header.tam_entry - self.ASL_RECORD_STRUCT_SIZE - 2

        # The dynamic part of the entry contains a minimum of four 8-byte
        # fields plus 2x[8 bytes] fields for each extra ASL_Field.
        # The first four fields are always the Host, Sender, Facility and
        # Message. After these, the entry might have extra ASL_Fields.
        # Each extra ASL_Field is a pair of 8-byte fields where the first
        # 8 bytes contain the name of the extra ASL_Field and the second
        # 8 bytes contain its text.
        # Each of these 8-byte fields can be stored using one of three
        # different types:
        # - Null value ('0000000000000000'): nothing to do.
        # - String: It is string if first bit = 1 or first nibble = 8 (1000).
        #           Second nibble has the length of string.
        #           The next 7 bytes have the text characters of the string
        #           padding the end with null characters: '0x00'.
        #           Example: [8468 6964 6400 0000]
        #                    [8] String, [4] length, value: [68 69 64 64] = hidd.
        # - Pointer: static position in the file to a special struct
        #            implemented as an ASL_RECORD_DYN_VALUE.
        #            Example: [0000 0000 0000 0077]
        #            It points to the file position 0x077 that has a
        #            ASL_RECORD_DYN_VALUE structure.
        values = []
        while tam_fields > 0:
            try:
                raw_field = file_object.read(8)
            except (IOError, construct.FieldError) as exception:
                logging.warning(
                    u'Unable to parse ASL event with error: {0:s}'.format(
                        exception))
                return None, None
            try:
                # Try to read as a String.
                field = self.ASL_STRING.parse(raw_field)
                values.append(''.join(field.string[0:field.string_length]))
                # Go to parse the next extra field.
                tam_fields -= 8
                continue
            except ValueError:
                pass
            # If it is not a string, it must be a pointer.
            try:
                field = self.ASL_POINTER.parse(raw_field)
            except ValueError as exception:
                logging.warning(
                    u'Unable to parse ASL event with error: {0:s}'.format(
                        exception))
                return None, None
            if field != 0:
                # The next if/else exists only for performance, to avoid seeks.
                # If the pointer points to a lower position than where the
                # current entry starts, it points to a previous entry.
                pos = field - dynamic_start
                # Greater than or equal to 0 means the data is in the current
                # entry.
                if pos >= 0:
                    try:
                        values.append((self.ASL_RECORD_DYN_VALUE.parse(
                            dynamic_part[pos:])).value.partition('\x00')[0])
                    except (IOError, construct.FieldError) as exception:
                        logging.warning(
                            u'Unable to parse ASL event with error: {0:s}'.
                            format(exception))
                        return None, None
                else:
                    # We only use the seek method when the pointer points to
                    # the heap of another entry.
                    main_position = file_object.tell()
                    # If the pointer is in a previous entry.
                    if main_position > field:
                        file_object.seek(field - main_position, os.SEEK_CUR)
                        try:
                            values.append(
                                (self.ASL_RECORD_DYN_VALUE.parse_stream(
                                    file_object)).value.partition('\x00')[0])
                        except (IOError, construct.FieldError):
                            logging.warning((
                                u'The pointer at {0:d} (0x{0:x}) points to invalid '
                                u'information.'
                            ).format(main_position -
                                     self.ASL_POINTER.sizeof()))
                        # Come back to the position in the entry.
                        _ = file_object.read(main_position -
                                             file_object.tell())
                    else:
                        _ = file_object.read(field - main_position)
                        values.append((self.ASL_RECORD_DYN_VALUE.parse_stream(
                            file_object)).value.partition('\x00')[0])
                        # Come back to the position in the entry.
                        file_object.seek(main_position - file_object.tell(),
                                         os.SEEK_CUR)
            # Next extra field: 8 bytes more.
            tam_fields -= 8

        # Read the last 8 bytes of the record that points to the previous entry.
        _ = file_object.read(8)

        # Parsed section, we translate the read data to an appropriate format.
        microsecond = record_header.nanosec // 1000
        timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
            record_header.timestamp, microsecond)
        record_position = offset
        message_id = record_header.asl_message_id
        level = u'{0} ({1})'.format(
            self.ASL_MESSAGE_PRIORITY[record_header.level],
            record_header.level)
        # If the value is -1 (FFFFFFFF), it can be read by everyone.
        if record_header.read_uid != int(self.ASL_NO_RIGHTS, 16):
            read_uid = record_header.read_uid
        else:
            read_uid = 'ALL'
        if record_header.read_gid != int(self.ASL_NO_RIGHTS, 16):
            read_gid = record_header.read_gid
        else:
            read_gid = 'ALL'

        # Parsing the dynamic values (text or pointers to position with text).
        # The first four are always the host, sender, facility, and message.
        computer_name = values[0]
        sender = values[1]
        facility = values[2]
        message = values[3]

        # If the entry has extra fields, they work as pairs:
        # the first is the name of the field and the second its value.
        extra_information = ''
        if len(values) > 4:
            values = values[4:]
            for index in xrange(0, len(values) // 2):
                extra_information += (u'[{0}: {1}]'.format(
                    values[index * 2], values[(index * 2) + 1]))

        # Return the event and the offset for the next entry.
        return AslEvent(timestamp, record_position, message_id, level,
                        record_header, read_uid, read_gid, computer_name,
                        sender, facility, message,
                        extra_information), record_header.next_offset
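
A note on the seek pattern used throughout this method: rather than seeking to an absolute position, the code reads or seeks relative to the current position, which the comments above describe as a performance workaround. A minimal standalone sketch of the equivalence (the io.BytesIO object is only for illustration):

import io
import os

file_object = io.BytesIO(b'0123456789')
file_object.seek(7, os.SEEK_SET)

# Seeking relative to the current position...
target = 2
file_object.seek(target - file_object.tell(), os.SEEK_CUR)

# ...lands on the same byte as an absolute seek to target.
assert file_object.tell() == 2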
Beispiel #20
0
class ASLParser(interface.FileObjectParser):
    """Parser for ASL log files."""

    _INITIAL_FILE_OFFSET = None

    NAME = u'asl_log'
    DESCRIPTION = u'Parser for ASL log files.'

    _ASL_MAGIC = b'ASL DB\x00\x00\x00\x00\x00\x00'

    # ASL File header.
    # magic: magic number that identify ASL files.
    # version: version of the file.
    # offset: first record in the file.
    # timestamp: time when the first entry was written.
    #     Contains the number of seconds since January 1, 1970 00:00:00 UTC.
    # last_offset: last record in the file.
    _ASL_HEADER_STRUCT = construct.Struct(u'asl_header_struct',
                                          construct.String(u'magic', 12),
                                          construct.UBInt32(u'version'),
                                          construct.UBInt64(u'offset'),
                                          construct.UBInt64(u'timestamp'),
                                          construct.UBInt32(u'cache_size'),
                                          construct.UBInt64(u'last_offset'),
                                          construct.Padding(36))

    # The record structure is:
    # [HEAP][STRUCTURE][4xExtraField][2xExtraField]*[PreviousEntry]
    # Record static structure.
    # tam_entry: the number of bytes from this file position until the
    #            end of the record, not counting itself.
    # next_offset: next record. If it is equal to 0x00, this is the last
    #              record.
    # asl_message_id: integer containing the numeric identification of the
    #                 event.
    # timestamp: the entry creation date and time.
    #     Contains the number of seconds since January 1, 1970 00:00:00 UTC.
    # nanosecond: nanoseconds to add to the timestamp.
    # level: level of priority.
    # pid: identification of the process that asked to save the record.
    # uid: identification of the user that launched the process.
    # gid: identification of the group that launched the process.
    # read_uid: identification of a user. Only applies if it is not -1
    #           (all 0xFF). Only root and this user can read the entry.
    # read_gid: the same as read_uid, but for the group.
    _ASL_RECORD_STRUCT = construct.Struct(u'asl_record_struct',
                                          construct.Padding(2),
                                          construct.UBInt32(u'tam_entry'),
                                          construct.UBInt64(u'next_offset'),
                                          construct.UBInt64(u'asl_message_id'),
                                          construct.UBInt64(u'timestamp'),
                                          construct.UBInt32(u'nanosec'),
                                          construct.UBInt16(u'level'),
                                          construct.UBInt16(u'flags'),
                                          construct.UBInt32(u'pid'),
                                          construct.UBInt32(u'uid'),
                                          construct.UBInt32(u'gid'),
                                          construct.UBInt32(u'read_uid'),
                                          construct.UBInt32(u'read_gid'),
                                          construct.UBInt64(u'ref_pid'))

    _ASL_RECORD_STRUCT_SIZE = _ASL_RECORD_STRUCT.sizeof()

    # 8-byte fields, they can be:
    # - String: [Nibble = 1000 (8)][Nibble = Length][7 Bytes = String].
    # - Integer: integer with the byte position in the file that points
    #            to an ASL_RECORD_DYN_VALUE struct. If the value of the
    #            integer is equal to 0, it means that it has no data (skip).

    # If the field is a String, we use this structure to decode each
    # byte into the corresponding character (ASCII char).
    _ASL_OCTET_STRING = construct.ExprAdapter(
        construct.Octet(u'string'),
        encoder=lambda obj, ctx: ord(obj),
        decoder=lambda obj, ctx: chr(obj))

    # Field string structure. If the first bit is 1, it means that it
    # is a String (1000) = 8, and the next nibble has the number of
    # characters. The last 7 bytes contain the string characters.
    _ASL_STRING = construct.BitStruct(
        u'string', construct.Flag(u'type'), construct.Bits(u'filler', 3),
        construct.If(lambda ctx: ctx.type, construct.Nibble(u'string_length')),
        construct.If(lambda ctx: ctx.type,
                     construct.Array(7, _ASL_OCTET_STRING)))

    # 8-byte pointer to a byte position in the file.
    _ASL_POINTER = construct.UBInt64(u'pointer')

    # Dynamic data structure pointed by a pointer that contains a String:
    # [2 bytes padding][4 bytes size of String][String].
    _ASL_RECORD_DYN_VALUE = construct.Struct(
        u'asl_record_dyn_value', construct.Padding(2),
        construct.UBInt32(u'size'),
        construct.Bytes(u'value', lambda ctx: ctx.size))

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses an ALS file-like object.

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).
      file_object: a file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        file_object.seek(0, os.SEEK_SET)

        try:
            header = self._ASL_HEADER_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                u'Unable to parse ASL Header with error: {0:s}.'.format(
                    exception))

        if header.magic != self._ASL_MAGIC:
            raise errors.UnableToParseFile(
                u'Not an ASL Header, unable to parse.')

        offset = header.offset
        if not offset:
            return

        header_last_offset = header.last_offset

        previous_offset = offset
        event_object, offset = self.ReadASLEvent(parser_mediator, file_object,
                                                 offset)
        while event_object:
            # Sanity check, the last read element must be the same as
            # indicated by the header.
            if offset == 0 and previous_offset != header_last_offset:
                parser_mediator.ProduceParseError(
                    u'Unable to parse header. The last read record does not '
                    u'match the last offset in the header.')
            previous_offset = offset
            event_object, offset = self.ReadASLEvent(parser_mediator,
                                                     file_object, offset)

    def ReadASLEvent(self, parser_mediator, file_object, offset):
        """Reads an ASL record at a specific offset.

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).
      file_object: a file-like object that points to an ASL file.
      offset: an integer containing the offset of the ASL record.

    Returns:
      A tuple of an event object extracted from the ASL record,
      and the offset to the next ASL record in the file.
    """
        # The heap of the entry is read up front to try to avoid seeks
        # (a performance optimization). dynamic_data_offset contains the real
        # start position of the entry.
        dynamic_data_offset = file_object.tell()

        if not offset:
            return None, None

        try:
            dynamic_data = file_object.read(offset - dynamic_data_offset)
        except IOError as exception:
            parser_mediator.ProduceParseError(
                u'unable to read ASL record dynamic data with error: {0:s}'.
                format(exception))
            return None, None

        try:
            record_struct = self._ASL_RECORD_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            parser_mediator.ProduceParseError(
                u'unable to parse ASL record with error: {0:s}'.format(
                    exception))
            return None, None

        # The variable tam_fields is the real length of the dynamic fields.
        # The entry layout is:
        # [Record_Struct] + [Dynamic_Fields] + [Pointer_Entry_Before]
        # Record_Struct contains a field called tam_entry that holds the
        # number of bytes from the position right after that field until the
        # end of the entry. tam_entry occupies bytes 2 to 6 of
        # [Record_Struct], hence:
        # tam_entry = ([Record_Struct] - 6) + [Dynamic_Fields] +
        #             [Pointer_Entry_Before]
        # We do not need [Pointer_Entry_Before], whose size is 8 bytes, so we
        # subtract it:
        # [Dynamic_Fields] = tam_entry - [Record_Struct] + 6 - 8
        # [Dynamic_Fields] = tam_entry - [Record_Struct] - 2
        tam_fields = record_struct.tam_entry - self._ASL_RECORD_STRUCT_SIZE - 2

        # The dynamic part of the entry contains a minimum of four 8-byte
        # fields, plus 2 x [8 bytes] fields for each extra ASL field.
        # The first four fields are always the Host, Sender, Facility and
        # Message. After them, the entry might have extra ASL fields.
        # Each extra ASL field is a pair of 8-byte fields where the first
        # 8 bytes contain the name of the extra field and the second 8 bytes
        # contain its text.
        # Each of these 8-byte fields can be stored using one of three
        # different types:
        # - Null value ('0000000000000000'): nothing to do.
        # - String: it is a string if the first bit = 1, in other words if
        #           the first nibble = 8 (1000).
        #           The second nibble has the length of the string.
        #           The next 7 bytes have the text characters of the string
        #           padding the end with null characters: '0x00'.
        #           Example: [8468 6964 6400 0000]
        #                    [8] String, [4] length, value: [68 69 64 64] = hidd.
        # - Pointer: static position in the file to a special struct
        #            implemented as an ASL_RECORD_DYN_VALUE.
        #            Example: [0000 0000 0000 0077]
        #            It points to the file position 0x077 that has a
        #            ASL_RECORD_DYN_VALUE structure.
        values = []
        while tam_fields > 0:
            try:
                field_data = file_object.read(8)
            except IOError as exception:
                parser_mediator.ProduceParseError(
                    u'unable to read ASL field with error: {0:s}'.format(
                        exception))
                return None, None

            # Try to read the field data as a string.
            try:
                asl_string_struct = self._ASL_STRING.parse(field_data)
                string_data = b''.join(
                    asl_string_struct.string[0:asl_string_struct.
                                             string_length])
                values.append(string_data)

                # Go to parse the next extra field.
                tam_fields -= 8
                continue

            except ValueError:
                pass

            # If the field is not a string it must be a pointer.
            try:
                pointer_value = self._ASL_POINTER.parse(field_data)
            except ValueError as exception:
                parser_mediator.ProduceParseError(
                    u'unable to parse ASL field with error: {0:s}'.format(
                        exception))
                return None, None

            if not pointer_value:
                # Next extra field: 8 bytes more.
                tam_fields -= 8
                continue

            # The next if/else exists only for performance reasons, to avoid
            # seeks. If the pointer points to a lower position than where the
            # current entry starts, it points to a previous entry.
            pos = pointer_value - dynamic_data_offset

            # Greater than or equal to 0 means the data is in the current entry.
            if pos >= 0:
                try:
                    dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse(
                        dynamic_data[pos:])
                    dyn_value = dyn_value_struct.value.partition(b'\x00')[0]
                    values.append(dyn_value)

                except (IOError, construct.FieldError) as exception:
                    parser_mediator.ProduceParseError((
                        u'unable to parse ASL record dynamic value with error: '
                        u'{0:s}').format(exception))
                    return None, None

            else:
                # We only use the seek method if the pointer points to the
                # heap of another entry.
                main_position = file_object.tell()

                # If the pointer is in a previous entry.
                if main_position > pointer_value:
                    file_object.seek(pointer_value - main_position,
                                     os.SEEK_CUR)

                    try:
                        dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse_stream(
                            file_object)
                        dyn_value = dyn_value_struct.value.partition(
                            b'\x00')[0]
                        values.append(dyn_value)

                    except (IOError, construct.FieldError):
                        parser_mediator.ProduceParseError((
                            u'the pointer at {0:d} (0x{0:08x}) points to invalid '
                            u'information.'
                        ).format(main_position - self._ASL_POINTER.sizeof()))

                    # Come back to the position in the entry.
                    _ = file_object.read(main_position - file_object.tell())

                else:
                    _ = file_object.read(pointer_value - main_position)

                    dyn_value_struct = self._ASL_RECORD_DYN_VALUE.parse_stream(
                        file_object)
                    dyn_value = dyn_value_struct.value.partition(b'\x00')[0]
                    values.append(dyn_value)

                    # Come back to the position in the entry.
                    file_object.seek(main_position - file_object.tell(),
                                     os.SEEK_CUR)

            # Next extra field: 8 bytes more.
            tam_fields -= 8

        # Read the last 8 bytes of the record that points to the previous entry.
        _ = file_object.read(8)

        # Parsing done; translate the read data into an appropriate format.
        micro_seconds, _ = divmod(record_struct.nanosec, 1000)

        # Parsing the dynamic values (text or pointers to position with text).
        # The first four are always the host, sender, facility, and message.
        number_of_values = len(values)
        if number_of_values < 4:
            parser_mediator.ProduceParseError(
                u'less than four values read from an ASL event.')

        computer_name = u'N/A'
        sender = u'N/A'
        facility = u'N/A'
        message = u'N/A'

        if number_of_values >= 1:
            computer_name = values[0].decode(u'utf-8')

        if number_of_values >= 2:
            sender = values[1].decode(u'utf-8')

        if number_of_values >= 3:
            facility = values[2].decode(u'utf-8')

        if number_of_values >= 4:
            message = values[3].decode(u'utf-8')

        # If the entry has extra fields, they work as pairs:
        # the first is the name of the field and the second its value.
        extra_information = u''
        if number_of_values > 4 and number_of_values % 2 == 0:
            # Taking all the extra attributes and merging them together,
            # eg: a = [1, 2, 3, 4] will look like "1: 2, 3: 4".
            try:
                extra_values = map(py2to3.UNICODE_TYPE, values[4:])
                extra_information = u', '.join(
                    map(u': '.join, zip(extra_values[0::2],
                                        extra_values[1::2])))
            except UnicodeDecodeError as exception:
                parser_mediator.ProduceParseError(
                    u'Unable to decode all ASL values in the extra information fields.'
                )

        event_object = ASLEvent(record_struct.timestamp,
                                offset,
                                record_struct.asl_message_id,
                                record_struct.level,
                                record_struct.pid,
                                record_struct.uid,
                                record_struct.gid,
                                record_struct.read_uid,
                                record_struct.read_gid,
                                computer_name,
                                sender,
                                facility,
                                message,
                                extra_information,
                                micro_seconds=micro_seconds)
        parser_mediator.ProduceEvent(event_object)

        return (event_object, record_struct.next_offset)
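
The inline-string versus pointer encoding documented in the comments above can be decoded without construct. A minimal sketch, assuming only the layout rules stated there; decode_asl_field is a hypothetical helper, not part of the parser:

import struct

def decode_asl_field(field_data):
    # An 8-byte ASL field is either an inline string (first nibble = 8),
    # a null field (all zero bytes) or a pointer (big-endian 64-bit offset).
    first_byte = ord(field_data[0:1])
    if first_byte >> 4 == 8:
        string_length = first_byte & 0x0f
        return u'string', field_data[1:1 + string_length]
    pointer = struct.unpack(b'>Q', field_data)[0]
    if not pointer:
        return u'null', None
    return u'pointer', pointer

# The example from the comments: [8468 6964 6400 0000] decodes to 'hidd'.
print(decode_asl_field(b'\x84hidd\x00\x00\x00'))
print(decode_asl_field(b'\x00\x00\x00\x00\x00\x00\x00\x77'))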
Beispiel #21
0
class FirefoxCacheParser(BaseFirefoxCacheParser):
    """Parses Firefox cache version 1 files (Firefox 31 or earlier)."""

    NAME = 'firefox_cache'
    DESCRIPTION = (
        'Parser for Firefox Cache version 1 files (Firefox 31 or earlier).')

    _CACHE_VERSION = 1

    # Initial size of Firefox 4 and later cache files.
    _INITIAL_CACHE_FILE_SIZE = 4 * 1024 * 1024

    # Smallest possible block size in Firefox cache files.
    _MINUMUM_BLOCK_SIZE = 256

    _CACHE_RECORD_HEADER_STRUCT = construct.Struct(
        'record_header', construct.UBInt16('major'),
        construct.UBInt16('minor'), construct.UBInt32('location'),
        construct.UBInt32('fetch_count'), construct.UBInt32('last_fetched'),
        construct.UBInt32('last_modified'), construct.UBInt32('expire_time'),
        construct.UBInt32('data_size'), construct.UBInt32('request_size'),
        construct.UBInt32('info_size'))

    _CACHE_RECORD_HEADER_SIZE = _CACHE_RECORD_HEADER_STRUCT.sizeof()

    # TODO: change into regexp.
    _CACHE_FILENAME = (pyparsing.Word(pyparsing.hexnums, exact=5) +
                       pyparsing.Word('m', exact=1) +
                       pyparsing.Word(pyparsing.nums, exact=2))

    FIREFOX_CACHE_CONFIG = collections.namedtuple(
        'firefox_cache_config', 'block_size first_record_offset')

    def _GetFirefoxConfig(self, file_object, display_name):
        """Determine cache file block size.

    Args:
      file_object (dfvfs.FileIO): a file-like object.
      display_name (str): display name.

    Raises:
      UnableToParseFile: if no valid cache record could be found.
    """
        # There ought to be a valid record within the first 4 MiB. We use this
        # limit to prevent reading large invalid files.
        to_read = min(file_object.get_size(), self._INITIAL_CACHE_FILE_SIZE)

        while file_object.get_offset() < to_read:
            offset = file_object.get_offset()

            try:
                # We have not yet determined the block size, so we use the smallest
                # possible size.
                cache_record_header, _ = self._ReadCacheEntry(
                    file_object, display_name, self._MINUMUM_BLOCK_SIZE)

                record_size = (self._CACHE_RECORD_HEADER_SIZE +
                               cache_record_header.request_size +
                               cache_record_header.info_size)

                if record_size >= 4096:
                    # _CACHE_003_
                    block_size = 4096
                elif record_size >= 1024:
                    # _CACHE_002_
                    block_size = 1024
                else:
                    # _CACHE_001_
                    block_size = 256

                return self.FIREFOX_CACHE_CONFIG(block_size, offset)

            except IOError:
                logging.debug('[{0:s}] {1:s}:{2:d}: Invalid record.'.format(
                    self.NAME, display_name, offset))

        raise errors.UnableToParseFile(
            'Could not find a valid cache record. Not a Firefox cache file.')

    def _ParseCacheEntry(self, parser_mediator, file_object, display_name,
                         block_size):
        """Parses a cache entry.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.
      display_name (str): display name.
      block_size (int): block size.
    """
        cache_record_header, event_data = self._ReadCacheEntry(
            file_object, display_name, block_size)

        date_time = dfdatetime_posix_time.PosixTime(
            timestamp=cache_record_header.last_fetched)
        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_LAST_VISITED)
        parser_mediator.ProduceEventWithEventData(event, event_data)

        if cache_record_header.last_modified:
            date_time = dfdatetime_posix_time.PosixTime(
                timestamp=cache_record_header.last_modified)
            event = time_events.DateTimeValuesEvent(
                date_time, definitions.TIME_DESCRIPTION_WRITTEN)
            parser_mediator.ProduceEventWithEventData(event, event_data)

        if cache_record_header.expire_time:
            date_time = dfdatetime_posix_time.PosixTime(
                timestamp=cache_record_header.expire_time)
            event = time_events.DateTimeValuesEvent(
                date_time, definitions.TIME_DESCRIPTION_EXPIRATION)
            parser_mediator.ProduceEventWithEventData(event, event_data)

    def _ReadCacheEntry(self, file_object, display_name, block_size):
        """Reads a cache entry.

    Args:
      file_object (dfvfs.FileIO): a file-like object.
      display_name (str): display name.
      block_size (int): block size.

    Returns:
      tuple: contains:
        construct.Struct: cache record header structure.
        FirefoxCacheEventData: event data.
    """
        offset = file_object.get_offset()

        try:
            cache_record_header = self._CACHE_RECORD_HEADER_STRUCT.parse_stream(
                file_object)
        except (IOError, construct.FieldError):
            raise IOError('Unable to parse stream.')

        if not self._ValidateCacheRecordHeader(cache_record_header):
            # Move reader to next candidate block.
            file_offset = block_size - self._CACHE_RECORD_HEADER_SIZE
            file_object.seek(file_offset, os.SEEK_CUR)
            raise IOError('Not a valid Firefox cache record.')

        # The URL string is NUL-terminated.
        url = file_object.read(cache_record_header.request_size)[:-1]

        # HTTP response header, even elements are keys, odd elements values.
        header_data = file_object.read(cache_record_header.info_size)

        request_method, response_code = self._ParseHTTPHeaders(
            header_data, offset, display_name)

        # A request can span multiple blocks, so we use modulo.
        file_offset = file_object.get_offset() - offset
        _, remainder = divmod(file_offset, block_size)

        # Move reader to next candidate block. Include the null-byte skipped above.
        file_object.seek(block_size - remainder, os.SEEK_CUR)

        event_data = FirefoxCacheEventData()
        event_data.data_size = cache_record_header.data_size
        event_data.fetch_count = cache_record_header.fetch_count
        event_data.info_size = cache_record_header.info_size
        event_data.location = cache_record_header.location
        event_data.major = cache_record_header.major
        event_data.minor = cache_record_header.minor
        event_data.request_method = request_method
        event_data.request_size = cache_record_header.request_size
        event_data.response_code = response_code
        event_data.url = url
        event_data.version = self._CACHE_VERSION

        return cache_record_header, event_data

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a Firefox cache file-like object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        filename = parser_mediator.GetFilename()
        display_name = parser_mediator.GetDisplayName()

        try:
            # Match cache filename. Five hex characters + 'm' + two digit
            # number, e.g. '01ABCm02'. 'm' is for metadata. Cache files with 'd'
            # instead contain data only.
            self._CACHE_FILENAME.parseString(filename)
        except pyparsing.ParseException:
            if not filename.startswith('_CACHE_00'):
                raise errors.UnableToParseFile('Not a Firefox cache1 file.')

        firefox_config = self._GetFirefoxConfig(file_object, display_name)

        file_object.seek(firefox_config.first_record_offset)

        while file_object.get_offset() < file_object.get_size():
            try:
                self._ParseCacheEntry(parser_mediator, file_object,
                                      display_name, firefox_config.block_size)

            except IOError:
                file_offset = file_object.get_offset(
                ) - self._MINUMUM_BLOCK_SIZE
                logging.debug(
                    ('[{0:s}] Invalid cache record in file: {1:s} at offset: '
                     '{2:d}.').format(self.NAME, display_name, file_offset))
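
The block-size heuristic in _GetFirefoxConfig and the modulo skip in _ReadCacheEntry reduce to plain arithmetic. A minimal sketch with hypothetical helper names, assuming only the rules stated in the code above:

def block_size_for_record(record_size):
    # Mirrors _GetFirefoxConfig: records of 4 KiB or more live in
    # _CACHE_003_, records of 1 KiB or more in _CACHE_002_, otherwise
    # _CACHE_001_.
    if record_size >= 4096:
        return 4096
    if record_size >= 1024:
        return 1024
    return 256

def bytes_to_next_block(bytes_read, block_size):
    # A record can span multiple blocks, so the reader skips the remainder
    # of the last block it touched to reach the next candidate block.
    _, remainder = divmod(bytes_read, block_size)
    return block_size - remainder

# A 1500-byte record is stored in 1 KiB blocks and spans two of them:
assert block_size_for_record(1500) == 1024
assert bytes_to_next_block(1500, 1024) == 548  # 2 * 1024 - 1500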
Beispiel #22
0
class CupsIppParser(interface.FileObjectParser):
    """Parser for CUPS IPP files. """

    NAME = u'cups_ipp'
    DESCRIPTION = u'Parser for CUPS IPP files.'

    # INFO:
    # For each file there is only one document, with three different
    # timestamps: created, processed and finished.
    # Format:
    # [HEADER: MAGIC + KNOWN_TYPE][GROUP A]...[GROUP Z][GROUP_END: 0x03]
    # GROUP: [GROUP ID][PAIR A]...[PAIR Z] where [PAIR: NAME + VALUE]
    #   GROUP ID: [1byte ID]
    #   PAIR: [TagID][\x00][Name][Value])
    #     TagID: 1 byte integer with the type of "Value".
    #     Name: [Length][Text][\00]
    #       Name can be empty when the name has more than one value.
    #       Example: family name "lopez mata" with more than one surname.
    #       Type_Text + [0x06, family, 0x00] + [0x05, lopez, 0x00] +
    #       Type_Text + [0x00, 0x00] + [0x04, mata, 0x00]
    #     Value: can be integer, boolean, or text, as indicated by TagID.
    #       If boolean, Value: [\x01][0x00(False)] or [\x01(True)]
    #       If integer, Value: [\x04][Integer]
    #       If text,    Value: [Length text][Text][\00]

    # Magic number that identifies the supported CUPS IPP version.
    IPP_MAJOR_VERSION = 2
    IPP_MINOR_VERSION = 0
    # Supported Operation ID.
    IPP_OP_ID = 5

    # CUPS IPP File header.
    CUPS_IPP_HEADER = construct.Struct(u'cups_ipp_header_struct',
                                       construct.UBInt8(u'major_version'),
                                       construct.UBInt8(u'minor_version'),
                                       construct.UBInt16(u'operation_id'),
                                       construct.UBInt32(u'request_id'))

    # Group ID that indicates the end of the IPP Control file.
    GROUP_END = 3
    # Identification Groups.
    GROUP_LIST = [1, 2, 4, 5, 6, 7]

    # Type ID, per cups source file ipp-support.c.
    TYPE_GENERAL_INTEGER = 0x20
    TYPE_INTEGER = 0x21
    TYPE_BOOL = 0x22
    TYPE_ENUMERATION = 0x23
    TYPE_DATETIME = 0x31

    # Type of values that can be extracted.
    INTEGER_8 = construct.UBInt8(u'integer')
    INTEGER_32 = construct.UBInt32(u'integer')
    TEXT = construct.PascalString(u'text',
                                  encoding='utf-8',
                                  length_field=construct.UBInt8(u'length'))
    BOOLEAN = construct.Struct(u'boolean_value', construct.Padding(1),
                               INTEGER_8)
    INTEGER = construct.Struct(u'integer_value', construct.Padding(1),
                               INTEGER_32)

    # This is an RFC 2579 datetime.
    DATETIME = construct.Struct(
        u'datetime',
        construct.Padding(1),
        construct.UBInt16(u'year'),
        construct.UBInt8(u'month'),
        construct.UBInt8(u'day'),
        construct.UBInt8(u'hour'),
        construct.UBInt8(u'minutes'),
        construct.UBInt8(u'seconds'),
        construct.UBInt8(u'deciseconds'),
        construct.String(u'direction_from_utc', length=1, encoding='ascii'),
        construct.UBInt8(u'hours_from_utc'),
        construct.UBInt8(u'minutes_from_utc'),
    )

    # Name of the pair.
    PAIR_NAME = construct.Struct(u'pair_name', TEXT, construct.Padding(1))

    # Specific CUPS IPP to generic name.
    NAME_PAIR_TRANSLATION = {
        u'printer-uri': u'uri',
        u'job-uuid': u'job_id',
        u'DestinationPrinterID': u'printer_id',
        u'job-originating-user-name': u'user',
        u'job-name': u'job_name',
        u'document-format': u'doc_type',
        u'job-originating-host-name': u'computer_name',
        u'com.apple.print.JobInfo.PMApplicationName': u'application',
        u'com.apple.print.JobInfo.PMJobOwner': u'owner'
    }

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a CUPS IPP file-like object.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        try:
            header = self.CUPS_IPP_HEADER.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                u'Unable to parse CUPS IPP Header with error: {0:s}'.format(
                    exception))

        if (header.major_version != self.IPP_MAJOR_VERSION
                or header.minor_version != self.IPP_MINOR_VERSION):
            raise errors.UnableToParseFile(
                u'[{0:s}] Unsupported version number.'.format(self.NAME))

        if header.operation_id != self.IPP_OP_ID:
            # Warn if the operation ID differs from the standard one. We should be
            # able to parse the file nonetheless.
            logging.debug(
                u'[{0:s}] Unsupported operation identifier in file: {1:s}.'.
                format(self.NAME, parser_mediator.GetDisplayName()))

        # Read the pairs extracting the name and the value.
        data_dict = {}
        name, value = self.ReadPair(parser_mediator, file_object)
        while name or value:
            # Translate the known "name" CUPS IPP to a generic name value.
            pretty_name = self.NAME_PAIR_TRANSLATION.get(name, name)
            data_dict.setdefault(pretty_name, []).append(value)
            name, value = self.ReadPair(parser_mediator, file_object)

        # TODO: Refactor to use a lookup table to do event production.
        time_dict = {}
        for key, value in data_dict.items():
            if key.startswith(u'date-time-') or key.startswith(u'time-'):
                time_dict[key] = value
                del data_dict[key]

        if u'date-time-at-creation' in time_dict:
            event_object = CupsIppEvent(time_dict[u'date-time-at-creation'][0],
                                        eventdata.EventTimestamp.CREATION_TIME,
                                        data_dict)
            parser_mediator.ProduceEvent(event_object)

        if u'date-time-at-processing' in time_dict:
            event_object = CupsIppEvent(
                time_dict[u'date-time-at-processing'][0],
                eventdata.EventTimestamp.START_TIME, data_dict)
            parser_mediator.ProduceEvent(event_object)

        if u'date-time-at-completed' in time_dict:
            event_object = CupsIppEvent(
                time_dict[u'date-time-at-completed'][0],
                eventdata.EventTimestamp.END_TIME, data_dict)
            parser_mediator.ProduceEvent(event_object)

        if u'time-at-creation' in time_dict:
            time_value = time_dict[u'time-at-creation'][0]
            timestamp = timelib.Timestamp.FromPosixTime(time_value)
            event_object = CupsIppEvent(timestamp,
                                        eventdata.EventTimestamp.CREATION_TIME,
                                        data_dict)
            parser_mediator.ProduceEvent(event_object)

        if u'time-at-processing' in time_dict:
            time_value = time_dict[u'time-at-processing'][0]
            timestamp = timelib.Timestamp.FromPosixTime(time_value)
            event_object = CupsIppEvent(timestamp,
                                        eventdata.EventTimestamp.START_TIME,
                                        data_dict)
            parser_mediator.ProduceEvent(event_object)

        if u'time-at-completed' in time_dict:
            time_value = time_dict[u'time-at-completed'][0]
            timestamp = timelib.Timestamp.FromPosixTime(time_value)
            event_object = CupsIppEvent(timestamp,
                                        eventdata.EventTimestamp.END_TIME,
                                        data_dict)
            parser_mediator.ProduceEvent(event_object)

    def ReadPair(self, parser_mediator, file_object):
        """Reads an attribute name and value pair from a CUPS IPP event.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: a file-like object that points to a file.

    Returns:
      A tuple of name and value. If the name and value cannot be read, both
      are set to None.
    """
        # Pair = Type ID + Name + Value.
        try:
            # Can be:
            #   Group ID + IDtag = Group ID (1byte) + Tag ID (1byte) + '0x00'.
            #   IDtag = Tag ID (1byte) + '0x00'.
            type_id = self.INTEGER_8.parse_stream(file_object)
            if type_id == self.GROUP_END:
                return None, None

            elif type_id in self.GROUP_LIST:
                # If it is a group ID we must read the next byte that contains
                # the first TagID.
                type_id = self.INTEGER_8.parse_stream(file_object)

            # 0x00 separator character.
            _ = self.INTEGER_8.parse_stream(file_object)

        except (IOError, construct.FieldError):
            logging.warning(
                u'[{0:s}] Unsupported identifier in file: {1:s}.'.format(
                    self.NAME, parser_mediator.GetDisplayName()))
            return None, None

        # Name = Length name + name + 0x00
        try:
            name = self.PAIR_NAME.parse_stream(file_object).text
        except (IOError, construct.FieldError):
            logging.warning(u'[{0:s}] Unsupported name in file: {1:s}.'.format(
                self.NAME, parser_mediator.GetDisplayName()))
            return None, None

        # Value: can be integer, boolean or text, selected by Type ID.
        try:
            if type_id in [
                    self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER,
                    self.TYPE_ENUMERATION
            ]:
                value = self.INTEGER.parse_stream(file_object).integer

            elif type_id == self.TYPE_BOOL:
                value = bool(self.BOOLEAN.parse_stream(file_object).integer)

            elif type_id == self.TYPE_DATETIME:
                datetime = self.DATETIME.parse_stream(file_object)
                value = timelib.Timestamp.FromRFC2579Datetime(
                    datetime.year, datetime.month, datetime.day, datetime.hour,
                    datetime.minutes, datetime.seconds, datetime.deciseconds,
                    datetime.direction_from_utc, datetime.hours_from_utc,
                    datetime.minutes_from_utc)

            else:
                value = self.TEXT.parse_stream(file_object)

        except (IOError, UnicodeDecodeError, construct.FieldError):
            logging.warning(
                u'[{0:s}] Unsupported value in file: {1:s}.'.format(
                    self.NAME, parser_mediator.GetDisplayName()))
            return None, None

        return name, value
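
The pair layout described in the INFO comment can also be decoded by hand. A minimal sketch for an integer-typed pair, assuming the byte layout stated above; the helper and the attribute name 'copies' are hypothetical:

import struct

def decode_ipp_integer_pair(data):
    # Layout per the comments above:
    # [TagID][0x00][name_length][name][0x00][0x04][big-endian 32-bit value]
    tag_id = ord(data[0:1])
    name_length = ord(data[2:3])
    name = data[3:3 + name_length]
    value_start = 3 + name_length + 1 + 1  # skip the name's 0x00 and the 0x04
    value = struct.unpack(b'>I', data[value_start:value_start + 4])[0]
    return tag_id, name, value

sample = b'\x21\x00\x06copies\x00\x04\x00\x00\x00\x03'
print(decode_ipp_integer_pair(sample))  # (33, 'copies', 3), where 33 == 0x21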
Beispiel #23
0
class BinaryCookieParser(interface.FileObjectParser):
    """Parser for Safari Binary Cookie files."""

    NAME = u'binary_cookies'
    DESCRIPTION = u'Parser for Safari Binary Cookie files.'

    COOKIE_HEADER = construct.Struct(
        u'binary_cookie_header', construct.UBInt32(u'pages'),
        construct.Array(lambda ctx: ctx.pages,
                        construct.UBInt32(u'page_sizes')))

    COOKIE_DATA = construct.Struct(u'binary_cookie_data',
                                   construct.ULInt32(u'size'),
                                   construct.Bytes(u'unknown_1', 4),
                                   construct.ULInt32(u'flags'),
                                   construct.Bytes(u'unknown_2', 4),
                                   construct.ULInt32(u'url_offset'),
                                   construct.ULInt32(u'name_offset'),
                                   construct.ULInt32(u'path_offset'),
                                   construct.ULInt32(u'value_offset'),
                                   construct.Bytes(u'end_of_cookie', 8),
                                   construct.LFloat64(u'expiration_date'),
                                   construct.LFloat64(u'creation_date'))

    PAGE_DATA = construct.Struct(
        u'page_data', construct.Bytes(u'header', 4),
        construct.ULInt32(u'number_of_cookies'),
        construct.Array(lambda ctx: ctx.number_of_cookies,
                        construct.ULInt32(u'offsets')))

    # Cookie flags.
    COOKIE_FLAG_NONE = 0
    COOKIE_FLAG_SECURE = 1
    COOKIE_FLAG_UNKNOWN = 2
    COOKIE_FLAG_HTTP_ONLY = 4

    def __init__(self):
        """Initializes a parser object."""
        super(BinaryCookieParser, self).__init__()
        self._cookie_plugins = (
            cookie_plugins_manager.CookiePluginsManager.GetPlugins())

    def _ParsePage(self, page_data, parser_mediator):
        """Extract events from a page and produce events.

    Args:
      page_data: Raw bytes of the page.
      file_entry: The file entry (instance of dfvfs.FileEntry).
      parser_mediator: A parser mediator object (instance of ParserMediator).
    """
        try:
            page = self.PAGE_DATA.parse(page_data)
        except construct.FieldError:
            parser_mediator.ProduceParseError(u'Unable to parse page')
            return

        for page_offset in page.offsets:
            try:
                cookie = self.COOKIE_DATA.parse(page_data[page_offset:])
            except construct.FieldError:
                message = u'Unable to parse cookie data from offset: {0:d}'.format(
                    page_offset)
                parser_mediator.ProduceParseError(message)
                continue

            # Each value runs from the start of its offset to the start of
            # the next offset. Thus we need to determine the proper ordering
            # of the offsets, since they are not always stored in the same
            # order.
            offset_dict = {
                cookie.url_offset: u'url',
                cookie.name_offset: u'name',
                cookie.value_offset: u'value',
                cookie.path_offset: u'path'
            }

            offsets = sorted(offset_dict.keys())
            offsets.append(cookie.size)

            # TODO: Find a better approach to parsing the data than this.
            data_dict = {}
            for current_offset in range(0, len(offsets) - 1):
                # Get the current offset and the offset for the next entry.
                start, end = offsets[current_offset:current_offset + 2]
                value = offset_dict.get(offsets[current_offset])
                # Read the data.
                data_all = page_data[start + page_offset:end + page_offset]
                data, _, _ = data_all.partition(b'\x00')
                data_dict[value] = data

            url = data_dict.get(u'url')
            cookie_name = data_dict.get(u'name')
            cookie_value = data_dict.get(u'value')
            path = data_dict.get(u'path')

            flags = []
            flag_int = cookie.flags
            if flag_int & self.COOKIE_FLAG_HTTP_ONLY:
                flags.append(u'HttpOnly')
            if flag_int & self.COOKIE_FLAG_UNKNOWN:
                flags.append(u'Unknown')
            if flag_int & self.COOKIE_FLAG_SECURE:
                flags.append(u'Secure')

            cookie_flags = u'|'.join(flags)

            if cookie.creation_date:
                event_object = BinaryCookieEvent(
                    cookie.creation_date,
                    eventdata.EventTimestamp.CREATION_TIME, cookie_flags, url,
                    cookie_value, cookie_name, path)
                parser_mediator.ProduceEvent(event_object)

            if cookie.expiration_date:
                event_object = BinaryCookieEvent(
                    cookie.expiration_date,
                    eventdata.EventTimestamp.EXPIRATION_TIME, cookie_flags,
                    url, cookie_value, cookie_name, path)
                parser_mediator.ProduceEvent(event_object)

            for cookie_plugin in self._cookie_plugins:
                try:
                    cookie_plugin.UpdateChainAndProcess(
                        parser_mediator,
                        cookie_name=data_dict.get(u'name'),
                        cookie_data=data_dict.get(u'value'),
                        url=data_dict.get(u'url'))
                except errors.WrongPlugin:
                    pass

    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses a Safari binary cookie file-like object.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        # Start by verifying the magic value.
        # We do this here instead of in the header parsing routine because
        # the header parsing reads an integer and creates an array from it;
        # for false hits that could end up reading large chunks of data,
        # which we want to avoid.
        magic = file_object.read(4)
        if magic != b'cook':
            raise errors.UnableToParseFile(
                u'The file is not a Binary Cookie file. Unsupported file signature.'
            )

        try:
            header = self.COOKIE_HEADER.parse_stream(file_object)
        except (IOError, construct.ArrayError, construct.FieldError):
            raise errors.UnableToParseFile(
                u'The file is not a Binary Cookie file (bad header).')

        for page_size in header.page_sizes:
            page = file_object.read(page_size)
            if len(page) != page_size:
                parser_mediator.ProduceParseError(
                    u'Unable to continue parsing Binary Cookie file')
                break

            self._ParsePage(page, parser_mediator)
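
The offset-ordering trick in _ParsePage can be shown in isolation. A minimal sketch, assuming field offsets are relative to the start of the cookie record as in the code above; slice_cookie_fields and the sample record are hypothetical:

def slice_cookie_fields(record_data, offset_dict, record_size):
    # Each field runs from its own offset to the next-larger offset; the
    # record size closes the last field. Field data is NUL-terminated.
    offsets = sorted(offset_dict)
    offsets.append(record_size)
    fields = {}
    for start, end in zip(offsets, offsets[1:]):
        fields[offset_dict[start]] = record_data[start:end].partition(
            b'\x00')[0]
    return fields

record = (b'\x00' * 8 + b'a.com\x00\x00\x00' + b'sid\x00' + b'42\x00\x00' +
          b'/\x00\x00\x00')
offset_dict = {8: u'url', 16: u'name', 20: u'value', 24: u'path'}
print(slice_cookie_fields(record, offset_dict, 28))
# {'url': 'a.com', 'name': 'sid', 'value': '42', 'path': '/'}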
Beispiel #24
0
    c.Magic(_HANDSHAKE_PREFIX),
    c.Bytes('reserved', 8),     # 64 bitfield
    c.Bytes('info_hash', 20),   # 160 bit hash
    c.Bytes('peer_id', 20))     # 160 bit hash

# Reserved bitfield options
extensions = { 
    'commands': 20, # support for Extension Protocol (BEP 0010)
}

# support for Peer Metadata Exchange (BEP 0009)
UT_METADATA = 3
extended_commands = collections.OrderedDict(ut_metadata=UT_METADATA)

_message = c.Struct('message', 
        c.UBInt32('length'), 
        c.Bytes('payload', lambda ctx: ctx.length))

Bytes = lambda name: c.ExprAdapter(c.OptionalGreedyRange(c.StaticField(name, 1)), 
    encoder=lambda obj, ctx : list(obj),
    decoder=lambda obj, ctx : ''.join(obj)
)

_commands = {
    'choke'         : [c.Magic('\x00')],
    'unchoke'       : [c.Magic('\x01')],
    'interested'    : [c.Magic('\x02')],
    'uninterested'  : [c.Magic('\x03')],
    'have'          : [c.Magic('\x04'), c.UBInt32('index')],
    'bitfield'      : [c.Magic('\x05'), Bytes('bits')],
    'request'       : [c.Magic('\x06'), c.UBInt32('index'), c.UBInt32('begin'), c.UBInt32('length')],
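
Putting the _message frame together with the command table above: a peer-wire message is a 4-byte big-endian length prefix followed by a 1-byte command id and its fields. A minimal sketch that builds a 'request' message without construct; build_request is a hypothetical helper:

import struct

def build_request(index, begin, length):
    # 'request' per the table above: id 0x06 plus three UBInt32 fields,
    # wrapped in the length-prefixed frame described by _message.
    payload = b'\x06' + struct.pack(b'>III', index, begin, length)
    return struct.pack(b'>I', len(payload)) + payload

message = build_request(0, 0, 16384)
assert len(message) == 17  # 4-byte length prefix + 13-byte payload
assert message[:5] == b'\x00\x00\x00\x0d\x06'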
Beispiel #25
0
class KeychainParser(interface.BaseParser):
  """Parser for Keychain files."""

  NAME = 'mac_keychain'
  DESCRIPTION = u'Parser for Mac OS X Keychain files.'

  KEYCHAIN_MAGIC_HEADER = 'kych'
  KEYCHAIN_MAJOR_VERSION = 1
  KEYCHAIN_MINOR_VERSION = 0

  RECORD_TYPE_APPLICATION = 0x80000000
  RECORD_TYPE_INTERNET = 0x80000001

  # DB HEADER.
  KEYCHAIN_DB_HEADER = construct.Struct(
      'db_header',
      construct.String('magic', 4),
      construct.UBInt16('major_version'),
      construct.UBInt16('minor_version'),
      construct.UBInt32('header_size'),
      construct.UBInt32('schema_offset'),
      construct.Padding(4))

  # DB SCHEMA.
  KEYCHAIN_DB_SCHEMA = construct.Struct(
      'db_schema',
      construct.UBInt32('size'),
      construct.UBInt32('number_of_tables'))
  # For each number_of_tables, the schema has a TABLE_OFFSET with the
  # offset starting in the DB_SCHEMA.
  TABLE_OFFSET = construct.UBInt32('table_offset')

  TABLE_HEADER = construct.Struct(
      'table_header',
      construct.UBInt32('table_size'),
      construct.UBInt32('record_type'),
      construct.UBInt32('number_of_records'),
      construct.UBInt32('first_record'),
      construct.UBInt32('index_offset'),
      construct.Padding(4),
      construct.UBInt32('recordnumbercount'))

  RECORD_HEADER = construct.Struct(
      'record_header',
      construct.UBInt32('entry_length'),
      construct.Padding(12),
      construct.UBInt32('ssgp_length'),
      construct.Padding(4),
      construct.UBInt32('creation_time'),
      construct.UBInt32('last_mod_time'),
      construct.UBInt32('text_description'),
      construct.Padding(4),
      construct.UBInt32('comments'),
      construct.Padding(8),
      construct.UBInt32('entry_name'),
      construct.Padding(20),
      construct.UBInt32('account_name'),
      construct.Padding(4))
  RECORD_HEADER_APP = construct.Struct(
      'record_entry_app',
      RECORD_HEADER,
      construct.Padding(4))
  RECORD_HEADER_INET = construct.Struct(
      'record_entry_inet',
      RECORD_HEADER,
      construct.UBInt32('where'),
      construct.UBInt32('protocol'),
      construct.UBInt32('type'),
      construct.Padding(4),
      construct.UBInt32('url'))

  TEXT = construct.PascalString(
      'text', length_field = construct.UBInt32('length'))
  TIME = construct.Struct(
      'timestamp',
      construct.String('year', 4),
      construct.String('month', 2),
      construct.String('day', 2),
      construct.String('hour', 2),
      construct.String('minute', 2),
      construct.String('second', 2),
      construct.Padding(2))
  TYPE_TEXT = construct.String('type', 4)

  # TODO: add more protocols.
  _PROTOCOL_TRANSLATION_DICT = {
      u'htps': u'https',
      u'smtp': u'smtp',
      u'imap': u'imap',
      u'http': u'http'}

  def _GetTimestampFromEntry(self, parser_context, file_entry, structure):
    """Parse a time entry structure into a microseconds since Epoch in UTC.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      structure: TIME entry structure:
                 year: String with the number of the year.
                 month: String with the number of the month.
                 day: String with the number of the day.
                 hour: String with the number of the hour.
                 minute: String with the number of the minute.
                 second: String with the number of the second.

    Returns:
      Microseconds since Epoch in UTC.
    """
    try:
      return timelib.Timestamp.FromTimeParts(
          int(structure.year, 10), int(structure.month, 10),
          int(structure.day, 10), int(structure.hour, 10),
          int(structure.minute, 10), int(structure.second, 10))
    except ValueError:
      logging.warning(
          u'[{0:s}] Invalid keychain time {1!s} in file: {2:s}'.format(
              self.NAME, parser_context.GetDisplayName(file_entry), structure))
      return 0

  def _ReadEntryApplication(self, parser_context, file_object, file_entry=None):
    """Extracts the information from an application password entry.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_object: A file-like object that points to a Keychain file.
      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
                  The default is None.
    """
    offset = file_object.tell()
    try:
      record = self.RECORD_HEADER_APP.parse_stream(file_object)
    except (IOError, construct.FieldError):
      logging.warning((
          u'[{0:s}] Unsupported record header at 0x{1:08x} in file: '
          u'{2:s}').format(
              self.NAME, offset, parser_context.GetDisplayName(file_entry)))
      return

    (ssgp_hash, creation_time, last_mod_time, text_description,
     comments, entry_name, account_name) = self._ReadEntryHeader(
         parser_context, file_entry, file_object, record.record_header, offset)

    # Move to the end of the record, ready for the next record.
    file_object.seek(
        record.record_header.entry_length + offset - file_object.tell(),
        os.SEEK_CUR)
    event_object = KeychainApplicationRecordEvent(
        creation_time, eventdata.EventTimestamp.CREATION_TIME,
        entry_name, account_name, text_description, comments, ssgp_hash)
    parser_context.ProduceEvent(
        event_object, parser_name=self.NAME, file_entry=file_entry)

    if creation_time != last_mod_time:
      event_object = KeychainApplicationRecordEvent(
          last_mod_time, eventdata.EventTimestamp.MODIFICATION_TIME,
          entry_name, account_name, text_description, comments, ssgp_hash)
      parser_context.ProduceEvent(
          event_object, parser_name=self.NAME, file_entry=file_entry)

  def _ReadEntryHeader(
      self, parser_context, file_entry, file_object, record, offset):
    """Read the common record attributes.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      file_object: A file-like object that points to a Keychain file.
      record: Structure with the header of the record.
      offset: First byte of the record.

    Returns:
      A tuple of:
        ssgp_hash: Hash of the encrypted data (passwd, cert, note).
        creation_time: When the entry was created.
        last_mod_time: Last time the entry was updated.
        text_description: A brief description of the entry.
        comments: Comments of the entry.
        entry_name: Name of the entry.
        account_name: Name of the account.
    """
    # Info: The hash header always starts with the string ssgp followed by
    #       the hash. Furthermore, the fields are always a multiple of four
    #       bytes; if a value is not a multiple, it is padded with 0x00.
    ssgp_hash = binascii.hexlify(file_object.read(record.ssgp_length)[4:])

    file_object.seek(
        record.creation_time - file_object.tell() + offset - 1, os.SEEK_CUR)
    creation_time = self._GetTimestampFromEntry(
        parser_context, file_entry, self.TIME.parse_stream(file_object))

    file_object.seek(
        record.last_mod_time - file_object.tell() + offset - 1, os.SEEK_CUR)
    last_mod_time = self._GetTimestampFromEntry(
        parser_context, file_entry, self.TIME.parse_stream(file_object))

    # The text description field does not always contain data.
    if record.text_description:
      file_object.seek(
          record.text_description - file_object.tell() + offset - 1,
          os.SEEK_CUR)
      text_description = self.TEXT.parse_stream(file_object)
    else:
      text_description = u'N/A'

    # The comment field does not always contain data.
    if record.comments:
      file_object.seek(
          record.comments - file_object.tell() + offset - 1,
          os.SEEK_CUR)
      comments = self.TEXT.parse_stream(file_object)
    else:
      comments = u'N/A'

    file_object.seek(
        record.entry_name - file_object.tell() + offset - 1, os.SEEK_CUR)
    entry_name = self.TEXT.parse_stream(file_object)

    file_object.seek(
        record.account_name - file_object.tell() + offset - 1, os.SEEK_CUR)
    account_name = self.TEXT.parse_stream(file_object)

    return (
        ssgp_hash, creation_time, last_mod_time,
        text_description, comments, entry_name, account_name)

  def _ReadEntryInternet(self, parser_context, file_object, file_entry=None):
    """Extracts the information from an Internet password entry.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_object: A file-like object that points to a Keychain file.
      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
                  The default is None.
    """
    offset = file_object.tell()
    try:
      record = self.RECORD_HEADER_INET.parse_stream(file_object)
    except (IOError, construct.FieldError):
      logging.warning((
          u'[{0:s}] Unsupported record header at 0x{1:08x} in file: '
          u'{2:s}').format(
              self.NAME, offset, parser_context.GetDisplayName(file_entry)))
      return

    (ssgp_hash, creation_time, last_mod_time, text_description,
     comments, entry_name, account_name) = self._ReadEntryHeader(
         parser_context, file_entry, file_object, record.record_header, offset)
    if not record.where:
      where = u'N/A'
      protocol = u'N/A'
      type_protocol = u'N/A'
    else:
      file_object.seek(
          record.where - file_object.tell() + offset - 1, os.SEEK_CUR)
      where = self.TEXT.parse_stream(file_object)
      file_object.seek(
          record.protocol - file_object.tell() + offset - 1, os.SEEK_CUR)
      protocol = self.TYPE_TEXT.parse_stream(file_object)
      file_object.seek(
          record.type - file_object.tell() + offset - 1, os.SEEK_CUR)
      type_protocol = self.TEXT.parse_stream(file_object)
      type_protocol = self._PROTOCOL_TRANSLATION_DICT.get(
          type_protocol, type_protocol)
      if record.url:
        file_object.seek(
            record.url - file_object.tell() + offset - 1, os.SEEK_CUR)
        url = self.TEXT.parse_stream(file_object)
        where = u'{0:s}{1:s}'.format(where, url)

    # Move to the end of the record, ready for the next record.
    file_object.seek(
        record.record_header.entry_length + offset - file_object.tell(),
        os.SEEK_CUR)

    event_object = KeychainInternetRecordEvent(
        creation_time, eventdata.EventTimestamp.CREATION_TIME,
        entry_name, account_name, text_description,
        comments, where, protocol, type_protocol, ssgp_hash)
    parser_context.ProduceEvent(
        event_object, parser_name=self.NAME, file_entry=file_entry)

    if creation_time != last_mod_time:
      event_object = KeychainInternetRecordEvent(
          last_mod_time, eventdata.EventTimestamp.MODIFICATION_TIME,
          entry_name, account_name, text_description,
          comments, where, protocol, type_protocol, ssgp_hash)
      parser_context.ProduceEvent(
          event_object, parser_name=self.NAME, file_entry=file_entry)

  def _VerifyStructure(self, file_object):
    """Verify that we are dealing with an Keychain entry.

    Args:
      file_object: A file-like object that points to a Keychain file.

    Returns:
      A list of table positions if it is a keychain, None otherwise.
    """
    # INFO: The keychain header layout is:
    # [DBHEADER] + [DBSCHEMA] + [OFFSET TABLE A] + ... + [OFFSET TABLE Z]
    # The table offsets are relative to the first byte of the DB schema, so
    # we must add the size of the [DBHEADER] to each of them.
    try:
      db_header = self.KEYCHAIN_DB_HEADER.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return
    if (db_header.minor_version != self.KEYCHAIN_MINOR_VERSION or
        db_header.major_version != self.KEYCHAIN_MAJOR_VERSION or
        db_header.magic != self.KEYCHAIN_MAGIC_HEADER):
      return

    # Read the database schema and extract the offset for all the tables.
    # They are ordered by file position from the top to the bottom of the file.
    try:
      db_schema = self.KEYCHAIN_DB_SCHEMA.parse_stream(file_object)
    except (IOError, construct.FieldError):
      return
    table_offsets = []
    for _ in range(db_schema.number_of_tables):
      try:
        table_offset = self.TABLE_OFFSET.parse_stream(file_object)
      except (IOError, construct.FieldError):
        return
      table_offsets.append(table_offset + self.KEYCHAIN_DB_HEADER.sizeof())
    return table_offsets

  def Parse(self, parser_context, file_entry):
    """Extract data from a Keychain file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
    """
    file_object = file_entry.GetFileObject()
    table_offsets = self._VerifyStructure(file_object)
    if not table_offsets:
      file_object.close()
      raise errors.UnableToParseFile(u'The file is not a Keychain file.')

    for table_offset in table_offsets:
      # Skip the unknown data between the current position and the table offset.
      file_object.seek(table_offset - file_object.tell(), os.SEEK_CUR)
      try:
        table = self.TABLE_HEADER.parse_stream(file_object)
      except construct.FieldError as exception:
        logging.warning((
            u'[{0:s}] Unable to parse table header in file: {1:s} '
            u'with error: {2:s}.').format(
                self.NAME, parser_context.GetDisplayName(file_entry),
                exception))
        continue

      # table_offset: absolute offset in the file where the table starts.
      # table.first_record: offset of the first record in the table,
      #                     relative to the first byte of the table.
      file_object.seek(
          table_offset + table.first_record - file_object.tell(), os.SEEK_CUR)

      if table.record_type == self.RECORD_TYPE_INTERNET:
        for _ in range(table.number_of_records):
          self._ReadEntryInternet(
              parser_context, file_object, file_entry=file_entry)

      elif table.record_type == self.RECORD_TYPE_APPLICATION:
        for _ in range(table.number_of_records):
          self._ReadEntryApplication(
              parser_context, file_object, file_entry=file_entry)

    file_object.close()
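
A minimal sketch of the header walk performed by _VerifyStructure, using the
same legacy construct 2.5 API as the parser. The DB_HEADER and DB_SCHEMA field
layouts below are illustrative assumptions, not the parser's actual
KEYCHAIN_DB_HEADER and KEYCHAIN_DB_SCHEMA definitions; only the offset
arithmetic (schema-relative table offsets plus the header size) mirrors the
code above.

import io

import construct

# Illustrative layouts; the real Keychain structures differ.
DB_HEADER = construct.Struct(
    'db_header',
    construct.String('magic', 4),
    construct.UBInt16('major_version'),
    construct.UBInt16('minor_version'),
    construct.Padding(8))
DB_SCHEMA = construct.Struct(
    'db_schema',
    construct.UBInt32('size'),
    construct.UBInt32('number_of_tables'))
TABLE_OFFSET = construct.UBInt32('table_offset')

data = (
    b'kych' + b'\x00\x01\x00\x00' + b'\x00' * 8 +  # 16-byte header
    b'\x00\x00\x00\x0c' + b'\x00\x00\x00\x01' +    # schema: size 12, 1 table
    b'\x00\x00\x00\x0c')                           # schema-relative offset
file_object = io.BytesIO(data)

DB_HEADER.parse_stream(file_object)
db_schema = DB_SCHEMA.parse_stream(file_object)
table_offsets = []
for _ in range(db_schema.number_of_tables):
    table_offset = TABLE_OFFSET.parse_stream(file_object)
    # Table offsets are relative to the first byte of the DB schema, so the
    # header size is added to obtain an absolute file position.
    table_offsets.append(table_offset + DB_HEADER.sizeof())
print(table_offsets)  # [28]
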
Example #26
    0: 'EMERGENCY',
    1: 'ALERT',
    2: 'CRITICAL',
    3: 'ERROR',
    4: 'WARNING',
    5: 'NOTICE',
    6: 'INFO',
    7: 'DEBUG'
}

# ASL Required Structures.

# ASL file header structure
ASL_HEADER_STRUCT = construct.Struct('asl_header_struct',
                                     construct.String('magic', 12),
                                     construct.UBInt32('version'),
                                     construct.UBInt64('offset'),
                                     construct.UBInt64('timestamp'),
                                     construct.UBInt32('cache_size'),
                                     construct.UBInt64('last_offset'),
                                     construct.Padding(36))

# Record = [Heap][Record_Struct][Values]
# Heap = [Group of Dyn_Value]*
# Values = [ADDR_TXT][ADDR_TXT][ADDR_TXT][ADDR_TXT](2x[ADDR_TXT])*
#            (Host)   (Sender) (Facility) (message)

# Record Struct
ASL_RECORD_STRUCT = construct.Struct('asl_record_struct', construct.Padding(2),
                                     construct.UBInt32('tam_entry'),
                                     construct.UBInt64('next_offset'),
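
The ASL_HEADER_STRUCT above is complete, so a minimal sketch can exercise it
on synthetic data. It assumes the construct import implied by the definitions
above (legacy construct 2.5 API); the magic value and field contents are
made-up illustrations, not taken from a real ASL database.

import io
import struct

# Synthetic 80-byte header matching ASL_HEADER_STRUCT: 12-byte magic,
# UBInt32 version, UBInt64 offset, UBInt64 timestamp, UBInt32 cache_size,
# UBInt64 last_offset and 36 bytes of padding.
data = (b'ASL DB' + b'\x00' * 6 +
        struct.pack('>I', 2) +           # version
        struct.pack('>Q', 80) +          # offset of the first record
        struct.pack('>Q', 1400000000) +  # creation timestamp
        struct.pack('>I', 256) +         # cache_size
        struct.pack('>Q', 0) +           # last_offset
        b'\x00' * 36)

header = ASL_HEADER_STRUCT.parse_stream(io.BytesIO(data))
print(u'{0:d} {1:d}'.format(header.version, header.offset))  # 2 80
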
Example #27
class FirefoxCacheParser(parser.BaseParser):
    """Extract cached records from Firefox."""

    NAME = 'firefox_cache'

    # Number of bytes allocated to a cache record metadata.
    RECORD_HEADER_SIZE = 36

    # Initial size of Firefox >= 4 cache files.
    INITIAL_CACHE_FILE_SIZE = 1024 * 1024 * 4

    # Smallest possible block size in Firefox cache files.
    MIN_BLOCK_SIZE = 256

    RECORD_HEADER_STRUCT = construct.Struct('record_header',
                                            construct.UBInt16('major'),
                                            construct.UBInt16('minor'),
                                            construct.UBInt32('location'),
                                            construct.UBInt32('fetch_count'),
                                            construct.UBInt32('last_fetched'),
                                            construct.UBInt32('last_modified'),
                                            construct.UBInt32('expire_time'),
                                            construct.UBInt32('data_size'),
                                            construct.UBInt32('request_size'),
                                            construct.UBInt32('info_size'))

    ALTERNATIVE_CACHE_NAME = (pyparsing.Word(pyparsing.hexnums, exact=5) +
                              pyparsing.Word("m", exact=1) +
                              pyparsing.Word(pyparsing.nums, exact=2))

    FIREFOX_CACHE_CONFIG = collections.namedtuple(
        u'firefox_cache_config', u'block_size first_record_offset')

    REQUEST_METHODS = [
        u'GET', u'HEAD', u'POST', u'PUT', u'DELETE', u'TRACE', u'OPTIONS',
        u'CONNECT', u'PATCH'
    ]

    def __GetFirefoxConfig(self, file_entry):
        """Determine cache file block size. Raises exception if not found."""

        if file_entry.name[0:9] != '_CACHE_00':
            try:
                # Match alternative filename. Five hex characters + 'm' + two digit
                # number, e.g. "01ABCm02". 'm' is for metadata. Cache files with 'd'
                # instead contain data only.
                self.ALTERNATIVE_CACHE_NAME.parseString(file_entry.name)
            except pyparsing.ParseException:
                raise errors.UnableToParseFile(u'Not a Firefox cache file.')

        file_object = file_entry.GetFileObject()

        # There ought to be a valid record within the first 4MB. We use this
        # limit to prevent reading large invalid files.
        to_read = min(file_object.get_size(), self.INITIAL_CACHE_FILE_SIZE)

        while file_object.get_offset() < to_read:
            offset = file_object.get_offset()

            try:
                # We have not yet determined the block size, so we use the smallest
                # possible size.
                record = self.__NextRecord(file_entry.name, file_object,
                                           self.MIN_BLOCK_SIZE)

                record_size = (self.RECORD_HEADER_SIZE + record.request_size +
                               record.info_size)

                if record_size >= 4096:
                    # _CACHE_003_
                    block_size = 4096
                elif record_size >= 1024:
                    # _CACHE_002_
                    block_size = 1024
                else:
                    # _CACHE_001_
                    block_size = 256

                return self.FIREFOX_CACHE_CONFIG(block_size, offset)

            except IOError:
                logging.debug(u'{0:s}:{1:d}: Invalid record.'.format(
                    file_entry.name, offset))

        raise errors.UnableToParseFile(u'Could not find a valid cache record. '
                                       u'Not a Firefox cache file.')

    def __Accept(self, candidate, block_size):
        """Determine whether the candidate is a valid cache record."""

        record_size = (self.RECORD_HEADER_SIZE + candidate.request_size +
                       candidate.info_size)

        return (candidate.request_size > 0 and candidate.fetch_count > 0
                and candidate.major == 1 and record_size // block_size < 256)

    def __NextRecord(self, filename, file_object, block_size):
        """Provide the next cache record."""

        offset = file_object.get_offset()

        try:
            candidate = self.RECORD_HEADER_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError):
            raise IOError(u'Unable to parse stream.')

        if not self.__Accept(candidate, block_size):
            # Move reader to next candidate block.
            file_object.seek(block_size - self.RECORD_HEADER_SIZE, os.SEEK_CUR)
            raise IOError(u'Not a valid Firefox cache record.')

        # The last byte in a request is null.
        url = file_object.read(candidate.request_size)[:-1]

        # HTTP response header, even elements are keys, odd elements values.
        headers = file_object.read(candidate.info_size)

        request_method, _, _ = (
            headers.partition('request-method\x00')[2].partition('\x00'))

        _, _, response_head = headers.partition('response-head\x00')

        response_code, _, _ = response_head.partition("\r\n")

        if request_method not in self.REQUEST_METHODS:
            logging.debug(
                u'{0:s}:{1:d}: Unknown HTTP method "{2:s}". Response "{3:s}"'.
                format(filename, offset, request_method, headers))

        if response_code[0:4] != 'HTTP':
            logging.debug(
                u'{0:s}:{1:d}: Could not determine HTTP response code. '
                u'Response headers: "{2:s}".'.format(filename, offset,
                                                     headers))

        # A request can span multiple blocks, so we use modulo.
        _, remainder = divmod(file_object.get_offset() - offset, block_size)

        # Move reader to next candidate block. Include the null-byte skipped above.
        file_object.seek(block_size - remainder, os.SEEK_CUR)

        return FirefoxCacheEvent(candidate, request_method, url, response_code)

    def Parse(self, file_entry):
        """Extract records from a Firefox cache file."""

        firefox_config = self.__GetFirefoxConfig(file_entry)

        file_object = file_entry.GetFileObject()

        file_object.seek(firefox_config.first_record_offset)

        while file_object.get_offset() < file_object.get_size():
            try:
                yield self.__NextRecord(file_entry.name, file_object,
                                        firefox_config.block_size)
            except IOError:
                logging.debug(u'{0:s}:{1:d}: Invalid cache record.'.format(
                    file_entry.name,
                    file_object.get_offset() - self.MIN_BLOCK_SIZE))
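
A minimal sketch of the block-size inference in __GetFirefoxConfig: a record
is classified by the smallest Firefox cache block size able to hold it, using
the thresholds from the code above (plain Python, no assumptions beyond those
constants).

RECORD_HEADER_SIZE = 36

def infer_block_size(request_size, info_size):
    """Map a record's total size to the cache file block size."""
    record_size = RECORD_HEADER_SIZE + request_size + info_size
    if record_size >= 4096:
        return 4096  # _CACHE_003_
    elif record_size >= 1024:
        return 1024  # _CACHE_002_
    return 256  # _CACHE_001_

print(infer_block_size(200, 100))    # 256
print(infer_block_size(2000, 500))   # 1024
print(infer_block_size(8000, 1000))  # 4096
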
Example #28
def puint():
    PINT = construct.Struct("PINT", construct.UBInt32("x"))
    return PINT
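
Usage sketch for puint(), assuming the legacy construct API imported above:
parsing four big-endian bytes yields a container whose x field holds the
unsigned value.

PINT = puint()
print(PINT.parse(b'\x00\x00\x01\x00').x)  # 256
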
Example #29
class JavaIDXParser(interface.BaseParser):
    """Parse Java IDX files for download events.

  There are five structures defined. 6.02 files had one generic section
  that retained all data. From 6.03, the file went to a multi-section
  format where later sections were optional and of variable length.
  6.03, 6.04, and 6.05 files all have their main data section (#2)
  begin at offset 128. The short structure exists because 6.05 files
  deviate after the 8th byte, so the first 8 bytes are read to verify
  the file, the version is extracted, and parsing then continues with
  the correct structures.
  """

    NAME = 'java_idx'
    DESCRIPTION = u'Parser for Java IDX files.'

    IDX_SHORT_STRUCT = construct.Struct('magic', construct.UBInt8('busy'),
                                        construct.UBInt8('incomplete'),
                                        construct.UBInt32('idx_version'))

    IDX_602_STRUCT = construct.Struct(
        'IDX_602_Full', construct.UBInt16('null_space'),
        construct.UBInt8('shortcut'), construct.UBInt32('content_length'),
        construct.UBInt64('last_modified_date'),
        construct.UBInt64('expiration_date'),
        construct.PascalString('version_string',
                               length_field=construct.UBInt16('length')),
        construct.PascalString('url',
                               length_field=construct.UBInt16('length')),
        construct.PascalString('namespace',
                               length_field=construct.UBInt16('length')),
        construct.UBInt32('FieldCount'))

    IDX_605_SECTION_ONE_STRUCT = construct.Struct(
        'IDX_605_Section1', construct.UBInt8('shortcut'),
        construct.UBInt32('content_length'),
        construct.UBInt64('last_modified_date'),
        construct.UBInt64('expiration_date'),
        construct.UBInt64('validation_date'), construct.UBInt8('signed'),
        construct.UBInt32('sec2len'), construct.UBInt32('sec3len'),
        construct.UBInt32('sec4len'))

    IDX_605_SECTION_TWO_STRUCT = construct.Struct(
        'IDX_605_Section2',
        construct.PascalString('version',
                               length_field=construct.UBInt16('length')),
        construct.PascalString('url',
                               length_field=construct.UBInt16('length')),
        construct.PascalString('namespec',
                               length_field=construct.UBInt16('length')),
        construct.PascalString('ip_address',
                               length_field=construct.UBInt16('length')),
        construct.UBInt32('FieldCount'))

    # Java uses Pascal-style strings, but with a 2-byte length field.
    JAVA_READUTF_STRING = construct.Struct(
        'Java.ReadUTF',
        construct.PascalString('string',
                               length_field=construct.UBInt16('length')))

    def Parse(self, parser_context, file_entry):
        """Extract data from a Java cache IDX file.

    This is the main parsing engine for the parser. It determines if
    the selected file is a proper IDX file. It then checks the file
    version to determine the correct structure to apply to extract
    data.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
    """
        file_object = file_entry.GetFileObject()
        try:
            magic = self.IDX_SHORT_STRUCT.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            raise errors.UnableToParseFile(
                u'Unable to parse Java IDX file with error: {0:s}.'.format(
                    exception))

        # Fields magic.busy and magic.incomplete are normally 0x00. They
        # are set to 0x01 if the file is currently being downloaded. Logic
        # checks for > 1 to avoid a race condition and still reject any
        # file with other data.
        # Field magic.idx_version is the file version, of which only
        # certain versions are supported.
        if magic.busy > 1 or magic.incomplete > 1:
            raise errors.UnableToParseFile(u'Not a valid Java IDX file')

        if magic.idx_version not in [602, 603, 604, 605]:
            raise errors.UnableToParseFile(u'Not a valid Java IDX file')

        # Obtain the relevant values from the file. The last modified date
        # denotes when the file was last modified on the HOST. For example,
        # when the file was uploaded to a web server.
        if magic.idx_version == 602:
            section_one = self.IDX_602_STRUCT.parse_stream(file_object)
            last_modified_date = section_one.last_modified_date
            url = section_one.url
            ip_address = 'Unknown'
            http_header_count = section_one.FieldCount
        elif magic.idx_version in [603, 604, 605]:

            # IDX 6.03 and 6.04 have two unused bytes before the structure.
            if magic.idx_version in [603, 604]:
                file_object.read(2)

            # IDX 6.03, 6.04, and 6.05 files use the same structures for the
            # remaining data.
            section_one = self.IDX_605_SECTION_ONE_STRUCT.parse_stream(
                file_object)
            last_modified_date = section_one.last_modified_date
            if file_object.get_size() > 128:
                file_object.seek(128)  # Static offset for section 2.
                section_two = self.IDX_605_SECTION_TWO_STRUCT.parse_stream(
                    file_object)
                url = section_two.url
                ip_address = section_two.ip_address
                http_header_count = section_two.FieldCount
            else:
                url = 'Unknown'
                ip_address = 'Unknown'
                http_header_count = 0

        # File offset is now just prior to HTTP headers. Make sure there
        # are headers, and then parse them to retrieve the download date.
        download_date = None
        for _ in range(http_header_count):
            field = self.JAVA_READUTF_STRING.parse_stream(file_object)
            value = self.JAVA_READUTF_STRING.parse_stream(file_object)
            if field.string == 'date':
                # Time string "should" be in UTC or have an associated time zone
                # information in the string itself. If that is not the case then
                # there is no reliable method for plaso to determine the proper
                # timezone, so the assumption is that it is UTC.
                download_date = timelib.Timestamp.FromTimeString(
                    value.string, gmt_as_timezone=False)

        if not url or not ip_address:
            raise errors.UnableToParseFile(
                u'Unexpected Error: URL or IP address not found in file.')

        last_modified_timestamp = timelib.Timestamp.FromJavaTime(
            last_modified_date)
        # TODO: Move the timestamp description fields into eventdata.
        event_object = JavaIDXEvent(last_modified_timestamp,
                                    'File Hosted Date', magic.idx_version, url,
                                    ip_address)
        parser_context.ProduceEvent(event_object,
                                    parser_name=self.NAME,
                                    file_entry=file_entry)

        if section_one:
            expiration_date = section_one.get('expiration_date', None)
            if expiration_date:
                expiration_timestamp = timelib.Timestamp.FromJavaTime(
                    expiration_date)
                event_object = JavaIDXEvent(expiration_timestamp,
                                            'File Expiration Date',
                                            magic.idx_version, url, ip_address)
                parser_context.ProduceEvent(event_object,
                                            parser_name=self.NAME,
                                            file_entry=file_entry)

        if download_date:
            event_object = JavaIDXEvent(
                download_date, eventdata.EventTimestamp.FILE_DOWNLOADED,
                magic.idx_version, url, ip_address)
            parser_context.ProduceEvent(event_object,
                                        parser_name=self.NAME,
                                        file_entry=file_entry)
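
A minimal, self-contained sketch of the JAVA_READUTF_STRING structure above:
Java's writeUTF/readUTF strings carry a 2-byte big-endian length prefix, so
consecutive strings can be parsed straight off a stream (legacy construct 2.5
API; the byte values are made up).

import io

import construct

JAVA_READUTF_STRING = construct.Struct(
    'Java.ReadUTF',
    construct.PascalString('string',
                           length_field=construct.UBInt16('length')))

# Two length-prefixed strings: 'date' (length 4) and 'foo' (length 3).
stream = io.BytesIO(b'\x00\x04date\x00\x03foo')
print(JAVA_READUTF_STRING.parse_stream(stream).string)  # date
print(JAVA_READUTF_STRING.parse_stream(stream).string)  # foo
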
Example #30
class CupsIppParser(interface.BaseParser):
    """Parser for CUPS IPP files. """

    NAME = 'cups_ipp'
    DESCRIPTION = u'Parser for CUPS IPP files.'

    # INFO:
    # For each file there is only one document, with three different
    # timestamps: created, processed and finished.
    # Format:
    # [HEADER: MAGIC + KNOWN_TYPE][GROUP A]...[GROUP Z][GROUP_END: 0x03]
    # GROUP: [GROUP ID][PAIR A]...[PAIR Z] where [PAIR: NAME + VALUE]
    #   GROUP ID: [1byte ID]
    #   PAIR: [TagID][\x00][Name][Value])
    #     TagID: 1 byte integer with the type of "Value".
    #     Name: [Length][Text][\x00]
    #       Name can be empty when the name has more than one value.
    #       Example: family name "lopez mata" with more than one surname.
    #       Type_Text + [0x06, family, 0x00] + [0x05, lopez, 0x00] +
    #       Type_Text + [0x00, 0x00] + [0x04, mata, 0x00]
    #     Value: can be integer, boolean, or text provided by TagID.
    #       If boolean, Value: [\x01][0x00(False)] or [\x01(True)]
    #       If integer, Value: [\x04][Integer]
    #       If text,    Value: [Length text][Text][\x00]

    # Magic number that identifies the supported CUPS IPP version.
    IPP_MAJOR_VERSION = 2
    IPP_MINOR_VERSION = 0
    # Supported Operation ID.
    IPP_OP_ID = 5

    # CUPS IPP File header.
    CUPS_IPP_HEADER = construct.Struct('cups_ipp_header_struct',
                                       construct.UBInt8('major_version'),
                                       construct.UBInt8('minor_version'),
                                       construct.UBInt16('operation_id'),
                                       construct.UBInt32('request_id'))

    # Group ID that indicates the end of the IPP Control file.
    GROUP_END = 3
    # Identification Groups.
    GROUP_LIST = [1, 2, 4, 5, 6, 7]

    # Type ID.
    TYPE_GENERAL_INTEGER = 32
    TYPE_INTEGER = 33
    TYPE_ENUMERATION = 35
    TYPE_BOOL = 34

    # Type of values that can be extracted.
    INTEGER_8 = construct.UBInt8('integer')
    INTEGER_32 = construct.UBInt32('integer')
    TEXT = construct.PascalString('text',
                                  length_field=construct.UBInt8('length'))
    BOOLEAN = construct.Struct('boolean_value', construct.Padding(1),
                               INTEGER_8)
    INTEGER = construct.Struct('integer_value', construct.Padding(1),
                               INTEGER_32)

    # Name of the pair.
    PAIR_NAME = construct.Struct('pair_name', TEXT, construct.Padding(1))

    # Specific CUPS IPP to generic name.
    NAME_PAIR_TRANSLATION = {
        'printer-uri': u'uri',
        'job-uuid': u'job_id',
        'DestinationPrinterID': u'printer_id',
        'job-originating-user-name': u'user',
        'job-name': u'job_name',
        'document-format': u'doc_type',
        'job-originating-host-name': u'computer_name',
        'com.apple.print.JobInfo.PMApplicationName': u'application',
        'com.apple.print.JobInfo.PMJobOwner': u'owner'
    }

    def Parse(self, parser_context, file_entry):
        """Extract a entry from an CUPS IPP file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
    """
        file_object = file_entry.GetFileObject()
        file_object.seek(0, os.SEEK_SET)

        try:
            header = self.CUPS_IPP_HEADER.parse_stream(file_object)
        except (IOError, construct.FieldError) as exception:
            file_object.close()
            raise errors.UnableToParseFile(
                u'Unable to parse CUPS IPP Header with error: {0:s}'.format(
                    exception))

        if (header.major_version != self.IPP_MAJOR_VERSION
                or header.minor_version != self.IPP_MINOR_VERSION):
            file_object.close()
            raise errors.UnableToParseFile(
                u'[{0:s}] Unsupported version number.'.format(self.NAME))

        if header.operation_id != self.IPP_OP_ID:
            # Warn if the operation ID differs from the standard one. We should be
            # able to parse the file nonetheless.
            logging.debug(
                u'[{0:s}] Unsupported operation identifier in file: {1:s}.'.
                format(self.NAME, parser_context.GetDisplayName(file_entry)))

        # Read the pairs extracting the name and the value.
        data_dict = {}
        name, value = self.ReadPair(parser_context, file_entry, file_object)
        while name or value:
            # Translate the known "name" CUPS IPP to a generic name value.
            pretty_name = self.NAME_PAIR_TRANSLATION.get(name, name)
            data_dict.setdefault(pretty_name, []).append(value)
            name, value = self.ReadPair(parser_context, file_entry,
                                        file_object)

        # Yield the events.
        if u'time-at-creation' in data_dict:
            event_object = CupsIppEvent(data_dict['time-at-creation'][0],
                                        eventdata.EventTimestamp.CREATION_TIME,
                                        data_dict)
            parser_context.ProduceEvent(event_object,
                                        parser_name=self.NAME,
                                        file_entry=file_entry)

        if u'time-at-processing' in data_dict:
            event_object = CupsIppEvent(data_dict['time-at-processing'][0],
                                        eventdata.EventTimestamp.START_TIME,
                                        data_dict)
            parser_context.ProduceEvent(event_object,
                                        parser_name=self.NAME,
                                        file_entry=file_entry)

        if u'time-at-completed' in data_dict:
            event_object = CupsIppEvent(data_dict['time-at-completed'][0],
                                        eventdata.EventTimestamp.END_TIME,
                                        data_dict)
            parser_context.ProduceEvent(event_object,
                                        parser_name=self.NAME,
                                        file_entry=file_entry)

        file_object.close()

    def ReadPair(self, parser_context, file_entry, file_object):
        """Reads an attribute name and value pair from a CUPS IPP event.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).
      file_object: a file-like object that points to a file.

    Returns:
      A tuple of name and value. If the name and value cannot be read, both
      are set to None.
    """
        # Pair = Type ID + Name + Value.
        try:
            # Can be:
            #   Group ID + IDtag = Group ID (1byte) + Tag ID (1byte) + '0x00'.
            #   IDtag = Tag ID (1byte) + '0x00'.
            type_id = self.INTEGER_8.parse_stream(file_object)
            if type_id == self.GROUP_END:
                return None, None

            elif type_id in self.GROUP_LIST:
                # If it is a group ID we must read the next byte that contains
                # the first TagID.
                type_id = self.INTEGER_8.parse_stream(file_object)

            # 0x00 separator character.
            _ = self.INTEGER_8.parse_stream(file_object)

        except (IOError, construct.FieldError):
            logging.warning(
                u'[{0:s}] Unsupported identifier in file: {1:s}.'.format(
                    self.NAME, parser_context.GetDisplayName(file_entry)))
            return None, None

        # Name = Length name + name + 0x00
        try:
            name = self.PAIR_NAME.parse_stream(file_object).text
        except (IOError, construct.FieldError):
            logging.warning(u'[{0:s}] Unsupported name in file: {1:s}.'.format(
                self.NAME, parser_context.GetDisplayName(file_entry)))
            return None, None

        # Value: can be integer, boolean or text, selected by Type ID.
        try:
            if type_id in [
                    self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER,
                    self.TYPE_ENUMERATION
            ]:
                value = self.INTEGER.parse_stream(file_object).integer

            elif type_id == self.TYPE_BOOL:
                value = bool(self.BOOLEAN.parse_stream(file_object).integer)

            else:
                value = self.TEXT.parse_stream(file_object)

        except (IOError, construct.FieldError):
            logging.warning(
                u'[{0:s}] Unsupported value in file: {1:s}.'.format(
                    self.NAME, parser_context.GetDisplayName(file_entry)))
            return None, None

        return name, value
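
A minimal, self-contained sketch of one attribute pair as laid out in the
comments above: [TagID][0x00][Name][Value], here a boolean pair (TagID 0x22,
TYPE_BOOL). The structures restate the class constants (legacy construct 2.5
API); the byte string is a made-up example.

import io

import construct

INTEGER_8 = construct.UBInt8('integer')
TEXT = construct.PascalString('text',
                              length_field=construct.UBInt8('length'))
PAIR_NAME = construct.Struct('pair_name', TEXT, construct.Padding(1))
BOOLEAN = construct.Struct('boolean_value', construct.Padding(1), INTEGER_8)

# TagID 0x22 (boolean) + 0x00 separator + name 'test' (length-prefixed and
# null-terminated) + boolean value: length byte 0x01 + 0x01 (True).
stream = io.BytesIO(b'\x22\x00' + b'\x04test\x00' + b'\x01\x01')
type_id = INTEGER_8.parse_stream(stream)  # 0x22
INTEGER_8.parse_stream(stream)            # skip the 0x00 separator
name = PAIR_NAME.parse_stream(stream).text
value = bool(BOOLEAN.parse_stream(stream).integer)
print(u'{0:s} {1!s}'.format(name, value))  # test True
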