Beispiel #1
0
class PopularityContestParser(text_parser.PyparsingSingleLineTextParser):
    """Text parser for popularity contest log files.

    Three line structures are recognized: a session header, a session footer
    and a per-package log line. Header and footer lines yield session events,
    log lines yield last access and entry modification events.
    """

    NAME = u'popularity_contest'
    DESCRIPTION = u'Parser for popularity contest log files.'

    _ASCII_PRINTABLES = pyparsing.printables

    # Every character in the range 0 - 65535 that is not whitespace.
    _UNICODE_PRINTABLES = u''.join(
        symbol for symbol in map(py2to3.UNICHR, range(65536))
        if not symbol.isspace())

    MRU = pyparsing.Word(_UNICODE_PRINTABLES).setResultsName(u'mru')
    PACKAGE = pyparsing.Word(_ASCII_PRINTABLES).setResultsName(u'package')
    TAG = pyparsing.QuotedString(
        u'<', endQuoteChar=u'>').setResultsName(u'tag')
    TIMESTAMP = text_parser.PyparsingConstants.INTEGER.setResultsName(
        u'timestamp')

    # Header line, in order: the "POPULARITY-CONTEST-" marker, the session
    # number, "TIME:" and a timestamp, "ID:" and a 32 character alphanumeric
    # identifier, then the remainder of the line as details.
    HEADER = (
        pyparsing.Literal(u'POPULARITY-CONTEST-').suppress() +
        text_parser.PyparsingConstants.INTEGER.setResultsName(u'session') +
        pyparsing.Literal(u'TIME:').suppress() +
        TIMESTAMP +
        pyparsing.Literal(u'ID:').suppress() +
        pyparsing.Word(pyparsing.alphanums, exact=32).setResultsName(u'id') +
        pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName(u'details'))

    # Footer line, in order: the "END-POPULARITY-CONTEST-" marker, the
    # session number, "TIME:" and a timestamp.
    FOOTER = (
        pyparsing.Literal(u'END-POPULARITY-CONTEST-').suppress() +
        text_parser.PyparsingConstants.INTEGER.setResultsName(u'session') +
        pyparsing.Literal(u'TIME:').suppress() +
        TIMESTAMP)

    # Log line: <atime> <ctime> followed by either <package> <tag> or
    # <package> <mru> with an optional <tag>.
    LOG_LINE = (
        TIMESTAMP.setResultsName(u'atime') +
        TIMESTAMP.setResultsName(u'ctime') +
        ((PACKAGE + TAG) | (PACKAGE + MRU + pyparsing.Optional(TAG))))

    LINE_STRUCTURES = [
        (u'logline', LOG_LINE),
        (u'header', HEADER),
        (u'footer', FOOTER),
    ]

    _ENCODING = u'UTF-8'

    def _ParseLogLine(self, parser_mediator, structure):
        """Produces events from a parsed log line.

        Log lines without an <mru> value are ignored. For the remaining
        lines a last access event is produced when <atime> is set and
        non-zero, and an entry modification event when <ctime> is set and
        non-zero.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          structure (pyparsing.ParseResults): structure parsed from the log
              file.
        """
        most_recently_used = structure.mru
        if not most_recently_used:
            # Required fields are <mru> and <atime>; lines lacking <mru>
            # are of no interest.
            return

        event_data = PopularityContestEventData()
        event_data.mru = most_recently_used
        event_data.package = structure.package
        event_data.record_tag = structure.tag

        # <atime> and <ctime> are always present but can be 0. A value of 0
        # corresponds to the <NOFILES> case, for which no event is produced.
        # TODO: not doing any check on <tag> fields, even if only informative
        # probably it could be better to check for the expected values.
        timestamps = (
            (structure.atime, definitions.TIME_DESCRIPTION_LAST_ACCESS),
            (structure.ctime,
             definitions.TIME_DESCRIPTION_ENTRY_MODIFICATION))

        for posix_timestamp, description in timestamps:
            if not posix_timestamp:
                continue

            date_time = dfdatetime_posix_time.PosixTime(
                timestamp=posix_timestamp)
            event = time_events.DateTimeValuesEvent(date_time, description)
            parser_mediator.ProduceEventWithEventData(event, event_data)

    def ParseRecord(self, parser_mediator, key, structure):
        """Produces events from a parsed log record.

        Log lines are delegated to _ParseLogLine. Header and footer records
        produce a single session event, with status "start" or "end"
        respectively. Records of an unknown type are reported with a warning
        and skipped.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): structure parsed from the log
              file.
        """
        # TODO: Add anomaly objects for abnormal timestamps, such as when the
        # log timestamp is greater than the session start.
        if key == u'logline':
            self._ParseLogLine(parser_mediator, structure)
            return

        if key not in (u'header', u'footer'):
            logging.warning(
                u'PopularityContestParser, unknown structure: {0:s}.'.format(
                    key))
            return

        posix_timestamp = structure.timestamp
        if not posix_timestamp:
            logging.debug(u'[{0:s}] {1:s} with invalid timestamp.'.format(
                self.NAME, key))
            return

        event_data = PopularityContestSessionEventData()
        event_data.session = u'{0!s}'.format(structure.session)

        if key == u'header':
            event_data.details = structure.details
            event_data.hostid = structure.id
            event_data.status = u'start'
        else:
            # Only the footer remains at this point.
            event_data.status = u'end'

        date_time = dfdatetime_posix_time.PosixTime(
            timestamp=posix_timestamp)
        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_ADDED)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def VerifyStructure(self, parser_mediator, line):
        """Determines if a line looks like a Popularity Contest header.

        The line has to match the HEADER grammar and its timestamp has to
        convert to a non-zero value.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          line (str): single line from the text file.

        Returns:
          bool: True if the line was successfully parsed.
        """
        try:
            parsed_header = self.HEADER.parseString(line)
        except pyparsing.ParseException:
            logging.debug(u'Not a Popularity Contest log file, invalid header')
            return False

        if timelib.Timestamp.FromPosixTime(parsed_header.timestamp):
            return True

        logging.debug(
            u'Invalid Popularity Contest log file header timestamp.')
        return False
class PopularityContestParser(text_parser.PyparsingSingleLineTextParser):
    """Text parser for popularity contest log files.

    Three line structures are recognized: a session header, a session footer
    and a per-package log line. Header and footer lines yield session events,
    log lines yield last access and entry modification events.
    """

    NAME = 'popularity_contest'
    DESCRIPTION = 'Parser for popularity contest log files.'

    _ASCII_PRINTABLES = pyparsing.printables

    # Every character in the range 0 - 65535 that is not whitespace.
    _UNICODE_PRINTABLES = ''.join(
        symbol for symbol in map(py2to3.UNICHR, range(65536))
        if not symbol.isspace())

    MRU = pyparsing.Word(_UNICODE_PRINTABLES).setResultsName('mru')
    PACKAGE = pyparsing.Word(_ASCII_PRINTABLES).setResultsName('package')
    TAG = pyparsing.QuotedString(
        '<', endQuoteChar='>').setResultsName('tag')

    # Header line, in order: the "POPULARITY-CONTEST-" marker, the session
    # number, "TIME:" and a timestamp, "ID:" and a 32 character alphanumeric
    # identifier, then the remainder of the line as details.
    HEADER = (
        pyparsing.Literal('POPULARITY-CONTEST-').suppress() +
        text_parser.PyparsingConstants.INTEGER.setResultsName('session') +
        pyparsing.Literal('TIME:').suppress() +
        text_parser.PyparsingConstants.INTEGER.setResultsName('timestamp') +
        pyparsing.Literal('ID:').suppress() +
        pyparsing.Word(pyparsing.alphanums, exact=32).setResultsName('id') +
        pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName('details'))

    # Footer line, in order: the "END-POPULARITY-CONTEST-" marker, the
    # session number, "TIME:" and a timestamp.
    FOOTER = (
        pyparsing.Literal('END-POPULARITY-CONTEST-').suppress() +
        text_parser.PyparsingConstants.INTEGER.setResultsName('session') +
        pyparsing.Literal('TIME:').suppress() +
        text_parser.PyparsingConstants.INTEGER.setResultsName('timestamp'))

    # Log line: <atime> <ctime> followed by either <package> <tag> or
    # <package> <mru> with an optional <tag>.
    LOG_LINE = (
        text_parser.PyparsingConstants.INTEGER.setResultsName('atime') +
        text_parser.PyparsingConstants.INTEGER.setResultsName('ctime') +
        ((PACKAGE + TAG) | (PACKAGE + MRU + pyparsing.Optional(TAG))))

    LINE_STRUCTURES = [
        ('logline', LOG_LINE),
        ('header', HEADER),
        ('footer', FOOTER),
    ]

    # Names of the structures ParseRecord accepts.
    _SUPPORTED_KEYS = frozenset(name for name, _ in LINE_STRUCTURES)

    _ENCODING = 'UTF-8'

    def _ParseLogLine(self, parser_mediator, structure):
        """Produces events from a parsed log line.

        Log lines without an <mru> value are ignored. For the remaining
        lines a last access event is produced when <atime> is set and
        non-zero, and an entry modification event when <ctime> is set and
        non-zero.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          structure (pyparsing.ParseResults): structure parsed from the log
              file.
        """
        most_recently_used = self._GetValueFromStructure(structure, 'mru')
        if not most_recently_used:
            # Required fields are <mru> and <atime>; lines lacking <mru>
            # are of no interest.
            return

        event_data = PopularityContestEventData()
        event_data.mru = most_recently_used
        event_data.package = self._GetValueFromStructure(structure, 'package')
        event_data.record_tag = self._GetValueFromStructure(structure, 'tag')

        # <atime> and <ctime> are always present but can be 0. A value of 0
        # corresponds to the <NOFILES> case, for which no event is produced.
        # TODO: not doing any check on <tag> fields, even if only informative
        # probably it could be better to check for the expected values.
        timestamp_descriptions = (
            ('atime', definitions.TIME_DESCRIPTION_LAST_ACCESS),
            ('ctime', definitions.TIME_DESCRIPTION_ENTRY_MODIFICATION))

        for value_name, description in timestamp_descriptions:
            posix_timestamp = self._GetValueFromStructure(
                structure, value_name)
            if not posix_timestamp:
                continue

            date_time = dfdatetime_posix_time.PosixTime(
                timestamp=posix_timestamp)
            event = time_events.DateTimeValuesEvent(date_time, description)
            parser_mediator.ProduceEventWithEventData(event, event_data)

    def ParseRecord(self, parser_mediator, key, structure):
        """Produces events from a parsed log record.

        Log lines are delegated to _ParseLogLine. Header and footer records
        produce a single session event, with status "start" or "end"
        respectively, unless no timestamp value is available.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): structure parsed from the log
              file.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key not in self._SUPPORTED_KEYS:
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

        # TODO: Add anomaly objects for abnormal timestamps, such as when the
        # log timestamp is greater than the session start.
        if key == 'logline':
            self._ParseLogLine(parser_mediator, structure)
            return

        posix_timestamp = self._GetValueFromStructure(structure, 'timestamp')
        if posix_timestamp is None:
            logger.debug('[{0:s}] {1:s} with invalid timestamp.'.format(
                self.NAME, key))
            return

        session_number = self._GetValueFromStructure(structure, 'session')

        event_data = PopularityContestSessionEventData()
        # TODO: determine why session is formatted as a string.
        event_data.session = '{0!s}'.format(session_number)

        if key == 'header':
            event_data.details = self._GetValueFromStructure(
                structure, 'details')
            event_data.hostid = self._GetValueFromStructure(structure, 'id')
            event_data.status = 'start'

        elif key == 'footer':
            event_data.status = 'end'

        date_time = dfdatetime_posix_time.PosixTime(
            timestamp=posix_timestamp)
        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_ADDED)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def VerifyStructure(self, parser_mediator, line):
        """Determines if a line looks like a Popularity Contest header.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          line (str): line from a text file.

        Returns:
          bool: True if the line was successfully parsed.
        """
        try:
            self.HEADER.parseString(line)
        except pyparsing.ParseException:
            logger.debug('Not a Popularity Contest log file, invalid header')
            is_header = False
        else:
            is_header = True

        return is_header