class PopularityContestParser(text_parser.PyparsingSingleLineTextParser):
  """Parser for popularity contest log files.

  Three single-line structures are recognized:
    header: POPULARITY-CONTEST-<session> TIME:<timestamp> ID:<id> <details>
    footer: END-POPULARITY-CONTEST-<session> TIME:<timestamp>
    logline: <atime> <ctime> <package> <tag>, or
             <atime> <ctime> <package> <mru> [<tag>]
  """

  NAME = u'popularity_contest'
  DESCRIPTION = u'Parser for popularity contest log files.'

  _ASCII_PRINTABLES = pyparsing.printables

  # All non-whitespace characters with a code point from 0 through 65535.
  _UNICODE_PRINTABLES = u''.join(
      character for character in map(py2to3.UNICHR, range(65536))
      if not character.isspace())

  MRU = pyparsing.Word(_UNICODE_PRINTABLES).setResultsName(u'mru')

  PACKAGE = pyparsing.Word(_ASCII_PRINTABLES).setResultsName(u'package')

  # A tag is text enclosed in angle brackets, for example <NOFILES>.
  TAG = pyparsing.QuotedString(u'<', endQuoteChar=u'>').setResultsName(u'tag')

  TIMESTAMP = text_parser.PyparsingConstants.INTEGER.setResultsName(
      u'timestamp')

  HEADER = (
      pyparsing.Literal(u'POPULARITY-CONTEST-').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName(u'session') +
      pyparsing.Literal(u'TIME:').suppress() +
      TIMESTAMP +
      pyparsing.Literal(u'ID:').suppress() +
      pyparsing.Word(pyparsing.alphanums, exact=32).setResultsName(u'id') +
      pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName(u'details'))

  FOOTER = (
      pyparsing.Literal(u'END-POPULARITY-CONTEST-').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName(u'session') +
      pyparsing.Literal(u'TIME:').suppress() +
      TIMESTAMP)

  # Either "<package> <tag>" or "<package> <mru>" with an optional trailing
  # tag follows the two integer timestamps.
  LOG_LINE = (
      TIMESTAMP.setResultsName(u'atime') +
      TIMESTAMP.setResultsName(u'ctime') +
      ((PACKAGE + TAG) | (PACKAGE + MRU + pyparsing.Optional(TAG))))

  LINE_STRUCTURES = [
      (u'logline', LOG_LINE),
      (u'header', HEADER),
      (u'footer', FOOTER),
  ]

  _ENCODING = u'UTF-8'

  def _ParseLogLine(self, parser_mediator, structure):
    """Produces events for a single package log line.

    A last access event is produced for a non-zero <atime> and an entry
    modification event for a non-zero <ctime>. Both events share the same
    event data.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    most_recently_used = structure.mru

    # Log lines without an <mru> value are of no interest.
    if not most_recently_used:
      return

    event_data = PopularityContestEventData()
    event_data.mru = most_recently_used
    event_data.package = structure.package
    event_data.record_tag = structure.tag

    # TODO: not doing any check on <tag> fields, even if only informative
    # probably it could be better to check for the expected values.

    # Both <atime> and <ctime> are always present but can be 0, in which case
    # no event is produced for that value and nothing is logged.
    timestamp_descriptions = (
        (structure.atime, definitions.TIME_DESCRIPTION_LAST_ACCESS),
        (structure.ctime, definitions.TIME_DESCRIPTION_ENTRY_MODIFICATION))

    for posix_timestamp, description in timestamp_descriptions:
      if not posix_timestamp:
        continue

      date_time = dfdatetime_posix_time.PosixTime(timestamp=posix_timestamp)
      event = time_events.DateTimeValuesEvent(date_time, description)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Log lines are delegated to _ParseLogLine. A header or footer produces a
    single session event with status "start" or "end" respectively. An unknown
    structure name is logged as a warning and otherwise ignored.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    # TODO: Add anomaly objects for abnormal timestamps, such as when the log
    # timestamp is greater than the session start.

    if key == u'logline':
      self._ParseLogLine(parser_mediator, structure)
      return

    if key not in (u'header', u'footer'):
      logging.warning(
          u'PopularityContestParser, unknown structure: {0:s}.'.format(key))
      return

    posix_timestamp = structure.timestamp
    if not posix_timestamp:
      logging.debug(u'[{0:s}] {1:s} with invalid timestamp.'.format(
          self.NAME, key))
      return

    event_data = PopularityContestSessionEventData()
    event_data.session = u'{0!s}'.format(structure.session)

    if key == u'header':
      # Only the header carries the host identifier and additional details.
      event_data.details = structure.details
      event_data.hostid = structure.id
      event_data.status = u'start'
    else:
      event_data.status = u'end'

    date_time = dfdatetime_posix_time.PosixTime(timestamp=posix_timestamp)
    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_ADDED)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyStructure(self, parser_mediator, line):
    """Verifies that this file is a Popularity Contest log file.

    The line must match the header structure and the header timestamp must
    convert to a non-zero value.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): single line from the text file.

    Returns:
      bool: True if the line was successfully parsed.
    """
    try:
      parsed_header = self.HEADER.parseString(line)
    except pyparsing.ParseException:
      logging.debug(u'Not a Popularity Contest log file, invalid header')
      return False

    converted_timestamp = timelib.Timestamp.FromPosixTime(
        parsed_header.timestamp)
    if not converted_timestamp:
      logging.debug(
          u'Invalid Popularity Contest log file header timestamp.')
      return False

    return True
class PopularityContestParser(text_parser.PyparsingSingleLineTextParser):
  """Parser for popularity contest log files.

  Three single-line structures are recognized:
    header: POPULARITY-CONTEST-<session> TIME:<timestamp> ID:<id> <details>
    footer: END-POPULARITY-CONTEST-<session> TIME:<timestamp>
    logline: <atime> <ctime> <package> <tag>, or
             <atime> <ctime> <package> <mru> [<tag>]
  """

  NAME = 'popularity_contest'
  DESCRIPTION = 'Parser for popularity contest log files.'

  _ASCII_PRINTABLES = pyparsing.printables

  # All non-whitespace characters with a code point from 0 through 65535.
  _UNICODE_PRINTABLES = ''.join(
      character for character in map(py2to3.UNICHR, range(65536))
      if not character.isspace())

  MRU = pyparsing.Word(_UNICODE_PRINTABLES).setResultsName('mru')

  PACKAGE = pyparsing.Word(_ASCII_PRINTABLES).setResultsName('package')

  # A tag is text enclosed in angle brackets, for example <NOFILES>.
  TAG = pyparsing.QuotedString('<', endQuoteChar='>').setResultsName('tag')

  HEADER = (
      pyparsing.Literal('POPULARITY-CONTEST-').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName('session') +
      pyparsing.Literal('TIME:').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName('timestamp') +
      pyparsing.Literal('ID:').suppress() +
      pyparsing.Word(pyparsing.alphanums, exact=32).setResultsName('id') +
      pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName('details'))

  FOOTER = (
      pyparsing.Literal('END-POPULARITY-CONTEST-').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName('session') +
      pyparsing.Literal('TIME:').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName('timestamp'))

  # Either "<package> <tag>" or "<package> <mru>" with an optional trailing
  # tag follows the two integer timestamps.
  LOG_LINE = (
      text_parser.PyparsingConstants.INTEGER.setResultsName('atime') +
      text_parser.PyparsingConstants.INTEGER.setResultsName('ctime') +
      ((PACKAGE + TAG) | (PACKAGE + MRU + pyparsing.Optional(TAG))))

  LINE_STRUCTURES = [
      ('logline', LOG_LINE),
      ('header', HEADER),
      ('footer', FOOTER),
  ]

  # Names of the structures that ParseRecord knows how to handle.
  _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

  _ENCODING = 'UTF-8'

  def _ParseLogLine(self, parser_mediator, structure):
    """Produces events for a single package log line.

    A last access event is produced for a non-zero <atime> and an entry
    modification event for a non-zero <ctime>. Both events share the same
    event data.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    most_recently_used = self._GetValueFromStructure(structure, 'mru')

    # Log lines without an <mru> value are of no interest.
    if not most_recently_used:
      return

    event_data = PopularityContestEventData()
    event_data.mru = most_recently_used
    event_data.package = self._GetValueFromStructure(structure, 'package')
    event_data.record_tag = self._GetValueFromStructure(structure, 'tag')

    # TODO: not doing any check on <tag> fields, even if only informative
    # probably it could be better to check for the expected values.

    # Both <atime> and <ctime> are always present but can be 0, in which case
    # no event is produced for that value and nothing is logged.
    value_descriptions = (
        ('atime', definitions.TIME_DESCRIPTION_LAST_ACCESS),
        ('ctime', definitions.TIME_DESCRIPTION_ENTRY_MODIFICATION))

    for value_name, description in value_descriptions:
      posix_timestamp = self._GetValueFromStructure(structure, value_name)
      if not posix_timestamp:
        continue

      date_time = dfdatetime_posix_time.PosixTime(timestamp=posix_timestamp)
      event = time_events.DateTimeValuesEvent(date_time, description)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Log lines are delegated to _ParseLogLine. A header or footer produces a
    single session event with status "start" or "end" respectively.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure parsed from the log file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in self._SUPPORTED_KEYS:
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    # TODO: Add anomaly objects for abnormal timestamps, such as when the log
    # timestamp is greater than the session start.

    if key == 'logline':
      self._ParseLogLine(parser_mediator, structure)
      return

    posix_timestamp = self._GetValueFromStructure(structure, 'timestamp')
    if posix_timestamp is None:
      logger.debug('[{0:s}] {1:s} with invalid timestamp.'.format(
          self.NAME, key))
      return

    session = self._GetValueFromStructure(structure, 'session')

    event_data = PopularityContestSessionEventData()
    # TODO: determine why session is formatted as a string.
    event_data.session = '{0!s}'.format(session)

    if key == 'header':
      # Only the header carries the host identifier and additional details.
      event_data.details = self._GetValueFromStructure(structure, 'details')
      event_data.hostid = self._GetValueFromStructure(structure, 'id')
      event_data.status = 'start'
    elif key == 'footer':
      event_data.status = 'end'

    date_time = dfdatetime_posix_time.PosixTime(timestamp=posix_timestamp)
    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_ADDED)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyStructure(self, parser_mediator, line):
    """Verifies that this file is a Popularity Contest log file.

    The line is accepted when it matches the header structure.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from a text file.

    Returns:
      bool: True if the line was successfully parsed.
    """
    try:
      self.HEADER.parseString(line)
    except pyparsing.ParseException:
      logger.debug('Not a Popularity Contest log file, invalid header')
      return False
    else:
      return True