from typing import Dict

import pyparsing as pp


def _parse_tables(report_str: str) -> Dict[str, str]:
    """Parse the tables from a fitter report.

    Keys are the table titles, values are the table bodies.
    """
    hline = pp.lineStart() + pp.Word("+", "+-") + pp.lineEnd()
    title = (
        pp.lineStart()
        + ";"
        + pp.SkipTo(";")("title").setParseAction(pp.tokenMap(str.strip))
        + ";"
        + pp.lineEnd()
    )
    # Grab everything until the next horizontal line(s). Tables with
    # column headings will have a horizontal line after the headings and
    # at the end of the table. Odd tables without section headings will
    # only have a single horizontal line.
    data = pp.SkipTo(hline, failOn=pp.lineEnd() * 2, include=True)

    table = hline + title + pp.Combine(hline + data * (1, 2))("body")

    # Make line endings significant
    table.setWhitespaceChars(" \t")

    result = {t.title: t.body for t in table.searchString(report_str)}
    return result
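
# A minimal sketch of how _parse_tables might be exercised; the sample
# report text below is invented for illustration and only mimics the
# fitter report shape described in the comments above.
_SAMPLE_FITTER_REPORT = (
    "+----------------------+\n"
    "; Fitter Summary       ;\n"
    "+----------------------+\n"
    "; Status ; Successful  ;\n"
    "+----------------------+\n")

# Expected: a dict with one entry keyed 'Fitter Summary'.
print(_parse_tables(_SAMPLE_FITTER_REPORT))
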
from typing import Dict

import pyparsing as pp
from pyparsing import pyparsing_common as ppc


def _parse_map_tables(report_str: str) -> Dict[str, str]:
    """Parse the tables from an ISE map report.

    Keys are the title of the table, values are the table body.
    """
    # Capture the title from section headings like:
    #
    #    Section 12 - Control Set Information
    #    ------------------------------------
    title = (
        pp.lineStart()
        + "Section"
        + ppc.integer
        + "-"
        + pp.SkipTo(pp.lineEnd())("title").setParseAction(
            pp.tokenMap(str.strip))
        + pp.lineEnd()
    )

    sec_hline = pp.Suppress(pp.lineStart() + pp.Word("-") + pp.lineEnd() * (1,))

    # Table horizontal lines like
    #    +-------------------------------+
    hline = pp.lineStart() + pp.Word("+", "+-") + pp.lineEnd()

    # Most tables will have the format
    #    +-----------------------+
    #    | Col 1 | Col 2 | Col 3 |
    #    +-----------------------+
    #    | D1    | D2    | D3    |
    #    ...
    #    +-----------------------+
    #
    # However "Control Set Information" appears to use horizontal lines to
    # separate clocks within the data section. Therefore, just grab
    # everything until a horizontal line followed by a blank line rather
    # than something more precise.
    table = pp.Combine(hline + pp.SkipTo(hline + pp.LineEnd(), include=True))(
        "body"
    )
    table_section = title + sec_hline + table

    # Make line endings significant
    table_section.setWhitespaceChars(" \t")

    result = {t.title: t.body for t in table_section.searchString(report_str)}
    return result
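
# A minimal sketch of _parse_map_tables on an invented ISE map report
# fragment; note the table must end with a horizontal line that is
# followed by a blank line, as the comments above explain.
_SAMPLE_MAP_REPORT = (
    "Section 12 - Control Set Information\n"
    "------------------------------------\n"
    "+--------+\n"
    "| Clocks |\n"
    "+--------+\n"
    "\n")

# Expected: a dict with one entry keyed 'Control Set Information'.
print(_parse_map_tables(_SAMPLE_MAP_REPORT))
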
class BashHistoryParser(text_parser.PyparsingMultiLineTextParser):
  """Parses events from Bash history files."""

  NAME = u'bash'
  DESCRIPTION = u'Parser for Bash history files'

  _ENCODING = u'utf-8'

  _TIMESTAMP = pyparsing.Suppress(u'#') + pyparsing.Word(
      pyparsing.nums, min=9, max=10).setParseAction(
          text_parser.PyParseIntCast).setResultsName(u'timestamp')

  _COMMAND = pyparsing.Regex(
      r'.*?(?=($|\n#\d{10}))', re.DOTALL).setResultsName(u'command')

  _LINE_GRAMMAR = _TIMESTAMP + _COMMAND + pyparsing.lineEnd()

  _VERIFICATION_GRAMMAR = (
      pyparsing.Regex(r'^\s?[^#].*?$', re.MULTILINE) + _TIMESTAMP +
      pyparsing.NotAny(pyparsing.pythonStyleComment))

  LINE_STRUCTURES = [(u'log_entry', _LINE_GRAMMAR)]

  def ParseRecord(self, mediator, key, structure):
    """Parses a record and produces a Bash history event.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): elements parsed from the file.

    Raises:
      UnableToParseFile: if an unsupported key is provided.
    """
    if key != u'log_entry':
      raise errors.UnableToParseFile(
          u'Unsupported key: {0:s}'.format(key))

    event = BashHistoryEvent(structure.timestamp, structure.command)
    mediator.ProduceEvent(event)

  def VerifyStructure(self, unused_mediator, line):
    """Verifies that this is a bash history file.

    Args:
      unused_mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      line (str): single line from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    match_generator = self._VERIFICATION_GRAMMAR.scanString(line, maxMatches=1)
    return bool(list(match_generator))
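
# A standalone sketch of the timestamp/command grammar above, rebuilt
# without the plaso framework so it can run on its own; the history
# fragment is invented.
import re
import pyparsing

_TS = pyparsing.Suppress('#') + pyparsing.Word(
    pyparsing.nums, min=9, max=10)('timestamp')
_CMD = pyparsing.Regex(r'.*?(?=($|\n#\d{10}))', re.DOTALL)('command')
_GRAMMAR = _TS + _CMD + pyparsing.lineEnd()

_HISTORY = '#1625097600\nls -la\n#1625097605\necho done\n'
for tokens, _, _ in _GRAMMAR.scanString(_HISTORY):
  print(tokens.timestamp, tokens.command)
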
def __init__(self):
    """Constructor: builds the LAD parser grammar."""
    self.NwNumber = pp.Word(pp.nums, max=1).setParseAction(
        pp.tokenMap(int)).setBreak(False)
    self.Nw = (pp.CaselessLiteral('NW:') + self.NwNumber
               + pp.Suppress(pp.lineEnd()))
    self.Ope_I = pp.Combine(pp.CaselessLiteral('I') + pp.Word(pp.nums, max=2))
    self.Ope_O = pp.Combine(pp.CaselessLiteral('O') + pp.Word(pp.nums, max=2))
    self.Ope_M = pp.Combine(pp.CaselessLiteral('M') + pp.Word(pp.nums, max=2))
    self.Ope = self.Ope_I | self.Ope_O | self.Ope_M
    self.Command_LD = ((pp.CaselessKeyword('LDN') | pp.CaselessKeyword('LD'))
                       + self.Ope + pp.Suppress(pp.lineEnd()))
    self.Command_AND = ((pp.CaselessKeyword('ANDN') | pp.CaselessKeyword('AND'))
                        + self.Ope + pp.Suppress(pp.lineEnd()))
    self.Command_OR = ((pp.CaselessKeyword('ORN') | pp.CaselessKeyword('OR'))
                       + self.Ope + pp.Suppress(pp.lineEnd()))
    self.Command_OUT = (pp.CaselessKeyword('OUT') + self.Ope
                        + pp.Suppress(pp.lineEnd()))
    self.Command_BSAND = pp.CaselessKeyword('BSAND') + pp.Suppress(pp.lineEnd())
    self.Command_BFAND = pp.CaselessKeyword('BFAND') + pp.Suppress(pp.lineEnd())
    self.Command_BSOR = pp.CaselessKeyword('BSOR') + pp.Suppress(pp.lineEnd())
    self.Command_BFOR = pp.CaselessKeyword('BFOR') + pp.Suppress(pp.lineEnd())
    self.Command_LDOR = self.Command_LD + self.Command_OR * (0, 7)
    self.Command_ANDOR = self.Command_AND + self.Command_OR * (0, 7)
    self.Command_LDAND = self.Command_LDOR + self.Command_ANDOR * (0, 7)
    self.Complex = pp.Forward()
    self.Block = pp.Group(
        (self.Complex | self.Command_LDAND)
        + pp.Optional(self.Command_ANDOR * (0, 7)))
    self.ComplexOR = (self.Command_BSOR + self.Block + self.Block
                      + self.Command_BFOR)
    self.ComplexAND = (self.Command_BSAND + self.Block + self.Block
                       + self.Command_BFAND)
    self.Complex <<= self.ComplexOR | self.ComplexAND
    self.NwProgram = pp.Group(self.Nw + self.Block + self.Command_OUT)
    self.Program = pp.OneOrMore(self.NwProgram)
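
# A minimal sketch of the grammar in use, assuming the __init__ above
# belongs to a class hypothetically named LadParser; the IL program below
# is invented.
_IL_SOURCE = (
    "NW:1\n"
    "LD I01\n"
    "AND M02\n"
    "OUT O01\n")
print(LadParser().Program.parseString(_IL_SOURCE))
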
class TestPyparsingSingleLineTextParser(
    text_parser.PyparsingSingleLineTextParser):
  """Single line PyParsing-based text parser for testing purposes."""

  _ENCODING = 'utf-8'

  _LINE = pyparsing.Regex('.*') + pyparsing.lineEnd()

  LINE_STRUCTURES = [('line', _LINE)]

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    This function takes as an input a parsed pyparsing structure
    and produces an EventObject if possible from that structure.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): tokens from a parsed log line.
    """
    return

  def VerifyStructure(self, parser_mediator, line):
    """Verify the structure of the file and return boolean based on that check.

    This function should read enough text from the text file to confirm
    that the file is the correct one for this particular parser.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): single line from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    return True
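
# A quick standalone check of the permissive line grammar used by this test
# double: any single line matches.
import pyparsing

_ANY_LINE = pyparsing.Regex('.*') + pyparsing.lineEnd()
print(_ANY_LINE.parseString('any text at all\n'))
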
from typing import Dict

import pyparsing as pp


def _parse_utilization_tables(util_str: str) -> Dict[str, str]:
    """Find all of the section titles and tables in a Vivado utilization
    report.

    These are returned as a dict with the section titles as keys and the
    table as the value.
    """
    # Find section headings, discarding the number and following horizontal
    # line. For example:
    #
    #    1.1 Summary of Registers by Type
    #    --------------------------------
    sec_num = pp.Suppress(pp.lineStart() + pp.Word(pp.nums + "."))
    sec_title = sec_num + pp.SkipTo(
        pp.lineEnd())("title") + pp.lineEnd().suppress()

    # -------------------------------
    sec_hline = pp.Suppress(pp.lineStart() + pp.Word("-") + pp.lineEnd())
    sec_head = sec_title + sec_hline + pp.lineEnd().suppress()

    # Tables use horizontal lines like the following to mark column
    # headings and the end of the table:
    #
    #    +------+------+-------+
    table_hline = pp.lineStart() + pp.Word("+", "-+") + pp.lineEnd()

    # Tables may just be a header with no data rows, or a full header and
    # data rows, so there will be one or two more horizontal lines.
    data = pp.SkipTo(table_hline, failOn=pp.lineEnd() * 2, include=True)
    table = pp.Combine(table_hline + data * (1, 2))

    section = sec_head + table("table")

    # Make line endings significant
    section.setWhitespaceChars(" \t")

    table_dict = {
        x["title"]: x["table"] for x in section.searchString(util_str)
    }
    return table_dict
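
# A minimal sketch of _parse_utilization_tables on an invented Vivado
# utilization report fragment; the blank line after the dashes is required
# by the sec_head element above.
_SAMPLE_UTIL_REPORT = (
    "1.1 Summary of Registers by Type\n"
    "--------------------------------\n"
    "\n"
    "+------+------+\n"
    "| Type | Used |\n"
    "+------+------+\n"
    "| FDRE |   42 |\n"
    "+------+------+\n")

# Expected: a dict with one entry keyed 'Summary of Registers by Type'.
print(_parse_utilization_tables(_SAMPLE_UTIL_REPORT))
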
def build_parser(self):
    number = ppc.fraction | ppc.number

    short_hex_color = pp.Suppress('#') + pp.Word(pp.nums + pp.hexnums, exact=3)
    short_hex_color.addParseAction(
        lambda t: tuple(int(ch + ch, 16) for ch in t[0]))
    long_hex_color = pp.Suppress('#') + pp.Word(pp.nums + pp.hexnums, exact=6)
    long_hex_color.addParseAction(self.long_hex_color)
    hex_color = long_hex_color | short_hex_color
    hex_color.addParseAction(lambda: self._set_colorspace('rgb'))

    int_or_percent = (ppc.integer + pp.Literal('%')('percent')) | ppc.integer
    int_or_percent.addParseAction(
        lambda t: t[0] * 255 / 100 if t.percent else t[0])

    rgb_color_keyword = pp.Suppress('rgba(') | pp.Suppress('rgb(')
    rgb_color = rgb_color_keyword + pp.delimitedList(
        int_or_percent) + pp.Suppress(')')
    rgb_color.addParseAction(lambda t: (t[0], t[1], t[2]))
    rgb_color.addParseAction(lambda: self._set_colorspace('rgb'))

    jmh_color = pp.Suppress('jmh(') + pp.delimitedList(
        int_or_percent) + pp.Suppress(')')
    jmh_color.addParseAction(lambda t: (t[0], t[1], t[2]))
    jmh_color.addParseAction(lambda: self._set_colorspace('jmh'))

    color = hex_color ^ rgb_color ^ jmh_color

    grad_point = number('x') + pp.Optional(':') + color('y')
    grad_point.addParseAction(lambda t: self.grad_points.append((t.x, t.y)))

    grad_points = pp.OneOrMore(grad_point + pp.lineEnd())
    return grad_points
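
# A hedged usage sketch, assuming build_parser's enclosing class
# (hypothetically named GradientParser here) initializes
# self.grad_points = [] and provides the long_hex_color and
# _set_colorspace helpers; the gradient text is invented.
_gradient = GradientParser()
_gradient.build_parser().parseString('0.0: #f00\n1.0: rgb(255, 255, 255)\n')
print(_gradient.grad_points)
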
class SkyDriveLogParser(text_parser.PyparsingMultiLineTextParser):
  """Parses SkyDrive log files."""

  NAME = 'skydrive_log'
  DESCRIPTION = 'Parser for OneDrive (or SkyDrive) log files.'

  _ENCODING = 'utf-8'

  # Common SDF (SkyDrive Format) structures.
  _COMMA = pyparsing.Literal(',').suppress()
  _HYPHEN = text_parser.PyparsingConstants.HYPHEN

  _THREE_DIGITS = text_parser.PyparsingConstants.THREE_DIGITS
  _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

  MSEC = pyparsing.Word(pyparsing.nums, max=3).setParseAction(
      text_parser.PyParseIntCast)
  IGNORE_FIELD = pyparsing.CharsNotIn(',').suppress()

  # Date and time format used in the header is: YYYY-MM-DD-hhmmss.###
  # For example: 2013-07-25-160323.291
  _SDF_HEADER_DATE_TIME = pyparsing.Group(
      text_parser.PyparsingConstants.DATE_ELEMENTS + _HYPHEN +
      _TWO_DIGITS.setResultsName('hours') +
      _TWO_DIGITS.setResultsName('minutes') +
      _TWO_DIGITS.setResultsName('seconds') +
      pyparsing.Literal('.').suppress() +
      _THREE_DIGITS.setResultsName('milliseconds')).setResultsName(
          'header_date_time')

  # Date and time format used in lines other than the header is:
  # MM-DD-YY,hh:mm:ss.###
  # For example: 07-25-13,16:06:31.820
  _SDF_DATE_TIME = (
      _TWO_DIGITS.setResultsName('month') + _HYPHEN +
      _TWO_DIGITS.setResultsName('day') + _HYPHEN +
      _TWO_DIGITS.setResultsName('year') + _COMMA +
      text_parser.PyparsingConstants.TIME_ELEMENTS + pyparsing.Suppress('.') +
      _THREE_DIGITS.setResultsName('milliseconds')).setResultsName(
          'date_time')

  _SDF_HEADER_START = (
      pyparsing.Literal('######').suppress() +
      pyparsing.Literal('Logging started.').setResultsName('log_start'))

  # Multiline entry end marker, matched from right to left.
  _SDF_ENTRY_END = pyparsing.StringEnd() | _SDF_HEADER_START | _SDF_DATE_TIME

  _SDF_LINE = (
      _SDF_DATE_TIME + _COMMA +
      IGNORE_FIELD + _COMMA +
      IGNORE_FIELD + _COMMA +
      IGNORE_FIELD + _COMMA +
      pyparsing.CharsNotIn(',').setResultsName('module') + _COMMA +
      pyparsing.CharsNotIn(',').setResultsName('source_code') + _COMMA +
      IGNORE_FIELD + _COMMA +
      IGNORE_FIELD + _COMMA +
      pyparsing.CharsNotIn(',').setResultsName('log_level') + _COMMA +
      pyparsing.SkipTo(_SDF_ENTRY_END).setResultsName('detail') +
      pyparsing.ZeroOrMore(pyparsing.lineEnd()))

  _SDF_HEADER = (
      _SDF_HEADER_START +
      pyparsing.Literal('Version=').setResultsName('version_string') +
      pyparsing.Word(pyparsing.nums + '.').setResultsName('version_number') +
      pyparsing.Literal('StartSystemTime:').suppress() +
      _SDF_HEADER_DATE_TIME +
      pyparsing.Literal('StartLocalTime:').setResultsName(
          'local_time_string') +
      pyparsing.SkipTo(pyparsing.lineEnd()).setResultsName('details') +
      pyparsing.lineEnd())

  LINE_STRUCTURES = [
      ('logline', _SDF_LINE),
      ('header', _SDF_HEADER)]

  def _ParseHeader(self, parser_mediator, structure):
    """Parse header lines and store appropriate attributes.

    ['Logging started.', 'Version=', '17.0.2011.0627',
     [2013, 7, 25], 16, 3, 23, 291, 'StartLocalTime', '<details>']

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
    try:
      date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=structure.header_date_time)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'invalid date time value: {0!s}'.format(structure.header_date_time))
      return

    event_data = SkyDriveLogEventData()
    # TODO: refactor detail to individual event data attributes.
    event_data.detail = '{0:s} {1:s} {2:s} {3:s} {4:s}'.format(
        structure.log_start, structure.version_string,
        structure.version_number, structure.local_time_string,
        structure.details)

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_ADDED)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def _ParseLine(self, parser_mediator, structure):
    """Parses a logline and stores appropriate attributes.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
    # TODO: Verify if date and time value is locale dependent.
    month, day_of_month, year, hours, minutes, seconds, milliseconds = (
        structure.date_time)

    year += 2000
    time_elements_tuple = (
        year, month, day_of_month, hours, minutes, seconds, milliseconds)

    try:
      date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'invalid date time value: {0!s}'.format(structure.date_time))
      return

    event_data = SkyDriveLogEventData()
    # Replace newlines with spaces in structure.detail to preserve output.
    # TODO: refactor detail to individual event data attributes.
    event_data.detail = structure.detail.replace('\n', ' ')
    event_data.log_level = structure.log_level
    event_data.module = structure.module
    event_data.source_code = structure.source_code

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_ADDED)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parse each record structure and return an EventObject if applicable.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): identifier of the structure of tokens.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in ('header', 'logline'):
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    if key == 'logline':
      self._ParseLine(parser_mediator, structure)
    elif key == 'header':
      self._ParseHeader(parser_mediator, structure)

  def VerifyStructure(self, parser_mediator, lines):
    """Verify that this file is a SkyDrive log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      lines (str): one or more lines from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    try:
      structure = self._SDF_HEADER.parseString(lines)
    except pyparsing.ParseException:
      logger.debug('Not a SkyDrive log file')
      return False

    try:
      dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=structure.header_date_time)
    except ValueError:
      logger.debug(
          'Not a SkyDrive log file, invalid date and time: {0!s}'.format(
              structure.header_date_time))
      return False

    return True
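
# A standalone sketch of the SDF timestamp grammar above, rebuilt without
# the plaso helper constants: MM-DD-YY,hh:mm:ss.###
import pyparsing

_TWO = pyparsing.Word(pyparsing.nums, exact=2)
_THREE = pyparsing.Word(pyparsing.nums, exact=3)

_sdf_date_time = (
    _TWO('month') + pyparsing.Suppress('-') + _TWO('day') +
    pyparsing.Suppress('-') + _TWO('year') + pyparsing.Suppress(',') +
    _TWO('hours') + pyparsing.Suppress(':') + _TWO('minutes') +
    pyparsing.Suppress(':') + _TWO('seconds') + pyparsing.Suppress('.') +
    _THREE('milliseconds'))

print(_sdf_date_time.parseString('07-25-13,16:06:31.820'))
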
# C++ Syntax Description

import pyparsing as pp

from cpp_lang import *
from cpp_builders import *
from pp_utils import *

# comments need to be removed
comment = (pp.cStyleComment | pp.cppStyleComment)

preprocessor = pp.lineStart() + pp.Word('#', pp.alphas) + pp.SkipTo(
    pp.lineEnd())
preprocessor.setWhitespaceChars(' \r\t')

identifier = pp.Word(pp.alphas + '_', pp.alphanums + '_')

persistency = pp.Keyword('static').setParseAction(
    pp.replaceWith(TypeArgs.STATIC_TYPE))
volatility = (
    pp.Keyword('const').setParseAction(pp.replaceWith(TypeArgs.CONST_TYPE))
    | pp.Keyword('volatile').setParseAction(
        pp.replaceWith(TypeArgs.VOLATILE_TYPE)))
reference = (
    pp.Literal('*').setParseAction(
        pp.replaceWith(CppPointerTypeExpression.POINTER_VAR))
    | pp.Literal('&').setParseAction(
        pp.replaceWith(CppPointerTypeExpression.REFERENCE_VAR)))

# member function on const object
const_function = pp.Keyword('const').setParseAction(
    pp.replaceWith(FunctionArgs.CONST_FUNCTION))
virtual_function = pp.Keyword('virtual').setParseAction(
    pp.replaceWith(FunctionArgs.VIRTUAL_FUNCTION))
destructor_tag = pp.Literal('~').setParseAction(
    pp.replaceWith(FunctionArgs.DESTRUCTOR_FUNCTION))
abstract_function = (pp.Literal('=') + pp.Literal('0')).setParseAction(
    pp.replaceWith(FunctionArgs.ABSTRACT_FUNCTION))
inline_function = pp.Keyword('inline').setParseAction(
    pp.replaceWith(FunctionArgs.INLINE_FUNCTION))

complex_type = (
    pp.Keyword('class').setParseAction(
        pp.replaceWith(CppComplexTypeDefinition.CLASS))
    | pp.Keyword('struct').setParseAction(
        pp.replaceWith(CppComplexTypeDefinition.STRUCT))
    | pp.Keyword('union').setParseAction(
        pp.replaceWith(CppComplexTypeDefinition.UNION)))
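
# A quick standalone check of the two self-contained elements above; the
# inputs are invented.
print(identifier.parseString('_myVar42'))
print(preprocessor.parseString('#include <vector>\n'))
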
class GoogleDriveSyncLogParser(text_parser.PyparsingMultiLineTextParser):
  """Parses events from Google Drive Sync log files."""

  NAME = 'gdrive_synclog'
  DATA_FORMAT = 'Google Drive Sync log file'

  _ENCODING = 'utf-8'

  # Increase the buffer size, as log messages are often many lines of Python
  # object dumps or similar. The default is too small for this and results in
  # premature end of string matching on multi-line log entries.
  BUFFER_SIZE = 16384

  _HYPHEN = text_parser.PyparsingConstants.HYPHEN

  _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
  _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

  _GDS_DATE_TIME = pyparsing.Group(
      _FOUR_DIGITS.setResultsName('year') + _HYPHEN +
      _TWO_DIGITS.setResultsName('month') + _HYPHEN +
      _TWO_DIGITS.setResultsName('day') +
      text_parser.PyparsingConstants.TIME_MSEC_ELEMENTS +
      pyparsing.Word(pyparsing.printables).setResultsName('time_zone_offset')
  ).setResultsName('date_time')

  # Multiline entry end marker, matched from right to left.
  _GDS_ENTRY_END = pyparsing.StringEnd() | _GDS_DATE_TIME

  _GDS_LINE = (
      _GDS_DATE_TIME +
      pyparsing.Word(pyparsing.alphas).setResultsName('log_level') +
      # TODO: strip pid= out, cast to integers?
      pyparsing.Word(pyparsing.printables).setResultsName('pid') +
      # TODO: consider stripping thread identifier/cleaning up thread name?
      pyparsing.Word(pyparsing.printables).setResultsName('thread') +
      pyparsing.Word(pyparsing.printables).setResultsName('source_code') +
      pyparsing.SkipTo(_GDS_ENTRY_END).setResultsName('message') +
      pyparsing.ZeroOrMore(pyparsing.lineEnd()))

  LINE_STRUCTURES = [
      ('logline', _GDS_LINE),
  ]

  def _GetISO8601String(self, structure):
    """Retrieves an ISO 8601 date time string from the structure.

    The date and time values in Google Drive Sync log files are formatted as:
    "2018-01-24 18:25:08,454 -0800".

    Args:
      structure (pyparsing.ParseResults): structure of tokens derived from a
          line of a text file, that contains the time elements.

    Returns:
      str: ISO 8601 date time string.

    Raises:
      ValueError: if the structure cannot be converted into a date time
          string.
    """
    time_zone_offset = self._GetValueFromStructure(
        structure, 'time_zone_offset')

    try:
      time_zone_offset_hours = int(time_zone_offset[1:3], 10)
      time_zone_offset_minutes = int(time_zone_offset[3:5], 10)
    except (IndexError, TypeError, ValueError) as exception:
      raise ValueError(
          'unable to parse time zone offset with error: {0!s}.'.format(
              exception))

    year = self._GetValueFromStructure(structure, 'year')
    month = self._GetValueFromStructure(structure, 'month')
    day_of_month = self._GetValueFromStructure(structure, 'day')
    hours = self._GetValueFromStructure(structure, 'hours')
    minutes = self._GetValueFromStructure(structure, 'minutes')
    seconds = self._GetValueFromStructure(structure, 'seconds')
    microseconds = self._GetValueFromStructure(structure, 'microseconds')

    try:
      iso8601 = (
          '{0:04d}-{1:02d}-{2:02d}T{3:02d}:{4:02d}:{5:02d}.{6:03d}'
          '{7:s}{8:02d}:{9:02d}').format(
              year, month, day_of_month, hours, minutes, seconds,
              microseconds, time_zone_offset[0], time_zone_offset_hours,
              time_zone_offset_minutes)
    except (TypeError, ValueError) as exception:
      raise ValueError(
          'unable to format date time string with error: {0!s}.'.format(
              exception))

    return iso8601

  def _ParseRecordLogline(self, parser_mediator, structure):
    """Parses a logline record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from a
          line of a text file.
    """
    date_time = dfdatetime_time_elements.TimeElementsInMilliseconds()

    time_elements_structure = self._GetValueFromStructure(
        structure, 'date_time')

    try:
      datetime_iso8601 = self._GetISO8601String(time_elements_structure)
      date_time.CopyFromStringISO8601(datetime_iso8601)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'invalid date time value: {0!s}'.format(time_elements_structure))
      return

    # Replace newlines with spaces in structure.message to preserve output.
    message = self._GetValueFromStructure(structure, 'message')
    if message:
      message = message.replace('\n', ' ')

    event_data = GoogleDriveSyncLogEventData()
    event_data.log_level = self._GetValueFromStructure(structure, 'log_level')
    event_data.pid = self._GetValueFromStructure(structure, 'pid')
    event_data.thread = self._GetValueFromStructure(structure, 'thread')
    event_data.source_code = self._GetValueFromStructure(
        structure, 'source_code')
    event_data.message = message

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_ADDED)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): identifier of the structure of tokens.
      structure (pyparsing.ParseResults): structure of tokens derived from a
          line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key != 'logline':
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    self._ParseRecordLogline(parser_mediator, structure)

  def VerifyStructure(self, parser_mediator, lines):
    """Verify that this file is a Google Drive Sync log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      lines (str): one or more lines from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    try:
      structure = self._GDS_LINE.parseString(lines)
    except pyparsing.ParseException as exception:
      logger.debug(
          'Not a Google Drive Sync log file: {0!s}'.format(exception))
      return False

    date_time = dfdatetime_time_elements.TimeElementsInMilliseconds()
    date_time_string = self._GetValueFromStructure(structure, 'date_time')

    try:
      datetime_iso8601 = self._GetISO8601String(date_time_string)
      date_time.CopyFromStringISO8601(datetime_iso8601)
    except ValueError as exception:
      logger.debug((
          'Not a Google Drive Sync log file, invalid date/time: {0!s} '
          'with error: {1!s}').format(date_time_string, exception))
      return False

    return True
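
# A standalone sketch of the time zone normalization performed by
# _GetISO8601String: an offset such as '-0800' is split into hours and
# minutes and re-joined with a colon to form an ISO 8601 string. The
# timestamp literal below is the example from the docstring.
_offset = '-0800'
_hours = int(_offset[1:3], 10)
_minutes = int(_offset[3:5], 10)
print('2018-01-24T18:25:08.454{0:s}{1:02d}:{2:02d}'.format(
    _offset[0], _hours, _minutes))  # 2018-01-24T18:25:08.454-08:00
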
class ApacheAccessParser(text_parser.PyparsingSingleLineTextParser):
  """Apache access log file parser."""

  NAME = 'apache_access'
  DESCRIPTION = 'Apache access Parser'

  MAX_LINE_LENGTH = 2048

  # Date format [18/Sep/2011:19:18:28 -0400]
  _DATE_TIME = pyparsing.Group(
      pyparsing.Suppress('[') +
      text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('day') +
      pyparsing.Suppress('/') +
      text_parser.PyparsingConstants.THREE_LETTERS.setResultsName('month') +
      pyparsing.Suppress('/') +
      text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName('year') +
      pyparsing.Suppress(':') +
      text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('hours') +
      pyparsing.Suppress(':') +
      text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('minutes') +
      pyparsing.Suppress(':') +
      text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('seconds') +
      pyparsing.Combine(
          pyparsing.oneOf(['-', '+']) +
          pyparsing.Word(
              pyparsing.nums, exact=4)).setResultsName('time_offset') +
      pyparsing.Suppress(']')).setResultsName('date_time')

  _HTTP_REQUEST = (
      pyparsing.Suppress('"') +
      pyparsing.SkipTo('"').setResultsName('http_request') +
      pyparsing.Suppress('"'))

  _REMOTE_NAME = (
      pyparsing.Word(pyparsing.alphanums) |
      pyparsing.Literal('-')).setResultsName('remote_name')

  _RESPONSE_BYTES = (
      pyparsing.Literal('-') |
      text_parser.PyparsingConstants.INTEGER).setResultsName('response_bytes')

  _REFERER = (
      pyparsing.Suppress('"') +
      pyparsing.SkipTo('"').setResultsName('referer') +
      pyparsing.Suppress('"'))

  _USER_AGENT = (
      pyparsing.Suppress('"') +
      pyparsing.SkipTo('"').setResultsName('user_agent') +
      pyparsing.Suppress('"'))

  _USER_NAME = (
      pyparsing.Word(pyparsing.alphanums) |
      pyparsing.Literal('-')).setResultsName('user_name')

  # Defined in https://httpd.apache.org/docs/2.4/logs.html
  # format: "%h %l %u %t \"%r\" %>s %b"
  _COMMON_LOG_FORMAT_LINE = (
      text_parser.PyparsingConstants.IP_ADDRESS.setResultsName('ip_address') +
      _REMOTE_NAME +
      _USER_NAME +
      _DATE_TIME +
      _HTTP_REQUEST +
      text_parser.PyparsingConstants.INTEGER.setResultsName('response_code') +
      _RESPONSE_BYTES +
      pyparsing.lineEnd())

  # Defined in https://httpd.apache.org/docs/2.4/logs.html
  # format: "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\""
  _COMBINED_LOG_FORMAT_LINE = (
      text_parser.PyparsingConstants.IP_ADDRESS.setResultsName('ip_address') +
      _REMOTE_NAME +
      _USER_NAME +
      _DATE_TIME +
      _HTTP_REQUEST +
      text_parser.PyparsingConstants.INTEGER.setResultsName('response_code') +
      _RESPONSE_BYTES +
      _REFERER +
      _USER_AGENT +
      pyparsing.lineEnd())

  LINE_STRUCTURES = [
      ('combined_log_format', _COMBINED_LOG_FORMAT_LINE),
      ('common_log_format', _COMMON_LOG_FORMAT_LINE)]

  _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

  # TODO: migrate function after dfdatetime issue #47 is fixed.
  def _GetISO8601String(self, structure):
    """Normalize date time parsed format to an ISO 8601 date time string.

    The date and time values in Apache access log files are formatted as:
    "[18/Sep/2011:19:18:28 -0400]".

    Args:
      structure (pyparsing.ParseResults): structure of tokens derived from a
          line of a text file.

    Returns:
      str: ISO 8601 date time string.

    Raises:
      ValueError: if the structure cannot be converted into a date time
          string.
    """
    month = self._GetValueFromStructure(structure, 'month')

    try:
      month = timelib.MONTH_DICT.get(month.lower(), 0)
    except AttributeError as exception:
      raise ValueError(
          'unable to parse month with error: {0!s}.'.format(exception))

    time_offset = self._GetValueFromStructure(structure, 'time_offset')

    try:
      time_offset_hours = int(time_offset[1:3], 10)
      time_offset_minutes = int(time_offset[3:5], 10)
    except (IndexError, TypeError, ValueError) as exception:
      raise ValueError(
          'unable to parse time zone offset with error: {0!s}.'.format(
              exception))

    year = self._GetValueFromStructure(structure, 'year')
    day_of_month = self._GetValueFromStructure(structure, 'day')
    hours = self._GetValueFromStructure(structure, 'hours')
    minutes = self._GetValueFromStructure(structure, 'minutes')
    seconds = self._GetValueFromStructure(structure, 'seconds')

    try:
      date_time_string = (
          '{0:04d}-{1:02d}-{2:02d}T{3:02d}:{4:02d}:{5:02d}.000000'
          '{6:s}{7:02d}:{8:02d}').format(
              year, month, day_of_month, hours, minutes, seconds,
              time_offset[0], time_offset_hours, time_offset_minutes)
    except ValueError as exception:
      raise ValueError(
          'unable to format date time string with error: {0!s}.'.format(
              exception))

    return date_time_string

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a matching entry.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): elements parsed from the file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in self._SUPPORTED_KEYS:
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    date_time = dfdatetime_time_elements.TimeElements()
    date_time_string = self._GetValueFromStructure(structure, 'date_time')

    try:
      iso_date_time = self._GetISO8601String(date_time_string)
      date_time.CopyFromStringISO8601(iso_date_time)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'invalid date time value: {0!s}'.format(date_time_string))
      return

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_RECORDED)

    event_data = ApacheAccessEventData()
    event_data.ip_address = self._GetValueFromStructure(
        structure, 'ip_address')
    event_data.remote_name = self._GetValueFromStructure(
        structure, 'remote_name')
    event_data.user_name = self._GetValueFromStructure(structure, 'user_name')
    event_data.http_request = self._GetValueFromStructure(
        structure, 'http_request')
    event_data.http_response_code = self._GetValueFromStructure(
        structure, 'response_code')
    event_data.http_response_bytes = self._GetValueFromStructure(
        structure, 'response_bytes')

    if key == 'combined_log_format':
      event_data.http_request_referer = self._GetValueFromStructure(
          structure, 'referer')
      event_data.http_request_user_agent = self._GetValueFromStructure(
          structure, 'user_agent')

    parser_mediator.ProduceEventWithEventData(event, event_data)

  # pylint: disable=unused-argument
  def VerifyStructure(self, parser_mediator, line):
    """Verifies that this is an apache access log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    return max([parser.matches(line) for _, parser in self.LINE_STRUCTURES])
class APTHistoryLogParser(text_parser.PyparsingSingleLineTextParser):
  """Parses Advanced Packaging Tool (APT) History log files."""

  NAME = 'apt_history'
  DATA_FORMAT = 'Advanced Packaging Tool (APT) History log file'

  # APT History log lines can be very long.
  MAX_LINE_LENGTH = 65536

  _ENCODING = 'utf-8'

  _HYPHEN = text_parser.PyparsingConstants.HYPHEN

  _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
  _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

  _APTHISTORY_DATE_TIME = pyparsing.Group(
      _FOUR_DIGITS + _HYPHEN +
      _TWO_DIGITS + _HYPHEN +
      _TWO_DIGITS +
      _TWO_DIGITS + pyparsing.Suppress(':') +
      _TWO_DIGITS + pyparsing.Suppress(':') +
      _TWO_DIGITS)

  _RECORD_START = (
      # APT History logs may start with empty lines.
      pyparsing.ZeroOrMore(pyparsing.lineEnd()) +
      pyparsing.Literal('Start-Date:') +
      _APTHISTORY_DATE_TIME.setResultsName('start_date') +
      pyparsing.lineEnd())

  _RECORD_BODY = (
      pyparsing.MatchFirst([
          pyparsing.Literal('Commandline:'),
          pyparsing.Literal('Downgrade:'),
          pyparsing.Literal('Error:'),
          pyparsing.Literal('Install:'),
          pyparsing.Literal('Purge:'),
          pyparsing.Literal('Remove:'),
          pyparsing.Literal('Requested-By:'),
          pyparsing.Literal('Upgrade:')]) +
      pyparsing.restOfLine())

  _RECORD_END = (
      pyparsing.Literal('End-Date:') +
      _APTHISTORY_DATE_TIME.setResultsName('end_date') +
      pyparsing.OneOrMore(pyparsing.lineEnd()))

  LINE_STRUCTURES = [
      ('record_start', _RECORD_START),
      ('record_body', _RECORD_BODY),
      ('record_end', _RECORD_END)]

  def __init__(self):
    """Initializes an APT History parser."""
    super(APTHistoryLogParser, self).__init__()
    self._date_time = None
    self._event_data = None
    self._downgrade = None
    self._install = None
    self._purge = None
    self._remove = None
    self._upgrade = None

  @staticmethod
  def _BuildDateTime(time_elements_structure):
    """Builds time elements from an APT History time stamp.

    Args:
      time_elements_structure (pyparsing.ParseResults): structure of tokens
          derived from an APT History time stamp.

    Returns:
      dfdatetime.TimeElements: date and time extracted from the structure or
          None if the structure does not represent a valid string.
    """
    # Ensure time_elements_tuple is not a pyparsing.ParseResults otherwise
    # copy.deepcopy() of the dfDateTime object will fail on Python 3.8 with:
    # "TypeError: 'str' object is not callable" due to pyparsing.ParseResults
    # overriding __getattr__ with a function that returns an empty string
    # when a named token does not exist.
    try:
      year, month, day_of_month, hours, minutes, seconds = (
          time_elements_structure)
      date_time = dfdatetime_time_elements.TimeElements(
          time_elements_tuple=(
              year, month, day_of_month, hours, minutes, seconds))
      # APT History logs store date and time values in local time.
      date_time.is_local_time = True
      return date_time
    except (TypeError, ValueError):
      return None

  def _ParseRecordStart(self, parser_mediator, structure):
    """Parses the first line of a log record.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a log entry.
    """
    self._date_time = self._BuildDateTime(structure.get('start_date', None))
    if not self._date_time:
      parser_mediator.ProduceExtractionWarning(
          'invalid date time value: {0!s}'.format(self._date_time))
      return

    self._event_data = APTHistoryLogEventData()
    return

  def _ParseRecordBody(self, structure):
    """Parses a line from the body of a log record.

    Args:
      structure (pyparsing.ParseResults): structure of tokens derived from
          a log entry.

    Raises:
      ParseError: when the date and time value is missing.
    """
    if not self._date_time:
      raise errors.ParseError('Missing date time value.')

    # Command data
    if structure[0] == 'Commandline:':
      self._event_data.command = ''.join(structure)

    elif structure[0] == 'Error:':
      self._event_data.error = ''.join(structure)

    elif structure[0] == 'Requested-By:':
      self._event_data.requester = ''.join(structure)

    # Package lists
    elif structure[0] == 'Downgrade:':
      self._downgrade = ''.join(structure)

    elif structure[0] == 'Install:':
      self._install = ''.join(structure)

    elif structure[0] == 'Purge:':
      self._purge = ''.join(structure)

    elif structure[0] == 'Remove:':
      self._remove = ''.join(structure)

    elif structure[0] == 'Upgrade:':
      self._upgrade = ''.join(structure)

  def _ParseRecordEnd(self, parser_mediator):
    """Parses the last line of a log record.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.

    Raises:
      ParseError: when the date and time value is missing.
    """
    if not self._date_time:
      raise errors.ParseError('Missing date time value.')

    # Create relevant events for record
    if self._downgrade:
      self._event_data.packages = self._downgrade
      event = time_events.DateTimeValuesEvent(
          self._date_time, definitions.TIME_DESCRIPTION_DOWNGRADE,
          time_zone=parser_mediator.timezone)
      parser_mediator.ProduceEventWithEventData(event, self._event_data)

    if self._install:
      self._event_data.packages = self._install
      event = time_events.DateTimeValuesEvent(
          self._date_time, definitions.TIME_DESCRIPTION_INSTALLATION,
          time_zone=parser_mediator.timezone)
      parser_mediator.ProduceEventWithEventData(event, self._event_data)

    if self._purge:
      self._event_data.packages = self._purge
      event = time_events.DateTimeValuesEvent(
          self._date_time, definitions.TIME_DESCRIPTION_DELETED,
          time_zone=parser_mediator.timezone)
      parser_mediator.ProduceEventWithEventData(event, self._event_data)

    if self._remove:
      self._event_data.packages = self._remove
      event = time_events.DateTimeValuesEvent(
          self._date_time, definitions.TIME_DESCRIPTION_DELETED,
          time_zone=parser_mediator.timezone)
      parser_mediator.ProduceEventWithEventData(event, self._event_data)

    if self._upgrade:
      self._event_data.packages = self._upgrade
      event = time_events.DateTimeValuesEvent(
          self._date_time, definitions.TIME_DESCRIPTION_UPDATE,
          time_zone=parser_mediator.timezone)
      parser_mediator.ProduceEventWithEventData(event, self._event_data)

  def _ResetState(self):
    """Resets stored values in the parser."""
    self._date_time = None
    self._downgrade = None
    self._event_data = None
    self._install = None
    self._purge = None
    self._remove = None
    self._upgrade = None

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): identifier of the structure of tokens.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a log entry.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key == 'record_start':
      self._ParseRecordStart(parser_mediator, structure)
      return

    if key == 'record_body':
      self._ParseRecordBody(structure)
      return

    if key == 'record_end':
      self._ParseRecordEnd(parser_mediator)
      # Reset for next record.
      self._ResetState()
      return

    raise errors.ParseError(
        'Unable to parse record, unknown structure: {0:s}'.format(key))

  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is an APT History log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): single line from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    try:
      self._RECORD_START.parseString(line)
      # Reset stored values for parsing a new file.
      self._ResetState()
    except pyparsing.ParseException as exception:
      logger.debug('Not an APT History log file: {0!s}'.format(exception))
      return False

    return True
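
# A standalone sketch of the record start grammar above, rebuilt without
# the plaso helper constants; the log line is invented.
import pyparsing

_FOUR = pyparsing.Word(pyparsing.nums, exact=4)
_TWO = pyparsing.Word(pyparsing.nums, exact=2)
_HYPHEN = pyparsing.Literal('-').suppress()

_apt_date_time = pyparsing.Group(
    _FOUR + _HYPHEN + _TWO + _HYPHEN + _TWO +
    _TWO + pyparsing.Suppress(':') + _TWO + pyparsing.Suppress(':') + _TWO)

_record_start = (
    pyparsing.Literal('Start-Date:') + _apt_date_time('start_date') +
    pyparsing.lineEnd())

print(_record_start.parseString('Start-Date: 2019-07-10  16:38:12\n'))
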
class SyslogParser(text_parser.PyparsingMultiLineTextParser):
  """Parses syslog formatted log files."""

  NAME = u'syslog'
  DESCRIPTION = u'Syslog Parser'

  _ENCODING = u'utf-8'

  _VERIFICATION_REGEX = re.compile(r'^\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}\s')

  _plugin_classes = {}

  # The reporter and facility fields can contain any printable character, but
  # to allow for processing of syslog formats that delimit the reporter and
  # facility with printable characters, we remove certain common delimiters
  # from the set of printable characters.
  _REPORTER_CHARACTERS = u''.join(
      [c for c in pyparsing.printables if c not in [u':', u'[', u'<']])
  _FACILITY_CHARACTERS = u''.join(
      [c for c in pyparsing.printables if c not in [u':', u'>']])

  _PYPARSING_COMPONENTS = {
      u'month': text_parser.PyparsingConstants.MONTH.setResultsName(u'month'),
      u'day': text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName(
          u'day'),
      u'hour': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          u'hour'),
      u'minute': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          u'minute'),
      u'second': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          u'second'),
      u'fractional_seconds': pyparsing.Word(pyparsing.nums).setResultsName(
          u'fractional_seconds'),
      u'hostname': pyparsing.Word(pyparsing.printables).setResultsName(
          u'hostname'),
      u'reporter': pyparsing.Word(_REPORTER_CHARACTERS).setResultsName(
          u'reporter'),
      u'pid': text_parser.PyparsingConstants.PID.setResultsName(u'pid'),
      u'facility': pyparsing.Word(_FACILITY_CHARACTERS).setResultsName(
          u'facility'),
      u'body': pyparsing.Regex(
          r'.*?(?=($|\n\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}))',
          re.DOTALL).setResultsName(u'body'),
      u'comment_body': pyparsing.SkipTo(u' ---').setResultsName(u'body')
  }

  _PYPARSING_COMPONENTS[u'date'] = (
      _PYPARSING_COMPONENTS[u'month'] +
      _PYPARSING_COMPONENTS[u'day'] +
      _PYPARSING_COMPONENTS[u'hour'] + pyparsing.Suppress(u':') +
      _PYPARSING_COMPONENTS[u'minute'] + pyparsing.Suppress(u':') +
      _PYPARSING_COMPONENTS[u'second'] + pyparsing.Optional(
          pyparsing.Suppress(u'.') +
          _PYPARSING_COMPONENTS[u'fractional_seconds']))

  _SYSLOG_LINE = (
      _PYPARSING_COMPONENTS[u'date'] +
      _PYPARSING_COMPONENTS[u'hostname'] +
      _PYPARSING_COMPONENTS[u'reporter'] +
      pyparsing.Optional(
          pyparsing.Suppress(u'[') + _PYPARSING_COMPONENTS[u'pid'] +
          pyparsing.Suppress(u']')) +
      pyparsing.Optional(
          pyparsing.Suppress(u'<') + _PYPARSING_COMPONENTS[u'facility'] +
          pyparsing.Suppress(u'>')) +
      pyparsing.Optional(pyparsing.Suppress(u':')) +
      _PYPARSING_COMPONENTS[u'body'] + pyparsing.lineEnd())

  _SYSLOG_COMMENT = (
      _PYPARSING_COMPONENTS[u'date'] + pyparsing.Suppress(u':') +
      pyparsing.Suppress(u'---') + _PYPARSING_COMPONENTS[u'comment_body'] +
      pyparsing.Suppress(u'---') + pyparsing.LineEnd())

  _KERNEL_SYSLOG_LINE = (
      _PYPARSING_COMPONENTS[u'date'] +
      pyparsing.Literal(u'kernel').setResultsName(u'reporter') +
      pyparsing.Suppress(u':') +
      _PYPARSING_COMPONENTS[u'body'] + pyparsing.lineEnd())

  LINE_STRUCTURES = [
      (u'syslog_line', _SYSLOG_LINE),
      (u'syslog_line', _KERNEL_SYSLOG_LINE),
      (u'syslog_comment', _SYSLOG_COMMENT)]

  _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

  def __init__(self):
    """Initializes a parser object."""
    super(SyslogParser, self).__init__()
    self._last_month = 0
    self._maximum_year = 0
    self._plugin_objects_by_reporter = {}
    self._year_use = 0

  def _UpdateYear(self, mediator, month):
    """Updates the year to use for events, based on last observed month.

    Args:
      mediator (ParserMediator): mediates the interactions between parsers
          and other components, such as storage and abort signals.
      month (int): month observed by the parser, where January is 1.
    """
    if not self._year_use:
      self._year_use = mediator.GetEstimatedYear()
    if not self._maximum_year:
      self._maximum_year = mediator.GetLatestYear()

    if not self._last_month:
      self._last_month = month
      return

    # Some syslog daemons allow out-of-order sequences, so allow some leeway
    # to not cause Apr->May->Apr to cause the year to increment.
    # See http://bugzilla.adiscon.com/show_bug.cgi?id=527
    if self._last_month > (month + 1):
      if self._year_use != self._maximum_year:
        self._year_use += 1
    self._last_month = month

  def EnablePlugins(self, plugin_includes):
    """Enables parser plugins.

    Args:
      plugin_includes (list[str]): names of the plugins to enable, where None
          or an empty list represents all plugins. Note that the default
          plugin is handled separately.
    """
    super(SyslogParser, self).EnablePlugins(plugin_includes)

    self._plugin_objects_by_reporter = {}
    for plugin_object in self._plugin_objects:
      self._plugin_objects_by_reporter[plugin_object.REPORTER] = plugin_object

  def ParseRecord(self, mediator, key, structure):
    """Parses a matching entry.

    Args:
      mediator (ParserMediator): mediates the interactions between parsers
          and other components, such as storage and abort signals.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): elements parsed from the file.

    Raises:
      UnableToParseFile: if an unsupported key is provided.
    """
    if key not in self._SUPPORTED_KEYS:
      raise errors.UnableToParseFile(u'Unsupported key: {0:s}'.format(key))

    month = timelib.MONTH_DICT.get(structure.month.lower(), None)
    if not month:
      mediator.ProduceParserError(
          u'Invalid month value: {0:s}'.format(structure.month))
      return

    self._UpdateYear(mediator, month)
    timestamp = timelib.Timestamp.FromTimeParts(
        year=self._year_use, month=month, day=structure.day,
        hour=structure.hour, minutes=structure.minute,
        seconds=structure.second, timezone=mediator.timezone)

    if key == u'syslog_comment':
      comment_attributes = {u'body': structure.body}
      event = SyslogCommentEvent(timestamp, 0, comment_attributes)
      mediator.ProduceEvent(event)
      return

    reporter = structure.reporter
    attributes = {
        u'hostname': structure.hostname,
        u'reporter': reporter,
        u'pid': structure.pid,
        u'body': structure.body}

    plugin_object = self._plugin_objects_by_reporter.get(reporter, None)
    if not plugin_object:
      event_object = SyslogLineEvent(timestamp, 0, attributes)
      mediator.ProduceEvent(event_object)

    else:
      try:
        plugin_object.Process(mediator, timestamp, attributes)

      except errors.WrongPlugin:
        event_object = SyslogLineEvent(timestamp, 0, attributes)
        mediator.ProduceEvent(event_object)

  def VerifyStructure(self, unused_mediator, line):
    """Verifies that this is a syslog-formatted file.

    Args:
      unused_mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      line (str): single line from the text file.

    Returns:
      bool: whether the line appears to contain syslog content.
    """
    return re.match(self._VERIFICATION_REGEX, line) is not None
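
# A standalone sketch of the multi-line body capture above: the regex
# lazily consumes text until the next "Mon DD hh:mm:ss" header or the end
# of the buffer; the buffer below is invented.
import re
import pyparsing

_body = pyparsing.Regex(
    r'.*?(?=($|\n\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}))', re.DOTALL)
_buffer = 'first message\n  continued line\nJan  2 03:04:05 host prog: next'
print(_body.parseString(_buffer)[0])
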
class SyslogParser(text_parser.PyparsingMultiLineTextParser):
  """Parses syslog formatted log files."""

  NAME = u'syslog'
  DESCRIPTION = u'Syslog Parser'

  _VERIFICATION_REGEX = re.compile(r'^\w{3}\s\d{2}\s\d{2}:\d{2}:\d{2}\s')

  _plugin_classes = {}

  _PYPARSING_COMPONENTS = {
      u'month': text_parser.PyparsingConstants.MONTH.setResultsName(u'month'),
      u'day': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          u'day'),
      u'hour': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          u'hour'),
      u'minute': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          u'minute'),
      u'second': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          u'second'),
      u'fractional_seconds': pyparsing.Word(pyparsing.nums).setResultsName(
          u'fractional_seconds'),
      u'hostname': pyparsing.Word(pyparsing.printables).setResultsName(
          u'hostname'),
      u'reporter': pyparsing.Word(pyparsing.alphanums + u'.').setResultsName(
          u'reporter'),
      u'pid': text_parser.PyparsingConstants.PID.setResultsName(u'pid'),
      u'facility': pyparsing.Word(pyparsing.alphanums).setResultsName(
          u'facility'),
      u'body': pyparsing.Regex(
          r'.*?(?=($|\n\w{3}\s\d{2}\s\d{2}:\d{2}:\d{2}))',
          re.DOTALL).setResultsName(u'body'),
      u'comment_body': pyparsing.SkipTo(u' ---').setResultsName(u'body')
  }

  _PYPARSING_COMPONENTS[u'date'] = (
      _PYPARSING_COMPONENTS[u'month'] +
      _PYPARSING_COMPONENTS[u'day'] +
      _PYPARSING_COMPONENTS[u'hour'] + pyparsing.Suppress(u':') +
      _PYPARSING_COMPONENTS[u'minute'] + pyparsing.Suppress(u':') +
      _PYPARSING_COMPONENTS[u'second'] + pyparsing.Optional(
          pyparsing.Suppress(u'.') +
          _PYPARSING_COMPONENTS[u'fractional_seconds']))

  _LINE_GRAMMAR = (
      _PYPARSING_COMPONENTS[u'date'] +
      _PYPARSING_COMPONENTS[u'hostname'] +
      _PYPARSING_COMPONENTS[u'reporter'] +
      pyparsing.Optional(
          pyparsing.Suppress(u'[') + _PYPARSING_COMPONENTS[u'pid'] +
          pyparsing.Suppress(u']')) +
      pyparsing.Optional(
          pyparsing.Suppress(u'<') + _PYPARSING_COMPONENTS[u'facility'] +
          pyparsing.Suppress(u'>')) +
      pyparsing.Optional(pyparsing.Suppress(u':')) +
      _PYPARSING_COMPONENTS[u'body'] + pyparsing.lineEnd())

  _SYSLOG_COMMENT = (
      _PYPARSING_COMPONENTS[u'date'] + pyparsing.Suppress(u':') +
      pyparsing.Suppress(u'---') + _PYPARSING_COMPONENTS[u'comment_body'] +
      pyparsing.Suppress(u'---') + pyparsing.LineEnd())

  LINE_STRUCTURES = [
      (u'syslog_line', _LINE_GRAMMAR),
      (u'syslog_comment', _SYSLOG_COMMENT)]

  _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

  def __init__(self):
    """Initializes a parser object."""
    super(SyslogParser, self).__init__()
    self._last_month = 0
    self._maximum_year = 0
    self._plugin_objects_by_reporter = {}
    self._year_use = 0

  def _UpdateYear(self, parser_mediator, month):
    """Updates the year to use for events, based on last observed month.

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).
      month: an integer containing the month observed by the parser, where
             January is 1.
    """
    if not self._year_use:
      self._year_use = parser_mediator.GetEstimatedYear()
    if not self._maximum_year:
      self._maximum_year = parser_mediator.GetLatestYear()

    if not self._last_month:
      self._last_month = month
      return

    # Some syslog daemons allow out-of-order sequences, so allow some leeway
    # to not cause Apr->May->Apr to cause the year to increment.
    # See http://bugzilla.adiscon.com/show_bug.cgi?id=527
    if self._last_month > (month + 1):
      if self._year_use != self._maximum_year:
        self._year_use += 1
    self._last_month = month

  def EnablePlugins(self, plugin_includes):
    """Enables parser plugins.

    Args:
      plugin_includes: a list of strings containing the names of the plugins
                       to enable, where None or an empty list represents all
                       plugins. Note that the default plugin is handled
                       separately.
    """
    super(SyslogParser, self).EnablePlugins(plugin_includes)

    self._plugin_objects_by_reporter = {}
    for plugin_object in self._plugin_objects:
      self._plugin_objects_by_reporter[
          plugin_object.REPORTER] = plugin_object

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a matching entry.

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).
      key: a string containing the name of the parsed structure.
      structure: the elements parsed from the file (instance of
                 pyparsing.ParseResults).

    Raises:
      UnableToParseFile: if an unsupported key is provided.
    """
    if key not in self._SUPPORTED_KEYS:
      raise errors.UnableToParseFile(
          u'Unsupported key: {0:s}'.format(key))

    month = timelib.MONTH_DICT.get(structure.month.lower(), None)
    if not month:
      parser_mediator.ProduceParserError(
          u'Invalid month value: {0:s}'.format(structure.month))
      return

    self._UpdateYear(parser_mediator, month)
    timestamp = timelib.Timestamp.FromTimeParts(
        year=self._year_use, month=month, day=structure.day,
        hour=structure.hour, minutes=structure.minute,
        seconds=structure.second, timezone=parser_mediator.timezone)

    if key == u'syslog_comment':
      comment_attributes = {
          u'hostname': u'',
          u'reporter': u'',
          u'pid': u'',
          u'body': structure.body
      }
      event = SyslogCommentEvent(timestamp, 0, comment_attributes)
      parser_mediator.ProduceEvent(event)
      return

    reporter = structure.reporter
    attributes = {
        u'hostname': structure.hostname,
        u'reporter': reporter,
        u'pid': structure.pid,
        u'body': structure.body
    }

    plugin_object = self._plugin_objects_by_reporter.get(reporter, None)
    if not plugin_object:
      event_object = SyslogLineEvent(timestamp, 0, attributes)
      parser_mediator.ProduceEvent(event_object)

    else:
      try:
        plugin_object.Process(parser_mediator, timestamp, attributes)

      except errors.WrongPlugin:
        event_object = SyslogLineEvent(timestamp, 0, attributes)
        parser_mediator.ProduceEvent(event_object)

  def VerifyStructure(self, parser_mediator, lines):
    """Verifies that this is a syslog-formatted file.

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).
      lines: a buffer that contains content from the file.

    Returns:
      A boolean value to indicate that the passed buffer appears to contain
      syslog content.
    """
    return re.match(self._VERIFICATION_REGEX, lines) is not None
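
# A simplified sketch of the year rollover heuristic in _UpdateYear
# (ignoring the maximum-year cap for brevity): the year only increments
# when the observed month jumps backwards by more than one, so a slightly
# out-of-order Apr -> May -> Apr sequence does not bump it.
_last_month, _year = 12, 2016
for _month in (12, 1, 2):
  if _last_month > (_month + 1):
    _year += 1
  _last_month = _month
print(_year)  # 2017
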
class SetupapiLogParser(text_parser.PyparsingMultiLineTextParser):
  """Parses events from Windows Setupapi log files."""

  NAME = 'setupapi'
  DESCRIPTION = 'Parser for Windows Setupapi log files.'

  _ENCODING = 'utf-8'

  # Increase the buffer size, as log messages can be very long.
  BUFFER_SIZE = 262144

  _SLASH = pyparsing.Literal('/').suppress()

  _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
  _THREE_DIGITS = text_parser.PyparsingConstants.THREE_DIGITS
  _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

  _SETUPAPI_DATE_TIME = pyparsing.Group(
      _FOUR_DIGITS + _SLASH +
      _TWO_DIGITS + _SLASH +
      _TWO_DIGITS +
      _TWO_DIGITS + pyparsing.Suppress(':') +
      _TWO_DIGITS + pyparsing.Suppress(':') +
      _TWO_DIGITS +
      pyparsing.Word('.,', exact=1).suppress() +
      _THREE_DIGITS)

  _SETUPAPI_LINE = (
      pyparsing.SkipTo('>>> [', include=True).suppress() +
      pyparsing.SkipTo(']').setResultsName('entry_type') +
      pyparsing.SkipTo('>>> Section start', include=True).suppress() +
      _SETUPAPI_DATE_TIME.setResultsName('start_time') +
      pyparsing.SkipTo('<<< Section end ').setResultsName('message') +
      pyparsing.GoToColumn(17) +
      _SETUPAPI_DATE_TIME.setResultsName('end_time') +
      pyparsing.SkipTo('<<< [Exit status: ', include=True).suppress() +
      pyparsing.SkipTo(']').setResultsName('entry_status') +
      pyparsing.SkipTo(pyparsing.lineEnd()) +
      pyparsing.ZeroOrMore(pyparsing.lineEnd()))

  LINE_STRUCTURES = [
      ('logline', _SETUPAPI_LINE),
  ]

  def _ParseRecordLogline(self, parser_mediator, structure):
    """Parses a logline record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          log entry.
    """
    time_zone = parser_mediator.timezone
    time_elements_structure = self._GetValueFromStructure(
        structure, 'start_time')

    try:
      date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=time_elements_structure)
      # Setupapi logs store date and time values in local time.
      date_time.is_local_time = True
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'invalid date time value: {0!s}'.format(time_elements_structure))
      return

    event_data = SetupapiLogEventData()
    event_data.entry_type = self._GetValueFromStructure(
        structure, 'entry_type')
    event_data.entry_status = 'START'

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_START, time_zone=time_zone)

    # Create event for the start of the setupapi section.
    parser_mediator.ProduceEventWithEventData(event, event_data)

    event_data.entry_status = self._GetValueFromStructure(
        structure, 'entry_status')
    time_elements_structure = self._GetValueFromStructure(
        structure, 'end_time')

    try:
      date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=time_elements_structure)
      date_time.is_local_time = True
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'invalid date time value: {0!s}'.format(time_elements_structure))
      return

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_END, time_zone=time_zone)

    # Create event for the end of the setupapi section.
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): identifier of the structure of tokens.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a log entry.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key != 'logline':
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    self._ParseRecordLogline(parser_mediator, structure)

  def VerifyStructure(self, parser_mediator, lines):
    """Verify that this file is a Windows Setupapi log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      lines (str): one or more lines from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    try:
      structure = self._SETUPAPI_LINE.parseString(lines)
    except pyparsing.ParseException as exception:
      logger.debug('Not a Windows Setupapi log file: {0!s}'.format(exception))
      return False

    time_elements_structure = self._GetValueFromStructure(
        structure, 'start_time')

    try:
      date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=time_elements_structure)
    except ValueError as exception:
      logger.debug((
          'Not a Windows Setupapi log file, invalid date/time: {0!s} '
          'with error: {1!s}').format(time_elements_structure, exception))
      return False

    if not date_time:
      logger.debug((
          'Not a Windows Setupapi log file, '
          'invalid date/time: {0!s}').format(time_elements_structure))
      return False

    return True
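
# A standalone sketch of the Setupapi timestamp grammar above, rebuilt
# without the plaso helper constants; the timestamp below is invented.
import pyparsing

_FOUR = pyparsing.Word(pyparsing.nums, exact=4)
_THREE = pyparsing.Word(pyparsing.nums, exact=3)
_TWO = pyparsing.Word(pyparsing.nums, exact=2)
_SLASH = pyparsing.Literal('/').suppress()

_setupapi_date_time = pyparsing.Group(
    _FOUR + _SLASH + _TWO + _SLASH + _TWO +
    _TWO + pyparsing.Suppress(':') + _TWO + pyparsing.Suppress(':') + _TWO +
    pyparsing.Word('.,', exact=1).suppress() + _THREE)

print(_setupapi_date_time.parseString('2015/11/22 17:59:28.110'))
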
class SyslogParser(text_parser.PyparsingMultiLineTextParser):
  """Parses syslog formatted log files."""

  NAME = 'syslog'
  DESCRIPTION = 'Syslog Parser'

  _ENCODING = 'utf-8'

  _plugin_classes = {}

  # The reporter and facility fields can contain any printable character, but
  # to allow for processing of syslog formats that delimit the reporter and
  # facility with printable characters, we remove certain common delimiters
  # from the set of printable characters.
  _REPORTER_CHARACTERS = ''.join(
      [c for c in pyparsing.printables if c not in [':', '[', '<']])
  _FACILITY_CHARACTERS = ''.join(
      [c for c in pyparsing.printables if c not in [':', '>']])

  _SYSLOG_SEVERITY = [
      'EMERG',
      'ALERT',
      'CRIT',
      'ERR',
      'WARNING',
      'NOTICE',
      'INFO',
      'DEBUG']

  # TODO: change pattern to allow only spaces as a field separator.
  _BODY_PATTERN = (
      r'.*?(?=($|\n\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2})|'
      r'($|\n\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}'
      r'[\+|-]\d{2}:\d{2}\s))')

  # The rsyslog file format (RSYSLOG_FileFormat) consists of:
  # %TIMESTAMP% %HOSTNAME% %syslogtag%%msg%
  #
  # Where %TIMESTAMP% is in RFC-3339 date time format e.g.
  # 2020-05-31T00:00:45.698463+00:00
  _RSYSLOG_VERIFICATION_PATTERN = (
      r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.'
      r'\d{6}[\+|-]\d{2}:\d{2} ' + _BODY_PATTERN)

  # The rsyslog traditional file format (RSYSLOG_TraditionalFileFormat)
  # consists of:
  # %TIMESTAMP% %HOSTNAME% %syslogtag%%msg%
  #
  # Where %TIMESTAMP% is in yearless ctime date time format e.g.
  # Jan 22 07:54:32
  # TODO: change pattern to allow only spaces as a field separator.
  _RSYSLOG_TRADITIONAL_VERIFICATION_PATTERN = (
      r'^\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}\s' + _BODY_PATTERN)

  # The Chrome OS syslog messages are of a format beginning with an
  # ISO 8601 combined date and time expression with timezone designator:
  # 2016-10-25T12:37:23.297265-07:00
  #
  # This will then be followed by the SYSLOG Severity which will be one of:
  # EMERG,ALERT,CRIT,ERR,WARNING,NOTICE,INFO,DEBUG
  #
  # 2016-10-25T12:37:23.297265-07:00 INFO
  _CHROMEOS_VERIFICATION_PATTERN = (
      r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.'
      r'\d{6}[\+|-]\d{2}:\d{2}\s'
      r'(EMERG|ALERT|CRIT|ERR|WARNING|NOTICE|INFO|DEBUG)' + _BODY_PATTERN)

  # Bundle all verification patterns into a single regular expression.
  _VERIFICATION_REGEX = re.compile('({0:s})'.format('|'.join([
      _CHROMEOS_VERIFICATION_PATTERN,
      _RSYSLOG_VERIFICATION_PATTERN,
      _RSYSLOG_TRADITIONAL_VERIFICATION_PATTERN])))

  _PYPARSING_COMPONENTS = {
      'year': text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName(
          'year'),
      'two_digit_month': (
          text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
              'two_digit_month')),
      'month': text_parser.PyparsingConstants.MONTH.setResultsName('month'),
      'day': text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName(
          'day'),
      'hour': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          'hour'),
      'minute': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          'minute'),
      'second': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          'second'),
      'fractional_seconds': pyparsing.Word(pyparsing.nums).setResultsName(
          'fractional_seconds'),
      'hostname': pyparsing.Word(pyparsing.printables).setResultsName(
          'hostname'),
      'reporter': pyparsing.Word(_REPORTER_CHARACTERS).setResultsName(
          'reporter'),
      'pid': text_parser.PyparsingConstants.PID.setResultsName('pid'),
      'facility': pyparsing.Word(_FACILITY_CHARACTERS).setResultsName(
          'facility'),
      'severity': pyparsing.oneOf(_SYSLOG_SEVERITY).setResultsName(
          'severity'),
      'body': pyparsing.Regex(_BODY_PATTERN, re.DOTALL).setResultsName(
          'body'),
      'comment_body': pyparsing.SkipTo(' ---').setResultsName('body')
  }

  _PYPARSING_COMPONENTS['date'] = (
      _PYPARSING_COMPONENTS['month'] +
      _PYPARSING_COMPONENTS['day'] +
      _PYPARSING_COMPONENTS['hour'] + pyparsing.Suppress(':') +
      _PYPARSING_COMPONENTS['minute'] + pyparsing.Suppress(':') +
      _PYPARSING_COMPONENTS['second'] + pyparsing.Optional(
          pyparsing.Suppress('.') +
          _PYPARSING_COMPONENTS['fractional_seconds']))

  _PYPARSING_COMPONENTS['rfc3339_datetime'] = pyparsing.Combine(
      pyparsing.Word(pyparsing.nums, exact=4) + pyparsing.Literal('-') +
      pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal('-') +
      pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal('T') +
      pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal(':') +
      pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal(':') +
      pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal('.') +
      pyparsing.Word(pyparsing.nums, exact=6) + pyparsing.oneOf(['-', '+']) +
      pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Optional(
          pyparsing.Literal(':') + pyparsing.Word(pyparsing.nums, exact=2)),
      joinString='', adjacent=True)

  _CHROMEOS_SYSLOG_LINE = (
      _PYPARSING_COMPONENTS['rfc3339_datetime'].setResultsName('datetime') +
      _PYPARSING_COMPONENTS['severity'] +
      _PYPARSING_COMPONENTS['reporter'] +
      pyparsing.Optional(pyparsing.Suppress(':')) +
      pyparsing.Optional(
          pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
          pyparsing.Suppress(']')) +
      pyparsing.Optional(pyparsing.Suppress(':')) +
      _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

  _RSYSLOG_LINE = (
      _PYPARSING_COMPONENTS['rfc3339_datetime'].setResultsName('datetime') +
      _PYPARSING_COMPONENTS['hostname'] +
      _PYPARSING_COMPONENTS['reporter'] +
      pyparsing.Optional(
          pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
          pyparsing.Suppress(']')) +
      pyparsing.Optional(
          pyparsing.Suppress('<') + _PYPARSING_COMPONENTS['facility'] +
          pyparsing.Suppress('>')) +
      pyparsing.Optional(pyparsing.Suppress(':')) +
      _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

  _RSYSLOG_TRADITIONAL_LINE = (
      _PYPARSING_COMPONENTS['date'] +
      _PYPARSING_COMPONENTS['hostname'] +
      _PYPARSING_COMPONENTS['reporter'] +
      pyparsing.Optional(
          pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
          pyparsing.Suppress(']')) +
      pyparsing.Optional(
          pyparsing.Suppress('<') + _PYPARSING_COMPONENTS['facility'] +
          pyparsing.Suppress('>')) +
      pyparsing.Optional(pyparsing.Suppress(':')) +
      _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

  _SYSLOG_COMMENT = (
      _PYPARSING_COMPONENTS['date'] + pyparsing.Suppress(':') +
      pyparsing.Suppress('---') + _PYPARSING_COMPONENTS['comment_body'] +
      pyparsing.Suppress('---') + pyparsing.LineEnd())

  _KERNEL_SYSLOG_LINE = (
      _PYPARSING_COMPONENTS['date'] +
      pyparsing.Literal('kernel').setResultsName('reporter') +
      pyparsing.Suppress(':') +
      _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

  LINE_STRUCTURES = [
      ('chromeos_syslog_line', _CHROMEOS_SYSLOG_LINE),
      ('kernel_syslog_line', _KERNEL_SYSLOG_LINE),
      ('rsyslog_line', _RSYSLOG_LINE),
      ('rsyslog_traditional_line', _RSYSLOG_TRADITIONAL_LINE),
      ('syslog_comment', _SYSLOG_COMMENT)]

  _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

  def __init__(self):
    """Initializes a parser."""
    super(SyslogParser, self).__init__()
    self._last_month = 0
    self._maximum_year = 0
    self._plugin_by_reporter = {}
    self._year_use = 0

  def _UpdateYear(self, mediator, month):
    """Updates the year to use for events, based on last observed month.

    Args:
      mediator (ParserMediator): mediates interactions between parsers and
          other components, such as storage and dfvfs.
      month (int): month observed by the parser, where January is 1.
    """
    if not self._year_use:
      self._year_use = mediator.GetEstimatedYear()
    if not self._maximum_year:
      self._maximum_year = mediator.GetLatestYear()

    if not self._last_month:
      self._last_month = month
      return

    # Some syslog daemons allow out-of-order sequences, so allow some leeway
    # to not cause Apr->May->Apr to cause the year to increment.
    # See http://bugzilla.adiscon.com/show_bug.cgi?id=527
    if self._last_month > (month + 1):
      if self._year_use != self._maximum_year:
        self._year_use += 1
    self._last_month = month

  def EnablePlugins(self, plugin_includes):
    """Enables parser plugins.

    Args:
      plugin_includes (list[str]): names of the plugins to enable, where None
          or an empty list represents all plugins. Note that the default
          plugin is handled separately.
    """
    super(SyslogParser, self).EnablePlugins(plugin_includes)

    self._plugin_by_reporter = {}
    for plugin in self._plugins:
      self._plugin_by_reporter[plugin.REPORTER] = plugin

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a matching entry.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): elements parsed from the file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in self._SUPPORTED_KEYS:
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    if key in ('chromeos_syslog_line', 'rsyslog_line'):
      date_time = dfdatetime_time_elements.TimeElementsInMicroseconds()
      iso8601_string = self._GetValueFromStructure(structure, 'datetime')

      try:
        date_time.CopyFromStringISO8601(iso8601_string)
      except ValueError:
        parser_mediator.ProduceExtractionWarning(
            'invalid date time value: {0:s}'.format(iso8601_string))
        return

    else:
      # TODO: add support for fractional seconds.
month = self._GetValueFromStructure(structure, 'month') try: month = timelib.MONTH_DICT.get(month.lower(), 0) except AttributeError: parser_mediator.ProduceExtractionWarning( 'invalid month value: {0!s}'.format(month)) return if month != 0: self._UpdateYear(parser_mediator, month) day = self._GetValueFromStructure(structure, 'day') hours = self._GetValueFromStructure(structure, 'hour') minutes = self._GetValueFromStructure(structure, 'minute') seconds = self._GetValueFromStructure(structure, 'second') time_elements_tuple = (self._year_use, month, day, hours, minutes, seconds) try: date_time = dfdatetime_time_elements.TimeElements( time_elements_tuple=time_elements_tuple) date_time.is_local_time = True except ValueError: parser_mediator.ProduceExtractionWarning( 'invalid date time value: {0!s}'.format( time_elements_tuple)) return plugin = None if key == 'syslog_comment': event_data = SyslogCommentEventData() event_data.body = self._GetValueFromStructure(structure, 'body') # TODO: pass line number to offset or remove. event_data.offset = 0 else: event_data = SyslogLineEventData() event_data.body = self._GetValueFromStructure(structure, 'body') event_data.hostname = self._GetValueFromStructure( structure, 'hostname') # TODO: pass line number to offset or remove. event_data.offset = 0 event_data.pid = self._GetValueFromStructure(structure, 'pid') event_data.reporter = self._GetValueFromStructure( structure, 'reporter') event_data.severity = self._GetValueFromStructure( structure, 'severity') plugin = self._plugin_by_reporter.get(event_data.reporter, None) if plugin: attributes = { 'body': event_data.body, 'hostname': event_data.hostname, 'pid': event_data.pid, 'reporter': event_data.reporter, 'severity': event_data.severity } try: # TODO: pass event_data instead of attributes. plugin.Process(parser_mediator, date_time, attributes) except errors.WrongPlugin: plugin = None if not plugin: event = time_events.DateTimeValuesEvent( date_time, definitions.TIME_DESCRIPTION_WRITTEN) parser_mediator.ProduceEventWithEventData(event, event_data) def VerifyStructure(self, parser_mediator, lines): """Verifies that this is a syslog-formatted file. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. lines (str): one or more lines from the text file. Returns: bool: True if this is the correct parser, False otherwise. """ return bool(self._VERIFICATION_REGEX.match(lines))
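For reference, a minimal standalone sketch of the traditional (yearless ctime) line grammar used above, with plaso's PyparsingConstants replaced by inline equivalents; names and the sample line are illustrative:

import pyparsing

_REPORTER_CHARS = ''.join(
    c for c in pyparsing.printables if c not in (':', '[', '<'))

MONTH = pyparsing.Word(pyparsing.alphas, exact=3)
TWO = pyparsing.Word(pyparsing.nums, exact=2)
DAY = pyparsing.Word(pyparsing.nums, max=2)

TRADITIONAL_LINE = (
    MONTH('month') + DAY('day') +
    TWO('hour') + pyparsing.Suppress(':') + TWO('minute') +
    pyparsing.Suppress(':') + TWO('second') +
    pyparsing.Word(pyparsing.printables)('hostname') +
    pyparsing.Word(_REPORTER_CHARS)('reporter') +
    pyparsing.Optional(
        pyparsing.Suppress('[') + pyparsing.Word(pyparsing.nums)('pid') +
        pyparsing.Suppress(']')) +
    pyparsing.Suppress(':') + pyparsing.restOfLine('body'))

tokens = TRADITIONAL_LINE.parseString(
    'Jan 22 07:54:32 myhostname sshd[22406]: Connection closed.')
print(tokens.month, tokens.day, tokens.hostname, tokens.reporter, tokens.pid)
# Jan 22 myhostname sshd 22406
print(tokens.body.strip())  # Connection closed.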
class BashHistoryParser(text_parser.PyparsingMultiLineTextParser): """Parses events from Bash history files.""" NAME = 'bash' DESCRIPTION = 'Parser for Bash history files' _ENCODING = 'utf-8' _TIMESTAMP = pyparsing.Suppress('#') + pyparsing.Word( pyparsing.nums, min=9, max=10).setParseAction( text_parser.PyParseIntCast).setResultsName('timestamp') _COMMAND = pyparsing.Regex( r'.*?(?=($|\n#\d{10}))', re.DOTALL).setResultsName('command') _LINE_GRAMMAR = _TIMESTAMP + _COMMAND + pyparsing.lineEnd() _VERIFICATION_GRAMMAR = ( pyparsing.Regex(r'^\s?[^#].*?$', re.MULTILINE) + _TIMESTAMP + pyparsing.NotAny(pyparsing.pythonStyleComment)) LINE_STRUCTURES = [('log_entry', _LINE_GRAMMAR)] def ParseRecord(self, parser_mediator, key, structure): """Parses a record and produces a Bash history event. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. key (str): name of the parsed structure. structure (pyparsing.ParseResults): elements parsed from the file. Raises: ParseError: when the structure type is unknown. """ if key != 'log_entry': raise errors.ParseError( 'Unable to parse record, unknown structure: {0:s}'.format(key)) event_data = BashHistoryEventData() event_data.command = self._GetValueFromStructure(structure, 'command') timestamp = self._GetValueFromStructure(structure, 'timestamp') date_time = dfdatetime_posix_time.PosixTime(timestamp=timestamp) event = time_events.DateTimeValuesEvent( date_time, definitions.TIME_DESCRIPTION_MODIFICATION) parser_mediator.ProduceEventWithEventData(event, event_data) # pylint: disable=unused-argument def VerifyStructure(self, parser_mediator, lines): """Verifies that this is a bash history file. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. lines (str): one or more lines from the text file. Returns: bool: True if this is the correct parser, False otherwise. """ match_generator = self._VERIFICATION_GRAMMAR.scanString(lines, maxMatches=1) return bool(list(match_generator))
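The timestamp/command split above can be exercised outside plaso. A minimal sketch with PyParseIntCast replaced by an inline parse action; the sample history data is fabricated:

import re
import pyparsing

TIMESTAMP = pyparsing.Suppress('#') + pyparsing.Word(
    pyparsing.nums, min=9, max=10).setParseAction(
        lambda tokens: int(tokens[0]))('timestamp')
COMMAND = pyparsing.Regex(r'.*?(?=($|\n#\d{10}))', re.DOTALL)('command')
ENTRY = TIMESTAMP + COMMAND

HISTORY = '#1625140800\nls -la\n#1625140860\ncat /etc/passwd\n'
for tokens, _, _ in ENTRY.scanString(HISTORY):
  print(tokens.timestamp, repr(tokens.command))
# 1625140800 'ls -la'
# 1625140860 'cat /etc/passwd'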
class SkyDriveLogParser(text_parser.PyparsingMultiLineTextParser): """Parses SkyDrive log files.""" NAME = u'skydrive_log' DESCRIPTION = u'Parser for OneDrive (or SkyDrive) log files.' _ENCODING = u'utf-8' # Common SDF (SkyDrive Format) structures. INTEGER_CAST = text_parser.PyParseIntCast HYPHEN = text_parser.PyparsingConstants.HYPHEN TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS TIME_MSEC = text_parser.PyparsingConstants.TIME_MSEC MSEC = pyparsing.Word(pyparsing.nums, max=3).setParseAction(INTEGER_CAST) COMMA = pyparsing.Literal(u',').suppress() DOT = pyparsing.Literal(u'.').suppress() IGNORE_FIELD = pyparsing.CharsNotIn(u',').suppress() # Header line timestamp (2013-07-25-160323.291): the timestamp format is # YYYY-MM-DD-hhmmss.msec. SDF_HEADER_TIMESTAMP = pyparsing.Group( text_parser.PyparsingConstants.DATE.setResultsName(u'date') + HYPHEN + TWO_DIGITS.setResultsName(u'hh') + TWO_DIGITS.setResultsName(u'mm') + TWO_DIGITS.setResultsName(u'ss') + DOT + MSEC.setResultsName(u'ms')).setResultsName(u'hdr_timestamp') # Line timestamp (07-25-13,16:06:31.820): the timestamp format is # MM-DD-YY,hh:mm:ss.msec. SDF_TIMESTAMP = ( TWO_DIGITS.setResultsName(u'month') + HYPHEN + TWO_DIGITS.setResultsName(u'day') + HYPHEN + TWO_DIGITS.setResultsName(u'year_short') + COMMA + TIME_MSEC.setResultsName(u'time')).setResultsName(u'timestamp') # Header start. SDF_HEADER_START = ( pyparsing.Literal(u'######').suppress() + pyparsing.Literal(u'Logging started.').setResultsName(u'log_start')) # Multiline entry end marker, matched from right to left. SDF_ENTRY_END = pyparsing.StringEnd() | SDF_HEADER_START | SDF_TIMESTAMP # SkyDrive line pyparsing structure. SDF_LINE = (SDF_TIMESTAMP + COMMA + IGNORE_FIELD + COMMA + IGNORE_FIELD + COMMA + IGNORE_FIELD + COMMA + pyparsing.CharsNotIn(u',').setResultsName(u'module') + COMMA + pyparsing.CharsNotIn(u',').setResultsName(u'source_code') + COMMA + IGNORE_FIELD + COMMA + IGNORE_FIELD + COMMA + pyparsing.CharsNotIn(u',').setResultsName(u'log_level') + COMMA + pyparsing.SkipTo(SDF_ENTRY_END).setResultsName(u'detail') + pyparsing.ZeroOrMore(pyparsing.lineEnd())) # SkyDrive header pyparsing structure. SDF_HEADER = ( SDF_HEADER_START + pyparsing.Literal(u'Version=').setResultsName(u'version_string') + pyparsing.Word(pyparsing.nums + u'.').setResultsName(u'version_number') + pyparsing.Literal(u'StartSystemTime:').suppress() + SDF_HEADER_TIMESTAMP + pyparsing.Literal( u'StartLocalTime:').setResultsName(u'local_time_string') + pyparsing.SkipTo(pyparsing.lineEnd()).setResultsName(u'details') + pyparsing.lineEnd()) # Define the available log line structures. LINE_STRUCTURES = [(u'logline', SDF_LINE), (u'header', SDF_HEADER)] def __init__(self): """Initializes a parser object.""" super(SkyDriveLogParser, self).__init__() self.use_local_zone = False def _GetTimestampFromHeader(self, structure): """Gets a timestamp from the structure. The following is an example of the timestamp structure expected [[2013, 7, 25], 16, 3, 23, 291]: DATE (year, month, day) is the first list element, than hours, minutes, seconds and milliseconds follow. Args: structure: The parsed structure, which should be a timestamp. Returns: An integer containing the timestamp or 0 on error. 
""" year, month, day = structure.date hour = structure.get(u'hh', 0) minute = structure.get(u'mm', 0) second = structure.get(u'ss', 0) microsecond = structure.get(u'ms', 0) * 1000 return timelib.Timestamp.FromTimeParts(year, month, day, hour, minute, second, microseconds=microsecond) def _GetTimestampFromLine(self, structure): """Gets a timestamp from string from the structure The following is an example of the timestamp structure expected [7, 25, 13, [16, 3, 24], 649]: month, day, year, a list with three element (hours, minutes, seconds) and finally milliseconds. Args: structure: The parsed structure. Returns: An integer containing the timestamp or 0 on error. """ hour, minute, second = structure.time[0] microsecond = structure.time[1] * 1000 # TODO: Verify if timestamps are locale dependent. year = structure.get(u'year_short', 0) month = structure.get(u'month', 0) day = structure.get(u'day', 0) if year < 0 or not month or not day: return 0 year += 2000 return timelib.Timestamp.FromTimeParts(year, month, day, hour, minute, second, microseconds=microsecond) def _ParseHeader(self, structure): """Parse header lines and store appropriate attributes. [u'Logging started.', u'Version=', u'17.0.2011.0627', [2013, 7, 25], 16, 3, 23, 291, u'StartLocalTime', u'<details>'] Args: structure: A pyparsing.ParseResults object from an header line in the log file. Returns: An event object (instance of SkyDriveLogEvent) or None on error. """ timestamp = self._GetTimestampFromHeader(structure.hdr_timestamp) if not timestamp: logging.debug(u'SkyDriveLog invalid timestamp {0:d}'.format( structure.hdr_timestamp)) return detail = u'{0:s} {1:s} {2:s} {3:s} {4:s}'.format( structure.log_start, structure.version_string, structure.version_number, structure.local_time_string, structure.details) return SkyDriveLogEvent(timestamp, detail) def _ParseLine(self, structure): """Parse a logline and store appropriate attributes. Args: structure: A pyparsing.ParseResults object from a line in the log file. Returns: An event object (instance of SkyDriveLogEvent) or None. """ timestamp = self._GetTimestampFromLine(structure.timestamp) if not timestamp: logging.debug(u'SkyDriveLog invalid timestamp {0:s}'.format( structure.timestamp)) return # Replace newlines with spaces in structure.detail to preserve output. detail = structure.detail.replace(u'\n', u' ') return SkyDriveLogEvent(timestamp, detail, module=structure.module, source_code=structure.source_code, log_level=structure.log_level) def ParseRecord(self, parser_mediator, key, structure): """Parse each record structure and return an EventObject if applicable. Args: parser_mediator: A parser mediator object (instance of ParserMediator). key: An identification string indicating the name of the parsed structure. structure: A pyparsing.ParseResults object from a line in the log file. """ event_object = None if key == u'logline': event_object = self._ParseLine(structure) elif key == u'header': event_object = self._ParseHeader(structure) else: logging.warning( u'Unable to parse record, unknown structure: {0:s}'.format( key)) if event_object: parser_mediator.ProduceEvent(event_object) def VerifyStructure(self, parser_mediator, line): """Verify that this file is a SkyDrive log file. Args: parser_mediator: A parser mediator object (instance of ParserMediator). line: A single line from the text file. Returns: True if this is the correct parser, False otherwise. 
""" try: parsed_structure = self.SDF_HEADER.parseString(line) except pyparsing.ParseException: logging.debug(u'Not a SkyDrive log file') return False timestamp = self._GetTimestampFromHeader( parsed_structure.hdr_timestamp) if not timestamp: logging.debug( u'Not a SkyDrive log file, invalid timestamp {0:s}'.format( parsed_structure.timestamp)) return False return True
class SetupapiLogParser(text_parser.PyparsingSingleLineTextParser): """Parses events from Windows Setupapi log files.""" NAME = 'setupapi' DATA_FORMAT = 'Windows SetupAPI log file' _ENCODING = 'utf-8' _SLASH = pyparsing.Literal('/').suppress() _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS _THREE_DIGITS = text_parser.PyparsingConstants.THREE_DIGITS _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS _SETUPAPI_DATE_TIME = pyparsing.Group( _FOUR_DIGITS + _SLASH + _TWO_DIGITS + _SLASH + _TWO_DIGITS + _TWO_DIGITS + pyparsing.Suppress(':') + _TWO_DIGITS + pyparsing.Suppress(':') + _TWO_DIGITS + pyparsing.Word('.,', exact=1).suppress() + _THREE_DIGITS) # Disable pylint due to long URLs for documenting structures. # pylint: disable=line-too-long # See https://docs.microsoft.com/en-us/windows-hardware/drivers/install/format-of-a-text-log-header _LOG_HEADER_START = (pyparsing.Literal('[Device Install Log]') + pyparsing.lineEnd()) # See https://docs.microsoft.com/en-us/windows-hardware/drivers/install/format-of-a-text-log-header _LOG_HEADER_END = (pyparsing.Literal('[BeginLog]') + pyparsing.lineEnd()) # See https://docs.microsoft.com/en-us/windows-hardware/drivers/install/format-of-a-text-log-section-header _SECTION_HEADER = (pyparsing.Literal('>>> [').suppress() + pyparsing.CharsNotIn(']').setResultsName('entry_type') + pyparsing.Literal(']') + pyparsing.lineEnd()) # See https://docs.microsoft.com/en-us/windows-hardware/drivers/install/format-of-a-text-log-section-header _SECTION_HEADER_START = ( pyparsing.Literal('>>> Section start').suppress() + _SETUPAPI_DATE_TIME.setResultsName('start_time') + pyparsing.lineEnd()) # See https://docs.microsoft.com/en-us/windows-hardware/drivers/install/format-of-a-text-log-section-footer _SECTION_END = (pyparsing.Literal('<<< Section end ').suppress() + _SETUPAPI_DATE_TIME.setResultsName('end_time') + pyparsing.lineEnd()) # See https://docs.microsoft.com/en-us/windows-hardware/drivers/install/format-of-a-text-log-section-footer _SECTION_END_EXIT_STATUS = ( pyparsing.Literal('<<< [Exit status: ').suppress() + pyparsing.CharsNotIn(']').setResultsName('exit_status') + pyparsing.Literal(']') + pyparsing.lineEnd()) # See https://docs.microsoft.com/en-us/windows-hardware/drivers/install/format-of-log-entries-that-are-not-part-of-a-text-log-section _SECTION_BODY_LINE = (pyparsing.stringStart + pyparsing.MatchFirst([ pyparsing.Literal('!!! '), pyparsing.Literal('! '), pyparsing.Literal(' ') ]) + pyparsing.restOfLine).leaveWhitespace() # See https://docs.microsoft.com/en-us/windows-hardware/drivers/install/format-of-log-entries-that-are-not-part-of-a-text-log-section _NON_SECTION_LINE = (pyparsing.stringStart + pyparsing.MatchFirst([ pyparsing.Literal(' . '), pyparsing.Literal('!!! '), pyparsing.Literal('! '), pyparsing.Literal(' ') ]) + pyparsing.restOfLine).leaveWhitespace() # These lines do not appear to be documented in the Microsoft documentation. 
_BOOT_SESSION_LINE = (pyparsing.Literal('[Boot Session:') + _SETUPAPI_DATE_TIME + pyparsing.Literal(']')) # pylint: enable=line-too-long LINE_STRUCTURES = [('ignorable_line', _BOOT_SESSION_LINE), ('ignorable_line', _LOG_HEADER_END), ('ignorable_line', _LOG_HEADER_START), ('ignorable_line', _NON_SECTION_LINE), ('ignorable_line', _SECTION_BODY_LINE), ('section_end', _SECTION_END), ('section_end_exit_status', _SECTION_END_EXIT_STATUS), ('section_header', _SECTION_HEADER), ('section_start', _SECTION_HEADER_START)] def __init__(self): """Initializes a setupapi parser.""" super(SetupapiLogParser, self).__init__() self._last_end_time = None self._last_entry_type = None def _GetTimeElements(self, time_structure): """Builds time elements from a setupapi time_stamp field. Args: time_structure (pyparsing.ParseResults): structure of tokens derived from a setupapi time_stamp field. Returns: dfdatetime.TimeElements: date and time extracted from the value or None if the structure does not represent a valid date and time value. """ try: date_time = dfdatetime_time_elements.TimeElementsInMilliseconds( time_elements_tuple=time_structure) # Setupapi logs store date and time values in local time. date_time.is_local_time = True return date_time except ValueError: return None def ParseRecord(self, parser_mediator, key, structure): """Parses a log record structure and produces events. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. key (str): identifier of the structure of tokens. structure (pyparsing.ParseResults): structure of tokens derived from a log entry. Raises: ParseError: when the structure type is unknown. """ if key == 'ignorable_line': return if key == 'section_header': self._last_entry_type = self._GetValueFromStructure( structure, 'entry_type') return if key == 'section_start': time_structure = self._GetValueFromStructure( structure, 'start_time') start_time = self._GetTimeElements(time_structure) if not start_time: parser_mediator.ProduceExtractionWarning( 'invalid date time value: {0!s}'.format(time_structure)) return event_data = SetupapiLogEventData() event_data.entry_type = self._last_entry_type event = time_events.DateTimeValuesEvent( start_time, definitions.TIME_DESCRIPTION_START, time_zone=parser_mediator.timezone) # Create event for the start of the setupapi section parser_mediator.ProduceEventWithEventData(event, event_data) return if key == 'section_end': time_structure = self._GetValueFromStructure(structure, 'end_time') end_time = self._GetTimeElements(time_structure) if not end_time: parser_mediator.ProduceExtractionWarning( 'invalid date time value: {0!s}'.format(time_structure)) # Store last end time so that an event with the data from the # following exit status section can be created. self._last_end_time = end_time return if key == 'section_end_exit_status': exit_status = self._GetValueFromStructure(structure, 'exit_status') if self._last_end_time: event_data = SetupapiLogEventData() event_data.entry_type = self._last_entry_type event_data.exit_status = exit_status event = time_events.DateTimeValuesEvent( self._last_end_time, definitions.TIME_DESCRIPTION_END, time_zone=parser_mediator.timezone) parser_mediator.ProduceEventWithEventData(event, event_data) # Reset entry type and status and end time in case a line is missing. 
self._last_entry_type = None self._last_end_time = None return raise errors.ParseError( 'Unable to parse record, unknown structure: {0:s}'.format(key)) def VerifyStructure(self, parser_mediator, line): """Verify that this file is a Windows Setupapi log file. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. line (str): single line from the text file. Returns: bool: True if this is the correct parser, False otherwise. """ try: self._LOG_HEADER_START.parseString(line) # Reset stored values for parsing a new file. self._last_end_time = None self._last_entry_type = None except pyparsing.ParseException as exception: logger.debug( 'Not a Windows Setupapi log file: {0!s}'.format(exception)) return False return True
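A minimal standalone sketch of the section start/end grammar above, with plaso's digit constants inlined; the sample lines are fabricated and the literals are simplified (the parser above allows extra whitespace that a strict Literal would not):

import pyparsing

TWO = pyparsing.Word(pyparsing.nums, exact=2)
THREE = pyparsing.Word(pyparsing.nums, exact=3)
FOUR = pyparsing.Word(pyparsing.nums, exact=4)
SLASH = pyparsing.Literal('/').suppress()

DATE_TIME = pyparsing.Group(
    FOUR + SLASH + TWO + SLASH + TWO +
    TWO + pyparsing.Suppress(':') + TWO + pyparsing.Suppress(':') + TWO +
    pyparsing.Word('.,', exact=1).suppress() + THREE)

SECTION_START = (
    pyparsing.Literal('>>> Section start').suppress() +
    DATE_TIME('start_time'))
SECTION_END = (
    pyparsing.Literal('<<< Section end').suppress() + DATE_TIME('end_time'))

tokens = SECTION_START.parseString('>>> Section start 2019/11/27 13:25:26.976')
print(list(tokens.start_time))
# ['2019', '11', '27', '13', '25', '26', '976']
tokens = SECTION_END.parseString('<<< Section end 2019/11/27 13:25:27.009')
print(list(tokens.end_time))
# ['2019', '11', '27', '13', '25', '27', '009']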
def _parse_timing_summary_tables(time_rpt: str):
    """
    Return tables from a Vivado timing summary report.

    This currently only handles basic tables such as "Design Timing Summary"
    and "Clock Summary". The more complex data in "Timing Details", such as
    worst paths, is not parsed.
    """
    # Make newlines significant, taking care not to affect other grammars;
    # the previous whitespace characters are restored below.
    saved_whitespace = pp.ParserElement.DEFAULT_WHITE_CHARS
    pp.ParserElement.setDefaultWhitespaceChars(" \t")

    # Extract the table title ("Clock Summary") from a section heading like:
    #
    # --------------------------------------------------------------
    # | Clock Summary
    # | -------------
    # --------------------------------------------------------------
    sec_hline = pp.lineStart() + pp.Word("-") + pp.lineEnd()
    sec_row_start = pp.lineStart() + pp.Literal("|").suppress()
    sec_title = (sec_row_start + pp.SkipTo(pp.lineEnd())("title") +
                 pp.lineEnd().suppress())
    sec_title_uline = sec_row_start + pp.Suppress(pp.Word("-") + pp.lineEnd())
    section_head = sec_hline + sec_title + sec_title_uline + sec_hline

    blank_line = pp.Suppress(pp.lineEnd() * 2)

    # Tables are headings followed by horizontal lines and then data:
    #
    # Clock  Waveform(ns)   Period(ns)   Frequency(MHz)
    # -----  ------------   ----------   --------------
    # clk    {0.000 2.500}  5.000        200.000
    #
    # Match two or more groups of dashes to avoid matching the long single
    # horizontal lines used elsewhere. Normally the spaces between the
    # groups of dashes would be consumed, so get them back with
    # originalTextFor(). It would be safer to anchor this to the start of
    # the line, but "Design Timing Summary" and perhaps others indent the
    # table for some reason.
    table_hline = pp.originalTextFor(pp.Word("-") * (2,) + pp.lineEnd())

    # Get any header rows above the horizontal lines.
    table_head = pp.SkipTo(table_hline, failOn=blank_line)

    # Get everything from the horizontal lines to an empty line.
    table_body = pp.SkipTo(blank_line)

    # The adjacent argument shouldn't be required, but nothing matches
    # without it. It seems like the newline at the end of the heading row
    # may not be getting included somehow.
    table = pp.Combine(table_head + table_hline + table_body,
                       adjacent=False)("table")

    section = section_head + pp.lineEnd().suppress() + table

    # Restore whitespace characters.
    pp.ParserElement.setDefaultWhitespaceChars(saved_whitespace)

    table_dict = {
        x["title"]: x["table"] for x in section.searchString(time_rpt)
    }
    return table_dict
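A quick usage sketch on a fabricated report fragment (real reports are much larger, and the expected output shown is indicative only); it assumes this function's module imports pyparsing as pp, as the function itself does:

import textwrap

SAMPLE = textwrap.dedent("""\
    ------------------------------------
    | Clock Summary
    | -------------
    ------------------------------------

    Clock  Waveform(ns)   Period(ns)   Frequency(MHz)
    -----  ------------   ----------   --------------
    clk    {0.000 2.500}  5.000        200.000

    """)

tables = _parse_timing_summary_tables(SAMPLE)
print(sorted(tables))           # expect: ['Clock Summary']
print(tables['Clock Summary'])  # expect: heading row, dashed line, data row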
class ApacheAccessParser(text_parser.PyparsingSingleLineTextParser): """Apache access log (access.log) file parser.""" NAME = 'apache_access' DATA_FORMAT = 'Apache access log (access.log) file' MAX_LINE_LENGTH = 2048 # Date format [18/Sep/2011:19:18:28 -0400] _DATE_TIME = pyparsing.Group( pyparsing.Suppress('[') + text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('day') + pyparsing.Suppress('/') + text_parser.PyparsingConstants.THREE_LETTERS.setResultsName('month') + pyparsing.Suppress('/') + text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName('year') + pyparsing.Suppress(':') + text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('hours') + pyparsing.Suppress(':') + text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('minutes') + pyparsing.Suppress(':') + text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('seconds') + pyparsing.Combine( pyparsing.oneOf(['-', '+']) + pyparsing.Word( pyparsing.nums, exact=4)).setResultsName('time_offset') + pyparsing.Suppress(']')).setResultsName('date_time') _HTTP_REQUEST = ( pyparsing.Suppress('"') + pyparsing.SkipTo('" ').setResultsName('http_request') + pyparsing.Suppress('"')) _PORT_NUMBER = text_parser.PyparsingConstants.INTEGER.setResultsName( 'port_number') _REMOTE_NAME = ( pyparsing.Word(pyparsing.alphanums) | pyparsing.Literal('-')).setResultsName('remote_name') _RESPONSE_BYTES = ( pyparsing.Literal('-') | text_parser.PyparsingConstants.INTEGER).setResultsName('response_bytes') _REFERER = ( pyparsing.Suppress('"') + pyparsing.SkipTo('" ').setResultsName('referer') + pyparsing.Suppress('"')) _SERVER_NAME = ( pyparsing.Word(pyparsing.alphanums + '-' + '.').setResultsName( 'server_name')) _USER_AGENT = ( pyparsing.Suppress('"') + pyparsing.SkipTo('"').setResultsName('user_agent') + pyparsing.Suppress('"')) _USER_NAME = ( pyparsing.Word(pyparsing.alphanums) | pyparsing.Literal('-')).setResultsName('user_name') # Defined in https://httpd.apache.org/docs/2.4/logs.html # format: "%h %l %u %t \"%r\" %>s %b" _COMMON_LOG_FORMAT_LINE = ( text_parser.PyparsingConstants.IP_ADDRESS.setResultsName('ip_address') + _REMOTE_NAME + _USER_NAME + _DATE_TIME + _HTTP_REQUEST + text_parser.PyparsingConstants.INTEGER.setResultsName('response_code') + _RESPONSE_BYTES + pyparsing.lineEnd()) # Defined in https://httpd.apache.org/docs/2.4/logs.html # format: "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" _COMBINED_LOG_FORMAT_LINE = ( text_parser.PyparsingConstants.IP_ADDRESS.setResultsName('ip_address') + _REMOTE_NAME + _USER_NAME + _DATE_TIME + _HTTP_REQUEST + text_parser.PyparsingConstants.INTEGER.setResultsName('response_code') + _RESPONSE_BYTES + _REFERER + _USER_AGENT + pyparsing.lineEnd()) # "vhost_combined" format as used by Debian and related distributions. # "%v:%p %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" _VHOST_COMBINED_LOG_FORMAT = ( _SERVER_NAME + pyparsing.Suppress(':') + _PORT_NUMBER + text_parser.PyparsingConstants.IP_ADDRESS.setResultsName('ip_address') + _REMOTE_NAME + _USER_NAME + _DATE_TIME + _HTTP_REQUEST + text_parser.PyparsingConstants.INTEGER.setResultsName('response_code') + _RESPONSE_BYTES + _REFERER + _USER_AGENT + pyparsing.lineEnd() ) LINE_STRUCTURES = [ ('combined_log_format', _COMBINED_LOG_FORMAT_LINE), ('common_log_format', _COMMON_LOG_FORMAT_LINE), ('vhost_combined_log_format', _VHOST_COMBINED_LOG_FORMAT)] _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES]) def _GetDateTime(self, structure): """Retrieves the date and time from a date and time values structure. 
The date and time values in Apache access log files are formatted as: "[18/Sep/2011:19:18:28 -0400]". Args: structure (pyparsing.ParseResults): structure of tokens derived from a line of a text file. Returns: dfdatetime.DateTimeValues: date and time. Raises: ValueError: if the structure cannot be converted into a date time string. """ year = self._GetValueFromStructure(structure, 'year') month = self._GetValueFromStructure(structure, 'month') try: month = self._MONTH_DICT.get(month.lower(), 0) except AttributeError as exception: raise ValueError('unable to parse month with error: {0!s}.'.format( exception)) day_of_month = self._GetValueFromStructure(structure, 'day') hours = self._GetValueFromStructure(structure, 'hours') minutes = self._GetValueFromStructure(structure, 'minutes') seconds = self._GetValueFromStructure(structure, 'seconds') time_offset = self._GetValueFromStructure(structure, 'time_offset') try: time_zone_offset = int(time_offset[1:3], 10) * 60 time_zone_offset += int(time_offset[3:5], 10) if time_offset[0] == '-': time_zone_offset *= -1 except (TypeError, ValueError) as exception: raise ValueError( 'unable to parse time zone offset with error: {0!s}.'.format( exception)) time_elements_tuple = (year, month, day_of_month, hours, minutes, seconds) return dfdatetime_time_elements.TimeElements( time_elements_tuple=time_elements_tuple, time_zone_offset=time_zone_offset) def ParseRecord(self, parser_mediator, key, structure): """Parses a matching entry. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. key (str): name of the parsed structure. structure (pyparsing.ParseResults): elements parsed from the file. Raises: ParseError: when the structure type is unknown. """ if key not in self._SUPPORTED_KEYS: raise errors.ParseError( 'Unable to parse record, unknown structure: {0:s}'.format(key)) date_time_string = self._GetValueFromStructure(structure, 'date_time') try: date_time = self._GetDateTime(date_time_string) except ValueError as exception: parser_mediator.ProduceExtractionWarning( 'unable to parse date time value: {0!s} with error: {1!s}'.format( date_time_string, exception)) return event = time_events.DateTimeValuesEvent( date_time, definitions.TIME_DESCRIPTION_RECORDED) event_data = ApacheAccessEventData() event_data.ip_address = self._GetValueFromStructure(structure, 'ip_address') event_data.remote_name = self._GetValueFromStructure( structure, 'remote_name') event_data.user_name = self._GetValueFromStructure(structure, 'user_name') event_data.http_request = self._GetValueFromStructure( structure, 'http_request') event_data.http_response_code = self._GetValueFromStructure( structure, 'response_code') event_data.http_response_bytes = self._GetValueFromStructure( structure, 'response_bytes') if key in ('combined_log_format', 'vhost_combined_log_format'): event_data.http_request_referer = self._GetValueFromStructure( structure, 'referer') event_data.http_request_user_agent = self._GetValueFromStructure( structure, 'user_agent') if key == 'vhost_combined_log_format': event_data.server_name = self._GetValueFromStructure( structure, 'server_name') event_data.port_number = self._GetValueFromStructure( structure, 'port_number') parser_mediator.ProduceEventWithEventData(event, event_data) # pylint: disable=unused-argument def VerifyStructure(self, parser_mediator, line): """Verifies that this is an apache access log file. 
    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    return any(parser.matches(line) for _, parser in self.LINE_STRUCTURES)
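A minimal standalone sketch of the common log format grammar above, with plaso's PyparsingConstants replaced by inline equivalents; the sample is the well-known Apache documentation example line:

import pyparsing

INTEGER = pyparsing.Word(pyparsing.nums).setParseAction(
    lambda tokens: int(tokens[0]))
IP_ADDRESS = pyparsing.Combine(
    pyparsing.Word(pyparsing.nums) +
    ('.' + pyparsing.Word(pyparsing.nums)) * 3)
FIELD = pyparsing.Word(pyparsing.alphanums + '-')
DATE_TIME = (
    pyparsing.Suppress('[') + pyparsing.SkipTo(']')('date_time') +
    pyparsing.Suppress(']'))

COMMON_LOG_LINE = (
    IP_ADDRESS('ip_address') + FIELD('remote_name') + FIELD('user_name') +
    DATE_TIME + pyparsing.QuotedString('"')('http_request') +
    INTEGER('response_code') +
    (pyparsing.Literal('-') | INTEGER)('response_bytes'))

tokens = COMMON_LOG_LINE.parseString(
    '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] '
    '"GET /apache_pb.gif HTTP/1.0" 200 2326')
print(tokens.ip_address, tokens.response_code, tokens.response_bytes)
# 127.0.0.1 200 2326
print(tokens.http_request)  # GET /apache_pb.gif HTTP/1.0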
class GoogleLogParser(text_parser.PyparsingMultiLineTextParser): """A parser for Google log formatted files. See https://google.io/docs/python/guides/logging """ NAME = 'googlelog' DESCRIPTION = 'Parser for handling Google log formatted files.' # PyParsing components used to construct grammars for parsing lines. _PYPARSING_COMPONENTS = { 'priority': ( pyparsing.oneOf(['I', 'W', 'E', 'F']).setResultsName('priority')), 'year': text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName( 'year'), 'month_number': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName( 'month_number'), 'day': text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName( 'day'), 'hour': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName( 'hour'), 'minute': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName( 'minute'), 'second': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName( 'second'), 'microsecond': pyparsing.Word(pyparsing.nums, exact=6).setParseAction( text_parser.PyParseIntCast).setResultsName('microsecond'), 'thread_identifier': pyparsing.Word(pyparsing.nums).setResultsName( 'thread_identifier'), 'file_name': (pyparsing.Word(pyparsing.printables.replace(':', '')).setResultsName( 'file_name')), 'line_number': ( pyparsing.Word(pyparsing.nums).setResultsName('line_number')), 'message': ( pyparsing.Regex('.*?(?=($|\n[IWEF][0-9]{4}))', re.DOTALL). setResultsName('message'))} _PYPARSING_COMPONENTS['date'] = ( _PYPARSING_COMPONENTS['month_number'] + _PYPARSING_COMPONENTS['day'] + _PYPARSING_COMPONENTS['hour'] + pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['minute'] + pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['second'] + pyparsing.Optional( pyparsing.Suppress('.') + _PYPARSING_COMPONENTS['microsecond'])) # Grammar for individual log event lines. _LOG_LINE = ( _PYPARSING_COMPONENTS['priority'] + _PYPARSING_COMPONENTS['date'] + _PYPARSING_COMPONENTS['thread_identifier'] + _PYPARSING_COMPONENTS['file_name'] + pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['line_number'] + pyparsing.Suppress('] ') + _PYPARSING_COMPONENTS['message'] + pyparsing.lineEnd()) # Grammar for the log file greeting. _GREETING = ( _PYPARSING_COMPONENTS['year'] + pyparsing.Suppress('/') + _PYPARSING_COMPONENTS['month_number'] + pyparsing.Suppress('/') + _PYPARSING_COMPONENTS['day'] + _PYPARSING_COMPONENTS['hour'] + pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['minute'] + pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['second'] + pyparsing.Regex('.*?(?=($|\n[IWEF][0-9]{4}))', re.DOTALL) + pyparsing.lineEnd()) _GREETING_START = 'Log file created at: ' # Our initial buffer length is the length of the string we verify with. _INITIAL_BUFFER_SIZE = len(_GREETING_START) # Once we're sure we're reading a valid file, we'll use a larger read buffer. _RUNNING_BUFFER_SIZE = 5120 # Order is important here, as the structures are checked against each line # sequentially, so we put the most common first, and the most expensive # last. LINE_STRUCTURES = [ ('log_entry', _LOG_LINE), ('greeting_start', pyparsing.Literal(_GREETING_START)), ('greeting', _GREETING)] _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES]) def __init__(self): """Initializes a Google log formatted file parser.""" super(GoogleLogParser, self).__init__() # Set the size of the file we need to read to verify it. self._buffer_size = self._INITIAL_BUFFER_SIZE self._maximum_year = 0 # The year to use for events. The initial year is stored in the log file # greeting. self._year = 0 # The month the last observed event occurred. 
self._last_month = 0 def _UpdateYear(self, mediator, month): """Updates the year to use for events, based on last observed month. Args: mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. month (int): month observed by the parser, where January is 1. """ if not self._year: self._year = mediator.GetEstimatedYear() if not self._maximum_year: self._maximum_year = mediator.GetLatestYear() if not self._last_month: self._last_month = month return # TODO: Check whether out of order events are possible if self._last_month > (month + 1): if self._year != self._maximum_year: self._year += 1 self._last_month = month def _ReadGreeting(self, structure): """Extract useful information from the logfile greeting. Args: structure (pyparsing.ParseResults): elements parsed from the file. """ self._year = self._GetValueFromStructure(structure, 'year') self._last_month = self._GetValueFromStructure(structure, 'month_number') def _ParseLine(self, parser_mediator, structure): """Process a single log line into a GoogleLogEvent. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. structure (pyparsing.ParseResults): elements parsed from the file. """ month = self._GetValueFromStructure(structure, 'month_number') if month != 0: self._UpdateYear(parser_mediator, month) day = self._GetValueFromStructure(structure, 'day') hours = self._GetValueFromStructure(structure, 'hour') minutes = self._GetValueFromStructure(structure, 'minute') seconds = self._GetValueFromStructure(structure, 'second') microseconds = self._GetValueFromStructure(structure, 'microsecond') time_elements_tuple = ( self._year, month, day, hours, minutes, seconds, microseconds) try: date_time = dfdatetime_time_elements.TimeElementsInMicroseconds( time_elements_tuple=time_elements_tuple) date_time.is_local_time = True except ValueError: parser_mediator.ProduceExtractionWarning( 'invalid date time value: {0!s}'.format(time_elements_tuple)) return event_data = GoogleLogEventData() event_data.priority = self._GetValueFromStructure(structure, 'priority') event_data.thread_identifier = self._GetValueFromStructure( structure, 'thread_identifier') event_data.file_name = self._GetValueFromStructure(structure, 'file_name') event_data.line_number = self._GetValueFromStructure( structure, 'line_number') event_data.message = self._GetValueFromStructure(structure, 'message') event = time_events.DateTimeValuesEvent( date_time, definitions.TIME_DESCRIPTION_WRITTEN) parser_mediator.ProduceEventWithEventData(event, event_data) def ParseRecord(self, parser_mediator, key, structure): """Parses a matching entry. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. key (str): name of the parsed structure. structure (pyparsing.ParseResults): elements parsed from the file. Raises: ParseError: when the structure type is unknown. """ if key not in self._SUPPORTED_KEYS: raise errors.ParseError( 'Unable to parse record, unknown structure: {0:s}'.format(key)) if key == 'greeting': self._ReadGreeting(structure) elif key == 'log_entry': self._ParseLine(parser_mediator, structure) def VerifyStructure(self, parser_mediator, lines): """Verifies that this is a google log-formatted file. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. lines (str): one or more lines from the text file. 
Returns: bool: True if this is the correct parser, False otherwise. """ if not lines.startswith(self._GREETING_START): return False # Now that we know this is a valid log, expand the read buffer to the # maximum size we expect a log event to be (which is quite large). self._buffer_size = self._RUNNING_BUFFER_SIZE self._year = parser_mediator.year return True
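The glog-style line format above can be matched with a much smaller grammar once plaso's constants are inlined. A minimal sketch (the sample line is fabricated):

import pyparsing

TWO = pyparsing.Word(pyparsing.nums, exact=2)

LOG_LINE = (
    pyparsing.oneOf(['I', 'W', 'E', 'F'])('priority') +
    TWO('month') + TWO('day') +
    TWO('hour') + pyparsing.Suppress(':') + TWO('minute') +
    pyparsing.Suppress(':') + TWO('second') + pyparsing.Suppress('.') +
    pyparsing.Word(pyparsing.nums, exact=6)('microsecond') +
    pyparsing.Word(pyparsing.nums)('thread_identifier') +
    pyparsing.Word(pyparsing.printables.replace(':', ''))('file_name') +
    pyparsing.Suppress(':') + pyparsing.Word(pyparsing.nums)('line_number') +
    pyparsing.Suppress(']') + pyparsing.restOfLine('message'))

tokens = LOG_LINE.parseString(
    'I0127 13:47:07.288366  1962 log_checker.cc:37] Log file rotated')
print(tokens.priority, tokens.month, tokens.day, tokens.file_name,
      tokens.line_number)
# I 01 27 log_checker.cc 37
print(tokens.message.strip())  # Log file rotated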
def dockerfile_instruction_grammar(self):
    """Builds the pyparsing grammar for one Dockerfile instruction line."""

    #
    # Fail action - error template - line / column / instruction
    #
    def error(s, loc, expr, error):
        """Main error template"""
        # NOTE: the keyword names (ligne, colonne, inst, erreur, nombre)
        # must match the placeholders in the DOCKERFILE_ERROR templates.
        raise ParseFatalException(DOCKERFILE_ERROR[202].format(
            ligne=self.line_counter,
            colonne=error.loc,
            inst=self.currentInstructionName,
            erreur=error.msg))

    #
    # Parse actions (basic verifications)
    #
    def arg_validate(strng, loc, toks):
        """Run the semantic verifications on the instruction arguments"""
        if not self.validator.validate_instruction(toks):
            raise ParseFatalException(self.validator.get_errors(), loc=loc)

    def instructions_parse(strng, loc, toks):
        """Check that the instruction exists in the config file"""
        self.currentInstructionName = toks[0]
        if toks[0] not in INSTRUCTION_CONFIG_LIST:
            raise ParseFatalException(DOCKERFILE_ERROR[211], loc=loc)
        self.currentInstruction = INSTRUCTION_CONFIG_LIST[toks[0]]

    def args_table_parse(strng, loc, toks):
        """Check that the table form is allowed for the current instruction"""
        if self.currentInstruction[0] == 1:
            raise ParseFatalException(DOCKERFILE_ERROR[213], loc=loc)

    def args_list_parse(strng, loc, toks):
        """Check that the list form is allowed for the current instruction"""
        if self.currentInstruction[0] == 2:
            raise ParseFatalException(DOCKERFILE_ERROR[214], loc=loc)

    def args_num_parse(strng, loc, toks):
        """Check that the number of arguments is correct"""
        minArg = self.currentInstruction[1]
        maxArg = self.currentInstruction[2]
        nbArgs = len(toks)
        if not minArg <= nbArgs <= maxArg:
            raise ParseFatalException(DOCKERFILE_ERROR[215].format(
                nombre=nbArgs, min=minArg, max=maxArg), loc=loc)

    def opt_parse(strng, loc, toks):
        """Check that the option exists and is valid for the current
        instruction"""
        if toks[0] not in OPTIONAL_OPTION_CONFIG:
            raise ParseFatalException(
                DOCKERFILE_ERROR[216].format(opt=toks[0]), loc=loc)
        if self.currentInstructionName not in OPTIONAL_OPTION_CONFIG[toks[0]]:
            raise ParseFatalException(
                DOCKERFILE_ERROR[217].format(opt=toks[0]), loc=loc)

    # INIT
    ParserElement.setDefaultWhitespaceChars(" \t")

    #
    # TERMINALS
    #
    INST = Regex(r'([A-Z]+)(?<!\s)').setName('Instruction').setParseAction(
        instructions_parse)
    OPT = Regex(r'--[a-z]+=').setName('Option').setParseAction(opt_parse)
    STR = Regex(r'\"((.|\s)+?)\"').setName("quoted string")
    ARG = Regex(r'\S+').setName("argument")
    EOL = lineEnd().setName("end of line").suppress()
    COM = Regex(r'#.*').suppress()
    OH = Literal('[').suppress()
    CH = Literal(']').suppress()
    CO = Literal(',').suppress()

    #
    # NON-TERMINALS
    #
    # Arguments
    t_args_table = OH - STR - ZeroOrMore(CO - STR) - CH
    t_args_table.setName('["argument1", "argument2" …]')
    t_args_table.setParseAction(args_table_parse)

    t_args_list = ARG - ZeroOrMore(ARG)
    t_args_list.setName('argument1 argument2 …')
    t_args_list.setParseAction(args_list_parse)

    t_args = (t_args_table | t_args_list)
    t_args.setParseAction(args_num_parse)

    # Multiple lines separator
    continuation = '\\' - EOL

    # Optional elements
    t_opt = OneOrMore(OPT - Group(ARG))
    t_opt.setParseAction(opt_parse)

    # Instruction
    instruction = (INST - Group(Optional(t_opt)) -
                   Group(t_args)).setParseAction(arg_validate)

    # Line grammar
    line = (stringStart - (COM | Optional(instruction)) - EOL -
            stringEnd()).setFailAction(error)
    line.ignore(continuation)

    return line
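The line.ignore(continuation) call at the end is what lets a single logical instruction span several physical lines: pyparsing skips ignored expressions wherever they occur inside the grammar. A self-contained illustration of that idiom, with the grammar and sample deliberately simplified from the above:

import pyparsing

# Keep newlines significant, as the function above does (note that this
# mutates global pyparsing state).
pyparsing.ParserElement.setDefaultWhitespaceChars(' \t')

EOL = pyparsing.lineEnd().suppress()
instruction = (
    pyparsing.Word(pyparsing.srange('[A-Z]'))('name') +
    pyparsing.OneOrMore(pyparsing.Regex(r'\S+'))('args') + EOL)
# Backslash-newline pairs are skipped wherever they appear, so one logical
# instruction can continue onto the next physical line.
instruction.ignore(pyparsing.Literal('\\') + pyparsing.lineEnd())

tokens = instruction.parseString(
    'RUN apt-get update && \\\n    apt-get install -y curl\n')
print(tokens.name, list(tokens.args))
# RUN ['apt-get', 'update', '&&', 'apt-get', 'install', '-y', 'curl']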
class SyslogParser(text_parser.PyparsingMultiLineTextParser): """Parses syslog formatted log files""" NAME = u'syslog' DESCRIPTION = u'Syslog Parser' _ENCODING = u'utf-8' _plugin_classes = {} # The reporter and facility fields can contain any printable character, but # to allow for processing of syslog formats that delimit the reporter and # facility with printable characters, we remove certain common delimiters # from the set of printable characters. _REPORTER_CHARACTERS = u''.join( [c for c in pyparsing.printables if c not in [u':', u'[', u'<']]) _FACILITY_CHARACTERS = u''.join( [c for c in pyparsing.printables if c not in [u':', u'>']]) _SYSLOG_SEVERITY = [ u'EMERG', u'ALERT', u'CRIT', u'ERR', u'WARNING', u'NOTICE', u'INFO', u'DEBUG' ] _OFFSET_PREFIX = [u'-', u'+'] _BODY_CONTENT = (r'.*?(?=($|\n\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2})|' \ r'($|\n\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}' \ r'[\+|-]\d{2}:\d{2}\s))') _VERIFICATION_REGEX = \ re.compile(r'^\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}\s' + _BODY_CONTENT) # The Chrome OS syslog messages are of a format begininng with an # ISO 8601 combined date and time expression with timezone designator: # 2016-10-25T12:37:23.297265-07:00 # # This will then be followed by the SYSLOG Severity which will be one of: # EMERG,ALERT,CRIT,ERR,WARNING,NOTICE,INFO,DEBUG # # 2016-10-25T12:37:23.297265-07:00 INFO _CHROMEOS_VERIFICATION_REGEX = \ re.compile(r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.' r'\d{6}[\+|-]\d{2}:\d{2}\s' r'(EMERG|ALERT|CRIT|ERR|WARNING|NOTICE|INFO|DEBUG)' + _BODY_CONTENT) _PYPARSING_COMPONENTS = { u'year': text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName(u'year'), u'two_digit_month': (text_parser.PyparsingConstants.TWO_DIGITS.setResultsName( u'two_digit_month')), u'month': text_parser.PyparsingConstants.MONTH.setResultsName(u'month'), u'day': text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName( u'day'), u'hour': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(u'hour'), u'minute': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(u'minute'), u'second': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(u'second'), u'fractional_seconds': pyparsing.Word(pyparsing.nums).setResultsName(u'fractional_seconds'), u'hostname': pyparsing.Word(pyparsing.printables).setResultsName(u'hostname'), u'reporter': pyparsing.Word(_REPORTER_CHARACTERS).setResultsName(u'reporter'), u'pid': text_parser.PyparsingConstants.PID.setResultsName(u'pid'), u'facility': pyparsing.Word(_FACILITY_CHARACTERS).setResultsName(u'facility'), u'severity': pyparsing.oneOf(_SYSLOG_SEVERITY).setResultsName(u'severity'), u'body': pyparsing.Regex(_BODY_CONTENT, re.DOTALL).setResultsName(u'body'), u'comment_body': pyparsing.SkipTo(u' ---').setResultsName(u'body'), u'iso_8601_offset': (pyparsing.oneOf(_OFFSET_PREFIX) + text_parser.PyparsingConstants.TWO_DIGITS + pyparsing.Optional( pyparsing.Literal(u':') + text_parser.PyparsingConstants.TWO_DIGITS)) } _PYPARSING_COMPONENTS[u'date'] = ( _PYPARSING_COMPONENTS[u'month'] + _PYPARSING_COMPONENTS[u'day'] + _PYPARSING_COMPONENTS[u'hour'] + pyparsing.Suppress(u':') + _PYPARSING_COMPONENTS[u'minute'] + pyparsing.Suppress(u':') + _PYPARSING_COMPONENTS[u'second'] + pyparsing.Optional( pyparsing.Suppress(u'.') + _PYPARSING_COMPONENTS[u'fractional_seconds'])) _PYPARSING_COMPONENTS[u'iso_8601_date'] = pyparsing.Combine( _PYPARSING_COMPONENTS[u'year'] + pyparsing.Literal(u'-') + _PYPARSING_COMPONENTS[u'two_digit_month'] + pyparsing.Literal(u'-') + _PYPARSING_COMPONENTS[u'day'] + pyparsing.Literal(u'T') + 
_PYPARSING_COMPONENTS[u'hour'] + pyparsing.Literal(u':') + _PYPARSING_COMPONENTS[u'minute'] + pyparsing.Literal(u':') + _PYPARSING_COMPONENTS[u'second'] + pyparsing.Literal(u'.') + _PYPARSING_COMPONENTS[u'fractional_seconds'] + _PYPARSING_COMPONENTS[u'iso_8601_offset'], joinString=u'', adjacent=True).setResultsName(u'iso_8601_date') _CHROMEOS_SYSLOG_LINE = ( _PYPARSING_COMPONENTS[u'iso_8601_date'] + _PYPARSING_COMPONENTS[u'severity'] + _PYPARSING_COMPONENTS[u'reporter'] + pyparsing.Optional(pyparsing.Suppress(u':')) + pyparsing.Optional( pyparsing.Suppress(u'[') + _PYPARSING_COMPONENTS[u'pid'] + pyparsing.Suppress(u']')) + pyparsing.Optional(pyparsing.Suppress(u':')) + _PYPARSING_COMPONENTS[u'body'] + pyparsing.lineEnd()) _SYSLOG_LINE = ( _PYPARSING_COMPONENTS[u'date'] + _PYPARSING_COMPONENTS[u'hostname'] + _PYPARSING_COMPONENTS[u'reporter'] + pyparsing.Optional( pyparsing.Suppress(u'[') + _PYPARSING_COMPONENTS[u'pid'] + pyparsing.Suppress(u']')) + pyparsing.Optional( pyparsing.Suppress(u'<') + _PYPARSING_COMPONENTS[u'facility'] + pyparsing.Suppress(u'>')) + pyparsing.Optional(pyparsing.Suppress(u':')) + _PYPARSING_COMPONENTS[u'body'] + pyparsing.lineEnd()) _SYSLOG_COMMENT = (_PYPARSING_COMPONENTS[u'date'] + pyparsing.Suppress(u':') + pyparsing.Suppress(u'---') + _PYPARSING_COMPONENTS[u'comment_body'] + pyparsing.Suppress(u'---') + pyparsing.LineEnd()) _KERNEL_SYSLOG_LINE = ( _PYPARSING_COMPONENTS[u'date'] + pyparsing.Literal(u'kernel').setResultsName(u'reporter') + pyparsing.Suppress(u':') + _PYPARSING_COMPONENTS[u'body'] + pyparsing.lineEnd()) LINE_STRUCTURES = [(u'syslog_line', _SYSLOG_LINE), (u'syslog_line', _KERNEL_SYSLOG_LINE), (u'syslog_comment', _SYSLOG_COMMENT), (u'chromeos_syslog_line', _CHROMEOS_SYSLOG_LINE)] _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES]) def __init__(self): """Initializes a parser object.""" super(SyslogParser, self).__init__() self._last_month = 0 self._maximum_year = 0 self._plugin_objects_by_reporter = {} self._year_use = 0 def _UpdateYear(self, mediator, month): """Updates the year to use for events, based on last observed month. Args: mediator (ParserMediator): mediates the interactions between parsers and other components, such as storage and abort signals. month (int): month observed by the parser, where January is 1. """ if not self._year_use: self._year_use = mediator.GetEstimatedYear() if not self._maximum_year: self._maximum_year = mediator.GetLatestYear() if not self._last_month: self._last_month = month return # Some syslog daemons allow out-of-order sequences, so allow some leeway # to not cause Apr->May->Apr to cause the year to increment. # See http://bugzilla.adiscon.com/show_bug.cgi?id=527 if self._last_month > (month + 1): if self._year_use != self._maximum_year: self._year_use += 1 self._last_month = month def EnablePlugins(self, plugin_includes): """Enables parser plugins. Args: plugin_includes (list[str]): names of the plugins to enable, where None or an empty list represents all plugins. Note that the default plugin is handled separately. """ super(SyslogParser, self).EnablePlugins(plugin_includes) self._plugin_objects_by_reporter = {} for plugin_object in self._plugin_objects: self._plugin_objects_by_reporter[ plugin_object.REPORTER] = plugin_object def ParseRecord(self, mediator, key, structure): """Parses a matching entry. Args: mediator (ParserMediator): mediates the interactions between parsers and other components, such as storage and abort signals. key (str): name of the parsed structure. 
      structure (pyparsing.ParseResults): elements parsed from the file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in self._SUPPORTED_KEYS:
      raise errors.ParseError(
          u'Unable to parse record, unknown structure: {0:s}'.format(key))

    if key == u'chromeos_syslog_line':
      timestamp = timelib.Timestamp.FromTimeString(
          structure.iso_8601_date[0])
    else:
      month = timelib.MONTH_DICT.get(structure.month.lower(), None)
      if not month:
        mediator.ProduceParserError(
            u'Invalid month value: {0!s}'.format(structure.month))
        return

      self._UpdateYear(mediator, month)
      timestamp = timelib.Timestamp.FromTimeParts(
          year=self._year_use, month=month, day=structure.day,
          hour=structure.hour, minutes=structure.minute,
          seconds=structure.second, timezone=mediator.timezone)

    plugin_object = None
    if key == u'syslog_comment':
      event_data = SyslogCommentEventData()
      event_data.body = structure.body
      # TODO: pass line number to offset or remove.
      event_data.offset = 0

    else:
      event_data = SyslogLineEventData()
      event_data.body = structure.body
      event_data.hostname = structure.hostname or None
      # TODO: pass line number to offset or remove.
      event_data.offset = 0
      event_data.pid = structure.pid
      event_data.reporter = structure.reporter
      event_data.severity = structure.severity

      plugin_object = self._plugin_objects_by_reporter.get(
          structure.reporter, None)
      if plugin_object:
        attributes = {
            u'hostname': structure.hostname,
            u'severity': structure.severity,
            u'reporter': structure.reporter,
            u'pid': structure.pid,
            u'body': structure.body}

        try:
          # TODO: pass event_data instead of attributes.
          plugin_object.Process(mediator, timestamp, attributes)

        except errors.WrongPlugin:
          plugin_object = None

    if not plugin_object:
      event = time_events.TimestampEvent(
          timestamp, eventdata.EventTimestamp.WRITTEN_TIME)
      mediator.ProduceEventWithEventData(event, event_data)

  def VerifyStructure(self, unused_mediator, line):
    """Verifies that this is a syslog-formatted file.

    Args:
      mediator (ParserMediator): mediates the interactions between parsers
          and other components, such as storage and abort signals.
      line (str): single line from the text file.

    Returns:
      bool: whether the line appears to contain syslog content.
    """
    return (re.match(self._VERIFICATION_REGEX, line) or
            re.match(self._CHROMEOS_VERIFICATION_REGEX, line)) is not None
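The Chrome OS ISO 8601 Combine grammar above can be exercised on its own. A minimal sketch with the digit constants inlined (the sample line is fabricated):

import pyparsing

def _digits(count):
  return pyparsing.Word(pyparsing.nums, exact=count)

ISO_8601 = pyparsing.Combine(
    _digits(4) + '-' + _digits(2) + '-' + _digits(2) + 'T' +
    _digits(2) + ':' + _digits(2) + ':' + _digits(2) + '.' + _digits(6) +
    pyparsing.oneOf(['-', '+']) + _digits(2) +
    pyparsing.Optional(':' + _digits(2)),
    joinString='', adjacent=True)

SEVERITY = pyparsing.oneOf(
    ['EMERG', 'ALERT', 'CRIT', 'ERR', 'WARNING', 'NOTICE', 'INFO', 'DEBUG'])

CHROMEOS_PREFIX = ISO_8601('iso_8601_date') + SEVERITY('severity')

tokens = CHROMEOS_PREFIX.parseString(
    '2016-10-25T12:37:23.297265-07:00 INFO periodic[13707]: poll complete')
print(tokens.iso_8601_date)  # 2016-10-25T12:37:23.297265-07:00
print(tokens.severity)       # INFO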
class Exim4Parser(text_parser.PyparsingSingleLineTextParser): """Parses exim4 formatted log files""" NAME = u'exim4' DESCRIPTION = u'Exim4 Parser' _ENCODING = u'utf-8' _VERIFICATION_REGEX = re.compile( r'^\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}\s') _plugin_classes = {} # The reporter and facility fields can contain any printable character, but # to allow for processing of syslog formats that delimit the reporter and # facility with printable characters, we remove certain common delimiters # from the set of printable characters. _REPORTER_CHARACTERS = u''.join( [c for c in pyparsing.printables if c not in [u':', u'[', u'<']]) _FACILITY_CHARACTERS = u''.join( [c for c in pyparsing.printables if c not in [u':', u'>']]) _PYPARSING_COMPONENTS = { u'year': text_parser.PyparsingConstants.YEAR.setResultsName(u'year'), u'month': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(u'month'), u'day': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(u'day'), u'hour': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(u'hour'), u'minute': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(u'minute'), u'second': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(u'second'), u'body': pyparsing.Regex(r'.*?(?=($|\n\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}))', re.DOTALL).setResultsName(u'body') } _PYPARSING_COMPONENTS[u'date'] = ( _PYPARSING_COMPONENTS[u'year'] + pyparsing.Suppress(u'-') + _PYPARSING_COMPONENTS[u'month'] + pyparsing.Suppress(u'-') + _PYPARSING_COMPONENTS[u'day'] + _PYPARSING_COMPONENTS[u'hour'] + pyparsing.Suppress(u':') + _PYPARSING_COMPONENTS[u'minute'] + pyparsing.Suppress(u':') + _PYPARSING_COMPONENTS[u'second']) _EXIM4_LINE = (_PYPARSING_COMPONENTS[u'date'] + pyparsing.Optional(pyparsing.Suppress(u':')) + _PYPARSING_COMPONENTS[u'body'] + pyparsing.lineEnd()) LINE_STRUCTURES = [(u'exim4_line', _EXIM4_LINE)] _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES]) def __init__(self): """Initializes a parser object.""" super(Exim4Parser, self).__init__() self._last_month = 0 self._maximum_year = 0 self._plugin_objects_by_reporter = {} self._year_use = 0 def EnablePlugins(self, plugin_includes): """Enables parser plugins. Args: plugin_includes (list[str]): names of the plugins to enable, where None or an empty list represents all plugins. Note that the default plugin is handled separately. """ super(Exim4Parser, self).EnablePlugins(plugin_includes) self._plugin_objects_by_reporter = {} for plugin_object in self._plugin_objects: self._plugin_objects_by_reporter[ plugin_object.REPORTER] = plugin_object def ParseRecord(self, mediator, key, structure): """Parses a matching entry. Args: mediator (ParserMediator): mediates the interactions between parsers and other components, such as storage and abort signals. key (str): name of the parsed structure. structure (pyparsing.ParseResults): elements parsed from the file. Raises: UnableToParseFile: if an unsupported key is provided. 
""" if key not in self._SUPPORTED_KEYS: raise errors.UnableToParseFile( u'Unsupported key: {0:s}'.format(key)) timestamp = timelib.Timestamp.FromTimeParts(year=structure.year, month=structure.month, day=structure.day, hour=structure.hour, minutes=structure.minute, seconds=structure.second, timezone=mediator.timezone) reporter = structure.reporter attributes = {u'body': structure.body} plugin_object = self._plugin_objects_by_reporter.get(reporter, None) if not plugin_object: event_object = Exim4LineEvent(timestamp, 0, attributes) mediator.ProduceEvent(event_object) else: try: plugin_object.Process(mediator, timestamp, attributes) except errors.WrongPlugin: event_object = Exim4LineEvent(timestamp, 0, attributes) mediator.ProduceEvent(event_object) def VerifyStructure(self, unused_mediator, line): """Verifies that this is a exim4-formatted file. Args: mediator (ParserMediator): mediates the interactions between parsers and other components, such as storage and abort signals. line (str): single line from the text file. Returns: bool: whether the line appears to contain syslog content. """ return re.match(self._VERIFICATION_REGEX, line) is not None