class TestTextParser(text_parser.SlowLexicalTextParser):
    """Lexical text parser used to exercise the slow text parser logic.

    The token table splits a line into a slash separated date, a time, a
    "user:host" pair terminated by "- " and the remaining text of the line.
    """

    NAME = 'test_text'

    tokens = [
        lexer.Token('INITIAL', r'^([\d\/]+) ', 'SetDate', 'TIME'),
        lexer.Token('TIME', r'([0-9:\.]+) ', 'SetTime', 'STRING_HOST'),
        lexer.Token(
            'STRING_HOST', r'([^\-]+)- ', 'ParseStringHost', 'STRING'),
        lexer.Token('STRING', '([^\n]+)', 'ParseString', ''),
        lexer.Token('STRING', '\n', 'ParseMessage', 'INITIAL'),
    ]

    def ParseStringHost(self, match, **_):
        """Stores the username and hostname of a "user:host" string.

        Args:
          match: a regular expression match whose first group holds the
              colon separated username and hostname.
        """
        username, hostname = match.group(1).split(':')
        self.attributes['hostname'] = hostname
        self.attributes['username'] = username

    def SetDate(self, match, **_):
        """Stores the month, year and day of a "month/day/year" string.

        Args:
          match: a regular expression match whose first group holds the
              slash separated date.
        """
        month_text, day_text, year_text = match.group(1).split('/')
        # The attributes are written in the order: month, year, day.
        converted = (
            ('imonth', month_text),
            ('iyear', year_text),
            ('iday', day_text))
        for attribute_name, text in converted:
            self.attributes[attribute_name] = int(text)

    def Scan(self, unused_file_entry):
        """Does nothing; the file entry is ignored."""

    def CreateEvent(self, timestamp, offset, attributes):
        """Creates a test text event.

        The event is constructed with an offset of 0, after which its
        offset attribute is set to the requested value.

        Args:
          timestamp: the timestamp passed on to TestTextEvent.
          offset: the offset to store on the event.
          attributes: the event attributes passed on to TestTextEvent.

        Returns:
          The TestTextEvent that was created.
        """
        test_event = TestTextEvent(timestamp, 0, attributes)
        test_event.offset = offset
        return test_event
class TestTextParser(text_parser.SlowLexicalTextParser):
    """Lexical text parser used to exercise the slow text parser logic.

    The token table splits a line into a slash separated date, a time, a
    "user:host" pair terminated by "- " and the remaining text of the line.
    """

    NAME = u'test_text'

    tokens = [
        lexer.Token(u'INITIAL', r'^([\d\/]+) ', u'SetDate', u'TIME'),
        lexer.Token(u'TIME', r'([0-9:\.]+) ', u'SetTime', u'STRING_HOST'),
        lexer.Token(
            u'STRING_HOST', r'([^\-]+)- ', u'ParseStringHost', u'STRING'),
        lexer.Token(u'STRING', r'([^\n]+)', u'ParseString', u''),
        lexer.Token(u'STRING', r'\n', u'ParseMessage', u'INITIAL'),
    ]

    def CreateEvent(self, timestamp, offset, attributes):
        """Creates a test text event.

        The event is constructed with an offset of 0, after which its
        offset attribute is set to the requested value.

        Args:
          timestamp: the timestamp which is an integer containing the number
              of micro seconds since January 1, 1970, 00:00:00 UTC.
          offset: an integer containing the offset.
          attributes: a dictionary containing the event attributes.

        Returns:
          An event object (instance of TestTextEvent).
        """
        test_event = TestTextEvent(timestamp, 0, attributes)
        test_event.offset = offset
        return test_event

    def ParseStringHost(self, match, **_):
        """Stores the username and hostname of a "user:host" string.

        Args:
          match: a regular expression match whose first group holds the
              colon separated username and hostname.
        """
        username, hostname = match.group(1).split(u':')
        self.attributes[u'hostname'] = hostname
        self.attributes[u'username'] = username

    def SetDate(self, match, **_):
        """Stores the month, year and day of a "month/day/year" string.

        Args:
          match: a regular expression match whose first group holds the
              slash separated date.
        """
        month_text, day_text, year_text = match.group(1).split(u'/')
        # The attributes are written in the order: month, year, day.
        converted = (
            (u'imonth', month_text),
            (u'iyear', year_text),
            (u'iday', day_text))
        for attribute_name, text in converted:
            self.attributes[attribute_name] = int(text)
class PathReplacer(lexer.Lexer):
    """Lexer that expands {name} path variables from a preprocessing object.

    A "{name}" sequence is replaced by the attribute "name" of the
    preprocessing object, a "{{name}}" sequence is emitted as the literal
    text "{name}" and every other character is copied unchanged.
    """

    tokens = [
        lexer.Token('.', '{{([^}]+)}}', 'ReplaceVariable', ''),
        lexer.Token('.', '{([^}]+)}', 'ReplaceString', ''),
        lexer.Token('.', '([^{])', 'ParseString', ''),
    ]

    def __init__(self, pre_obj, data=''):
        """Initializes the path replacer.

        Args:
          pre_obj: the object whose attributes provide the values of the
              path variables.
          data: optional initial data handed to the lexer.
        """
        super(PathReplacer, self).__init__(data)
        self._path = []
        self._pre_obj = pre_obj

    def GetPath(self):
        """Runs the lexer until it is empty and returns the resulting path.

        Returns:
          A Unicode string with the collected path segments joined together.
        """
        # At least one token is always requested before testing for empty.
        self.NextToken()
        while not self.Empty():
            self.NextToken()

        return u''.join(self._path)

    def ParseString(self, match, **_):
        """Copies a literal character to the path.

        Args:
          match: a regular expression match whose first group is the text.
        """
        literal_text = match.group(1)
        self._path.append(literal_text)

    def ReplaceVariable(self, match, **_):
        """Emits a double braced name as literal single braced text.

        Args:
          match: a regular expression match whose first group is the name.
        """
        escaped_text = u'{{{0:s}}}'.format(match.group(1))
        self._path.append(escaped_text)

    def ReplaceString(self, match, **_):
        """Replaces a path variable with the preprocessing attribute value.

        Args:
          match: a regular expression match whose first group is the
              name of the attribute to look up.

        Raises:
          PathNotFound: if the attribute is missing or has a false value.
        """
        variable_name = match.group(1)
        value = getattr(self._pre_obj, variable_name, None)
        if not value:
            raise errors.PathNotFound(
                u'Path variable: {} not discovered yet.'.format(
                    variable_name))

        self._path.append(value)
class SyslogParser(text_parser.SlowLexicalTextParser):
    """Parser for text based syslog files."""

    NAME = u'syslog'
    DESCRIPTION = u'Parser for syslog files.'

    # TODO: can we change this similar to SQLite where create an
    # event specific object for different lines using a callback function.

    # The tokens below describe the layout of a syslog line: month, day,
    # time, hostname, reporter (with optional PID) and the message body.
    tokens = [
        lexer.Token(
            u'INITIAL',
            u'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ',
            u'SetMonth', u'DAY'),
        lexer.Token(u'DAY', r'\s?(\d{1,2})\s+', u'SetDay', u'TIME'),
        lexer.Token(u'TIME', r'([0-9:\.]+) ', u'SetTime', u'STRING_HOST'),
        lexer.Token(u'STRING_HOST', r'^--(-)', u'ParseHostname', u'STRING'),
        lexer.Token(
            u'STRING_HOST', r'([^\s]+) ', u'ParseHostname', u'STRING_PID'),
        lexer.Token(u'STRING_PID', r'([^\:\n]+)', u'ParsePid', u'STRING'),
        lexer.Token(u'STRING', r'([^\n]+)', u'ParseString', u''),
        lexer.Token(u'STRING', r'\n\t', None, u''),
        lexer.Token(u'STRING', r'\t', None, u''),
        lexer.Token(u'STRING', r'\n', u'ParseMessage', u'INITIAL'),
        lexer.Token(u'.', r'([^\n]+)\n', u'ParseIncomplete', u'INITIAL'),
        lexer.Token(u'.', r'\n[^\t]', u'ParseIncomplete', u'INITIAL'),
        lexer.Token(u'S[.]+', r'(.+)', u'ParseString', u''),
    ]

    def __init__(self):
        """Initializes a syslog parser object."""
        super(SyslogParser, self).__init__(local_zone=True)

        # A year of 0 means "not determined yet"; ParseLine fills it in.
        self._year_use = 0
        self._last_month = 0

        # Syslog specific attributes on top of the generic ones.
        self.attributes[u'reporter'] = u''
        self.attributes[u'pid'] = u''

    def ParseLine(self, parser_mediator):
        """Parses a single line from the syslog file.

        Before handing over to the parent class the year is determined:
        it starts at the estimated year of the parser mediator and is
        incremented whenever the month is lower than that of the
        previously parsed line.

        Args:
          parser_mediator: A parser mediator object (instance of
              ParserMediator).
        """
        if not self._year_use:
            self._year_use = parser_mediator.GetEstimatedYear()

        current_month = int(self.attributes[u'imonth'])
        month_went_back = self._last_month > current_month
        if current_month and month_went_back:
            self._year_use += 1

        self._last_month = current_month
        self.attributes[u'iyear'] = self._year_use

        super(SyslogParser, self).ParseLine(parser_mediator)

    def ParseHostname(self, match=None, **unused_kwargs):
        """Stores the hostname.

        Callback of the lexer for the STRING_HOST state.

        Args:
          match: The regular expression match object.
        """
        hostname = match.group(1)
        self.attributes[u'hostname'] = hostname

    def ParsePid(self, match=None, **unused_kwargs):
        """Stores the reporter and the process identifier (PID).

        Callback of the lexer for the STRING_PID state. Text that ends
        with "]" is split on its last "[": the part in front is the
        reporter and the part behind it, minus the closing bracket, the
        PID. A PID that is not an integer is stored as 0. Any other text
        is stored as the reporter in its entirety.

        Args:
          match: The regular expression match object.
        """
        # TODO: Change this logic and rather add more Tokens that
        # fully cover all variations of the various PID stages.
        line = match.group(1)
        if line[-1] != ']':
            self.attributes[u'reporter'] = line
            return

        reporter, _, pid_text = line.rpartition(u'[')
        self.attributes[u'reporter'] = reporter

        try:
            pid = int(pid_text[:-1])
        except ValueError:
            pid = 0
        self.attributes[u'pid'] = pid

    def ParseString(self, match=None, **unused_kwargs):
        """Appends (body text) string data to the body attribute.

        Callback of the lexer for the STRING state.

        Args:
          match: The regular expression match object.
        """
        self.attributes[u'body'] += utils.GetUnicodeString(match.group(1))

    def PrintLine(self):
        """Returns the printable log line of the parent class.

        The year attribute is set to 2012 before the line is generated.
        """
        self.attributes[u'iyear'] = 2012
        printable_line = super(SyslogParser, self).PrintLine()
        return printable_line

    # TODO: this is a rough initial implementation to get this working.
    def CreateEvent(self, timestamp, offset, attributes):
        """Creates a syslog line event.

        Overrides the method of the parent class so that syslog line
        events are created instead of text events.

        Args:
          timestamp: The timestamp time value. The timestamp contains the
              number of microseconds since Jan 1, 1970 00:00:00 UTC.
          offset: The offset of the event.
          attributes: A dict that contains the events attributes.

        Returns:
          A text event (SyslogLineEvent).
        """
        return SyslogLineEvent(timestamp, offset, attributes)
class SELinuxParser(text_parser.SlowLexicalTextParser):
    """Parser for SELinux audit log files."""

    NAME = 'selinux'
    DESCRIPTION = u'Parser for SELinux audit log files.'

    PID_RE = re.compile(r'pid=([0-9]+)[\s]+', re.DOTALL)

    tokens = [
        # Empty lines are skipped; both "\n" and "\r\n" line endings are
        # accepted here and in the other states.
        lexer.Token('INITIAL', r'^\r?\n', '', ''),
        # Entry point of the state machine ('type=anything msg=audit').
        # It identifies a SELinux audit file and captures the audit type,
        # which must not be empty. The only follow-up state is TIMESTAMP.
        # Examples:
        #   type=SYSCALL msg=audit(...): ...
        #   type=UNKNOWN[1323] msg=audit(...): ...
        lexer.Token(
            'INITIAL', r'^type=([\w]+(\[[0-9]+\])?)[ \t]+msg=audit',
            'ParseType', 'TIMESTAMP'),
        lexer.Token(
            'TIMESTAMP', r'\(([0-9]+)\.([0-9]+):([0-9]*)\):',
            'ParseTime', 'STRING'),
        # The description of the log entry; the state is not changed.
        lexer.Token('STRING', r'[ \t]*([^\r\n]+)', 'ParseString', ''),
        # End of the entry. An empty description is accepted and does not
        # cause a parsing failure.
        lexer.Token('STRING', r'[ \t]*\r?\n', 'ParseMessage', 'INITIAL'),
        # Anything else is a line in an unexpected format.
        lexer.Token('.', '([^\r\n]+)\r?\n', 'ParseFailed', 'INITIAL'),
    ]

    def __init__(self):
        """Initializes a parser object."""
        # The timestamps are in UTC, hence local_zone is False.
        super(SELinuxParser, self).__init__(local_zone=False)
        self.attributes = {u'audit_type': '', u'pid': '', u'body': ''}
        self.timestamp = 0

    def ParseType(self, match=None, **unused_kwargs):
        """Stores the audit event type.

        Args:
          match: The regular expression match object.
        """
        audit_type = match.group(1)
        self.attributes[u'audit_type'] = audit_type

    def ParseTime(self, match=None, **unused_kwargs):
        """Stores the timestamp of the log entry.

        The first match group holds the POSIX time in seconds, the second
        one is multiplied by 1000 and added to the converted timestamp.

        Args:
          match: The regular expression match object.

        Raises:
          ParseError: if the values cannot be converted into integers.
        """
        # TODO: do something with match.group(3) ?
        try:
            posix_seconds = int(match.group(1), 10)
            base_timestamp = timelib.Timestamp.FromPosixTime(posix_seconds)
            self.timestamp = base_timestamp + int(match.group(2), 10) * 1000
        except ValueError as exception:
            logging.error(
                u'Unable to retrieve timestamp with error: {0:s}'.format(
                    exception))
            self.timestamp = 0
            raise lexer.ParseError(u'Not a valid timestamp.')

    def ParseString(self, match=None, **unused_kwargs):
        """Appends a string to the body attribute.

        After the body has been extended it is searched for a
        'pid=[0-9]+' value, which is stored in the pid attribute.

        Args:
          match: The regular expression match object.
        """
        try:
            description = match.group(1)
        except IndexError:
            # No capture group: fall back on the complete match.
            self.attributes[u'body'] += match.group(0).strip(u'\n')
            return

        self.attributes[u'body'] += description

        # TODO: fix it using lexer or remove pid parsing.
        # The lexer could handle this, however the 'pid' field has no fixed
        # position; searching the body keeps the state machine simpler.
        # The 'to do' is a reminder of a possible refactoring.
        pid_match = self.PID_RE.search(self.attributes[u'body'])
        if pid_match:
            self.attributes[u'pid'] = pid_match.group(1)

    def ParseFailed(self, **unused_kwargs):
        """Callback for a line that could not be parsed.

        Raises:
          ParseError: always.
        """
        raise lexer.ParseError(u'Unable to parse SELinux log line.')

    def ParseLine(self, parser_mediator):
        """Produces an event for a single line of the SELinux audit file.

        The event gets the timestamp that was taken from the log entry,
        after which the stored timestamp is reset to 0.

        Args:
          parser_mediator: A parser mediator object (instance of
              ParserMediator).

        Raises:
          TimestampNotCorrectlyFormed: if no timestamp has been set.
        """
        if not self.timestamp:
            raise errors.TimestampNotCorrectlyFormed(
                u'Unable to parse entry, timestamp not defined.')

        entry_offset = getattr(self, u'entry_offset', 0)
        parser_mediator.ProduceEvent(
            SELinuxLineEvent(self.timestamp, entry_offset, self.attributes))
        self.timestamp = 0
class Parser(lexer.SearchParser):
    """Parses and generates an AST for a query written in the described language.

    Examples of valid syntax:
      size is 40
      (name contains "Program Files" AND hash.md5 is "123abc")
      @imported_modules (num_symbols = 14 AND symbol.name is "FindWindow")
    """

    expression_cls = BasicExpression
    binary_expression_cls = BinaryExpression
    context_cls = ContextExpression

    tokens = [
        # Operators and related tokens
        lexer.Token(
            'INITIAL', r'\@[\w._0-9]+', 'ContextOperator,PushState',
            'CONTEXTOPEN'),
        lexer.Token(
            'INITIAL', r'[^\s\(\)]', 'PushState,PushBack', 'ATTRIBUTE'),
        lexer.Token('INITIAL', r'\(', 'PushState,BracketOpen', None),
        lexer.Token('INITIAL', r'\)', 'BracketClose', 'BINARY'),

        # Context
        lexer.Token('CONTEXTOPEN', r'\(', 'BracketOpen', 'INITIAL'),

        # Double quoted string
        lexer.Token('STRING', '"', 'PopState,StringFinish', None),
        lexer.Token('STRING', r'\\x(..)', 'HexEscape', None),
        lexer.Token('STRING', r'\\(.)', 'StringEscape', None),
        lexer.Token('STRING', r'[^\\"]+', 'StringInsert', None),

        # Single quoted string
        lexer.Token('SQ_STRING', '\'', 'PopState,StringFinish', None),
        lexer.Token('SQ_STRING', r'\\x(..)', 'HexEscape', None),
        lexer.Token('SQ_STRING', r'\\(.)', 'StringEscape', None),
        lexer.Token('SQ_STRING', r'[^\\\']+', 'StringInsert', None),

        # Basic expression
        lexer.Token(
            'ATTRIBUTE', r'[\w._0-9]+', 'StoreAttribute', 'OPERATOR'),
        lexer.Token('OPERATOR', r'not ', 'FlipLogic', None),
        lexer.Token(
            'OPERATOR', r'(\w+|[<>!=]=?)', 'StoreOperator', 'CHECKNOT'),
        lexer.Token('CHECKNOT', r'not', 'FlipLogic', 'ARG'),
        lexer.Token('CHECKNOT', r'\s+', None, None),
        lexer.Token('CHECKNOT', r'([^not])', 'PushBack', 'ARG'),
        lexer.Token('ARG', r'(\d+\.\d+)', 'InsertFloatArg', 'ARG'),
        # A base16 integer consists of hexadecimal digits, not only of
        # decimal ones; with r'0x\d+' a value such as 0x1f was cut off
        # after "0x1".
        lexer.Token('ARG', r'(0x[0-9a-fA-F]+)', 'InsertInt16Arg', 'ARG'),
        lexer.Token('ARG', r'(\d+)', 'InsertIntArg', 'ARG'),
        lexer.Token('ARG', '"', 'PushState,StringStart', 'STRING'),
        lexer.Token('ARG', '\'', 'PushState,StringStart', 'SQ_STRING'),

        # When the last parameter from arg_list has been pushed

        # State where binary operators are supported (AND, OR)
        lexer.Token(
            'BINARY', r'(?i)(and|or|\&\&|\|\|)', 'BinaryOperator',
            'INITIAL'),
        # - We can also skip spaces
        lexer.Token('BINARY', r'\s+', None, None),
        # - But if it's not "and" or just spaces we have to go back
        lexer.Token('BINARY', '.', 'PushBack,PopState', None),

        # Skip whitespace.
        lexer.Token('.', r'\s+', None, None),
    ]

    def StoreAttribute(self, string='', **kwargs):
        """Stores the attribute and resets the negation flag.

        Args:
          string: the attribute name that matched.
          kwargs: additional keyword arguments, passed on to the parent
              class.
        """
        self.flipped = False
        super(Parser, self).StoreAttribute(string, **kwargs)

    def FlipAllowed(self):
        """Raise an error if the not keyword is used where it is not allowed.

        Raises:
          ParseError: if the negation flag has never been defined or if
              the expression is negated while its operator is not one of
              is, contains, inset or equals.
        """
        if not hasattr(self, 'flipped'):
            raise errors.ParseError(u'Not defined.')

        if not self.flipped:
            return

        operator = self.current_expression.operator
        if operator and operator.lower() not in (
                'is', 'contains', 'inset', 'equals'):
            raise errors.ParseError(
                u'Keyword \'not\' does not work against operator: {0:s}'.
                format(operator))

    def FlipLogic(self, **unused_kwargs):
        """Flip the boolean logic of the expression.

        If an expression is configured to return True when the condition
        is met this logic will flip that to False, and vice versa.

        Raises:
          ParseError: if the expression has already been negated, if it
              already has arguments or if negation is not allowed for
              its operator.
        """
        if getattr(self, 'flipped', False):
            raise errors.ParseError(
                u'The operator \'not\' can only be expressed once.')

        if self.current_expression.args:
            raise errors.ParseError(
                u'Unable to place the keyword \'not\' after an argument.')

        self.flipped = True

        # Check if this flip operation should be allowed.
        self.FlipAllowed()

        if hasattr(self.current_expression, 'FlipBool'):
            self.current_expression.FlipBool()
            logging.debug(u'Negative matching [flipping boolean logic].')
        else:
            logging.warning(
                u'Unable to perform a negative match, issuing a positive '
                u'one.')

    def InsertArg(self, string='', **unused_kwargs):
        """Insert an arg to the current expression.

        Args:
          string: the argument value; not necessarily of type string.

        Returns:
          The string 'BINARY' when the expression is complete, otherwise
          None.
        """
        # Note that "string" is not necessarily of type string.
        logging.debug(u'Storing argument: {0!s}'.format(string))

        # Check if this flip operation should be allowed.
        self.FlipAllowed()

        # This expression is complete
        if self.current_expression.AddArg(string):
            self.stack.append(self.current_expression)
            self.current_expression = self.expression_cls()
            # We go to the BINARY state, to find if there's an AND or OR
            # operator
            return 'BINARY'

    def InsertFloatArg(self, string='', **unused_kwargs):
        """Inserts a Float argument.

        Args:
          string: the text of the floating point value.

        Returns:
          The return value of InsertArg.

        Raises:
          ParseError: if the text cannot be converted into a float.
        """
        try:
            float_value = float(string)
        except (TypeError, ValueError):
            raise errors.ParseError(
                u'{0:s} is not a valid float.'.format(string))
        return self.InsertArg(float_value)

    def InsertIntArg(self, string='', **unused_kwargs):
        """Inserts an Integer argument.

        Args:
          string: the text of the integer value.

        Returns:
          The return value of InsertArg.

        Raises:
          ParseError: if the text cannot be converted into an integer.
        """
        try:
            int_value = int(string)
        except (TypeError, ValueError):
            raise errors.ParseError(
                u'{0:s} is not a valid integer.'.format(string))
        return self.InsertArg(int_value)

    def InsertInt16Arg(self, string='', **unused_kwargs):
        """Inserts an Integer in base16 argument.

        Args:
          string: the text of the base16 integer value.

        Returns:
          The return value of InsertArg.

        Raises:
          ParseError: if the text cannot be converted into an integer.
        """
        try:
            int_value = int(string, 16)
        except (TypeError, ValueError):
            raise errors.ParseError(
                u'{0:s} is not a valid base16 integer.'.format(string))
        return self.InsertArg(int_value)

    def StringFinish(self, **unused_kwargs):
        """Hands the collected string to the attribute or argument handler.

        Returns:
          The return value of StoreAttribute in the ATTRIBUTE state, that
          of InsertArg in the ARG state and None in any other state.
        """
        if self.state == 'ATTRIBUTE':
            return self.StoreAttribute(string=self.string)
        elif self.state == 'ARG':
            return self.InsertArg(string=self.string)

    def StringEscape(self, string, match, **unused_kwargs):
        """Escape backslashes found inside a string quote.

        Only a backslash followed by a backslash, a quote character or one
        of the characters r, n, b, t, w, s or a dot is accepted.

        Args:
          string: The string that matched.
          match: the match object (instance of re.MatchObject).
              Where match.group(1) contains the escaped code.

        Raises:
          ParseError: When the escaped character is not supported.
        """
        # NOTE(review): str.decode('string_escape') only exists on
        # Python 2.
        if match.group(1) in '\\\'"rnbt\\.ws':
            self.string += string.decode('string_escape')
        else:
            raise errors.ParseError(
                u'Invalid escape character {0:s}.'.format(string))

    def HexEscape(self, string, match, **unused_kwargs):
        """Converts a hex escaped string.

        Args:
          string: The string that matched.
          match: the match object (instance of re.MatchObject).
              Where match.group(1) contains the two hexadecimal digits.

        Raises:
          ParseError: if the digits cannot be converted.
        """
        logging.debug(u'HexEscape matched {0:s}.'.format(string))
        hex_string = match.group(1)
        try:
            self.string += binascii.unhexlify(hex_string)
        # Python 2 raises TypeError for invalid input, Python 3 raises
        # binascii.Error instead.
        except (TypeError, binascii.Error):
            raise errors.ParseError(
                u'Invalid hex escape {0:s}.'.format(string))

    def ContextOperator(self, string='', **unused_kwargs):
        """Pushes a context expression onto the stack.

        Args:
          string: the text that matched; its first character is dropped.
        """
        self.stack.append(self.context_cls(string[1:]))

    def Reduce(self):
        """Reduce the token stack into an AST.

        Returns:
          The single item that remains on the stack.

        Raises:
          ParseError: if the parser stopped in a state other than INITIAL
              or BINARY, or if the stack cannot be reduced to one item.
        """
        # Check for sanity
        if self.state not in ('INITIAL', 'BINARY'):
            self.Error(u'Premature end of expression')

        length = len(self.stack)
        while length > 1:
            # Precedence order
            self._CombineParenthesis()
            self._CombineBinaryExpressions('and')
            self._CombineBinaryExpressions('or')
            self._CombineContext()

            # No change
            if len(self.stack) == length:
                break
            length = len(self.stack)

        if length != 1:
            self.Error(u'Illegal query expression')

        return self.stack[0]

    def Error(self, message=None, _=None):
        """Raises a parse error that describes the current position.

        Args:
          message: optional description of the error.

        Raises:
          ParseError: always.
        """
        # Note that none of the values necessarily are strings.
        raise errors.ParseError(
            u'{0!s} in position {1!s}: {2!s} <----> {3!s} )'.format(
                message, len(self.processed_buffer), self.processed_buffer,
                self.buffer))

    def _CombineBinaryExpressions(self, operator):
        """Attaches neighbouring expressions to matching binary expressions.

        Args:
          operator: the operator, compared case insensitively, of the
              binary expressions to combine.
        """
        for i in range(1, len(self.stack) - 1):
            item = self.stack[i]
            if (isinstance(item, lexer.BinaryExpression) and
                    item.operator.lower() == operator.lower() and
                    isinstance(self.stack[i - 1], lexer.Expression) and
                    isinstance(self.stack[i + 1], lexer.Expression)):
                lhs = self.stack[i - 1]
                rhs = self.stack[i + 1]

                self.stack[i].AddOperands(lhs, rhs)
                # Mark the consumed operands for removal.
                self.stack[i - 1] = None
                self.stack[i + 1] = None

        # A list is built explicitly: filter() returns an iterator on
        # Python 3, which supports neither len() nor indexing.
        self.stack = [item for item in self.stack if item]

    def _CombineContext(self):
        """Attaches expressions to the context expression in front of them."""
        # Context can merge from item 0
        for i in range(len(self.stack) - 1, 0, -1):
            item = self.stack[i - 1]
            if (isinstance(item, ContextExpression) and
                    isinstance(self.stack[i], lexer.Expression)):
                expression = self.stack[i]
                self.stack[i - 1].SetExpression(expression)
                # Mark the consumed expression for removal.
                self.stack[i] = None

        # A list is built explicitly: filter() returns an iterator on
        # Python 3, which supports neither len() nor indexing.
        self.stack = [item for item in self.stack if item]
class SelectiveLexer(lexer.Lexer):
    """Lexer for selective (or dynamic) filter expressions.

    A selective filter expression looks like:
      SELECT field_a, field_b WHERE attribute contains 'text'
    """

    tokens = [
        lexer.Token('INITIAL', r'SELECT', '', 'FIELDS'),
        lexer.Token('FIELDS', r'(.+) WHERE ', 'SetFields', 'FILTER'),
        lexer.Token('FIELDS', r'(.+) LIMIT', 'SetFields', 'LIMIT_END'),
        lexer.Token('FIELDS', r'(.+) SEPARATED BY', 'SetFields', 'SEPARATE'),
        lexer.Token('FIELDS', r'(.+)$', 'SetFields', 'END'),
        lexer.Token('FILTER', r'(.+) SEPARATED BY', 'SetFilter', 'SEPARATE'),
        lexer.Token('FILTER', r'(.+) LIMIT', 'SetFilter', 'LIMIT_END'),
        lexer.Token('FILTER', r'(.+)$', 'SetFilter', 'END'),
        # White space is ignored in the SEPARATE state.
        lexer.Token('SEPARATE', r' ', '', ''),
        lexer.Token('SEPARATE', r'LIMIT', '', 'LIMIT_END'),
        lexer.Token(
            'SEPARATE', r'[\'"]([^ \'"]+)[\'"] LIMIT', 'SetSeparator',
            'LIMIT_END'),
        lexer.Token(
            'SEPARATE', r'[\'"]([^ \'"]+)[\'"]$', 'SetSeparator', 'END'),
        lexer.Token('SEPARATE', r'(.+)$', 'SetSeparator', 'END'),
        lexer.Token(
            'LIMIT_END', r'SEPARATED BY [\'"]([^\'"]+)[\'"]', 'SetSeparator',
            ''),
        lexer.Token(
            'LIMIT_END', r'(.+) SEPARATED BY', 'SetLimit', 'SEPARATE'),
        lexer.Token('LIMIT_END', r'(.+)$', 'SetLimit', 'END'),
    ]

    def __init__(self, data=''):
        """Initializes a selective lexer object.

        Args:
          data: optional initial data to be processed by the lexer.
        """
        super(SelectiveLexer, self).__init__(data=data)
        self.fields = []
        self.limit = 0
        self.lex_filter = None
        self.separator = u','

    def SetFields(self, match, **unused_kwargs):
        """Sets the output fields.

        The output fields are the part of the filter expression that
        directly follows the SELECT statement. The text is lower cased,
        cut off at " from ", stripped of spaces and split on commas.

        Args:
          match: the match object (instance of re.MatchObject) that
              contains the output field names.
        """
        lowered = match.group(1).lower()
        names_text = lowered.partition(' from ')[0].replace(' ', '')
        # Splitting text without a comma yields a list with just the text.
        self.fields = names_text.split(',')

    def SetFilter(self, match, **unused_kwargs):
        """Sets the filter query.

        The filter query is the part of the filter expression that
        directly follows the WHERE statement.

        Args:
          match: the match object (instance of re.MatchObject) that
              contains the filter query.
        """
        query = match.group(1)
        if 'LIMIT' not in query:
            self.lex_filter = query
            return

        # Only happens for expressions with "LIMIT X SEPARATED BY": the
        # limit is pushed back so that the lexer processes it again.
        self.lex_filter, _, limit_text = query.rpartition('LIMIT')
        self.PushBack('LIMIT {0:s} SEPARATED BY '.format(limit_text))

    def SetLimit(self, match, **unused_kwargs):
        """Sets the row limit.

        A value that is not an integer is reported through Error, after
        which the limit is set to 0.

        Args:
          match: the match object (instance of re.MatchObject) that
              contains the row limit.
        """
        limit_text = match.group(1)
        try:
            self.limit = int(limit_text)
        except ValueError:
            self.Error('Invalid limit value, should be int [{}] = {}'.format(
                type(limit_text), limit_text))
            self.limit = 0

    def SetSeparator(self, match, **unused_kwargs):
        """Sets the output field separator.

        Args:
          match: the match object (instance of re.MatchObject) that
              contains the output field separator. Note that only the
              first character is used.
        """
        separator_text = match.group(1)
        if not separator_text:
            return
        self.separator = separator_text[0]
class SlowLexicalTextParser(interface.BaseParser, lexer.SelfFeederMixIn):
    """Generic text based parser that uses lexer to assist with parsing.

    This text parser is based on a rather slow lexer, which makes the use
    of this interface highly discouraged. Parsers that already implement
    it will most likely all be rewritten to support faster text parsing
    implementations.

    This text based parser needs to be extended to provide an accurate
    list of tokens that define the structure of the log file that the
    parser is designed for.
    """

    # Define the max number of lines before we determine this is
    # not the correct parser.
    MAX_LINES = 15

    # List of tokens that describe the structure of the log file.
    tokens = [
        lexer.Token('INITIAL', '(.+)\n', 'ParseString', ''),
    ]

    def __init__(self, local_zone=True):
        """Constructor for the SlowLexicalTextParser.

        Args:
          local_zone: A boolean value that determines if the entries
              in the log file are stored in the local time zone of
              the computer that stored it or in a fixed timezone,
              like UTC.
        """
        # TODO: remove the multiple inheritance.
        lexer.SelfFeederMixIn.__init__(self)
        interface.BaseParser.__init__(self)
        self.line_ready = False
        self.attributes = {
            'body': '',
            'iyear': 0,
            'imonth': 0,
            'iday': 0,
            'time': '',
            'hostname': '',
            'username': '',
        }
        self.local_zone = local_zone
        self.file_entry = None

    def ClearValues(self):
        """Clears all the values inside the attributes dict.

        All values that start with the letter 'i' are considered
        to be an integer, otherwise string value is assumed.
        """
        self.line_ready = False
        for attr in self.attributes:
            if attr[0] == 'i':
                self.attributes[attr] = 0
            else:
                self.attributes[attr] = ''

    def ParseIncomplete(self, match=None, **unused_kwargs):
        """Indication that we've got a partial line to match against.

        The complete match is appended to the body and the line is
        flagged as ready.

        Args:
          match: The regular expression match object.
        """
        self.attributes['body'] += match.group(0)
        self.line_ready = True

    def ParseMessage(self, **unused_kwargs):
        """Signal that a line is ready to be parsed."""
        self.line_ready = True

    def SetMonth(self, match=None, **unused_kwargs):
        """Parses the month.

        This is a callback function for the text parser (lexer) and is
        called by the corresponding lexer state. A month name that is
        not in timelib.MONTH_DICT is stored as 1.

        Args:
          match: The regular expression match object.
        """
        self.attributes['imonth'] = int(
            timelib.MONTH_DICT.get(match.group(1).lower(), 1))

    def SetDay(self, match=None, **unused_kwargs):
        """Parses the day of the month.

        This is a callback function for the text parser (lexer) and is
        called by the corresponding lexer state.

        Args:
          match: The regular expression match object.
        """
        self.attributes['iday'] = int(match.group(1))

    def SetTime(self, match=None, **unused_kwargs):
        """Set the time attribute.

        Args:
          match: The regular expression match object.
        """
        self.attributes['time'] = match.group(1)

    def SetYear(self, match=None, **unused_kwargs):
        """Parses the year.

        This is a callback function for the text parser (lexer) and is
        called by the corresponding lexer state.

        Args:
          match: The regular expression match object.
        """
        self.attributes['iyear'] = int(match.group(1))

    def Parse(self, parser_context, file_entry):
        """Extract data from a text file.

        The events are handed to the parser context. The file object is
        closed when parsing ends, also when it ends with an exception.

        Args:
          parser_context: A parser context object (instance of
              ParserContext).
          file_entry: A file entry object (instance of dfvfs.FileEntry).

        Raises:
          UnableToParseFile: if the file is not a text file or cannot be
              parsed by this parser.
        """
        path_spec_printable = u'{0:s}:{1:s}'.format(
            file_entry.path_spec.type_indicator, file_entry.name)
        file_object = file_entry.GetFileObject()

        self.file_entry = file_entry
        # TODO: this is necessary since we inherit from lexer.SelfFeederMixIn.
        self.file_object = file_object

        try:
            self._ParseFileObject(
                parser_context, file_entry, file_object, path_spec_printable)
        finally:
            # Closing here covers every exit path, including the early
            # "not a text file" error and unexpected exceptions.
            file_object.close()

    def _ParseFileObject(
            self, parser_context, file_entry, file_object,
            path_spec_printable):
        """Runs the lexer over an opened file and produces the events.

        Args:
          parser_context: A parser context object (instance of
              ParserContext).
          file_entry: A file entry object (instance of dfvfs.FileEntry).
          file_object: the file-like object of the file entry.
          path_spec_printable: a printable description of the file, used
              in log and error messages.

        Raises:
          UnableToParseFile: if the file is not a text file or cannot be
              parsed by this parser.
        """
        # Start by checking, is this a text file or not? Before we proceed
        # any further.
        file_object.seek(0, os.SEEK_SET)
        if not utils.IsText(file_object.read(40)):
            raise errors.UnableToParseFile(
                u'Not a text file, unable to proceed.')

        file_object.seek(0, os.SEEK_SET)

        error_count = 0
        file_verified = False
        # We need to clear out few values in the Lexer before continuing.
        # There might be some leftovers from previous run.
        self.error = 0
        self.buffer = ''

        while True:
            _ = self.NextToken()

            if self.state == 'INITIAL':
                self.entry_offset = getattr(self, 'next_entry_offset', 0)
                self.next_entry_offset = (
                    file_object.tell() - len(self.buffer))

            if not file_verified and self.error >= self.MAX_LINES * 2:
                logging.debug(
                    u'Lexer error count: {0:d} and current state {1:s}'.format(
                        self.error, self.state))
                raise errors.UnableToParseFile(
                    u'[{0:s}] unsupported file: {1:s}.'.format(
                        self.NAME, path_spec_printable))

            if self.line_ready:
                try:
                    event_object = self.ParseLine(parser_context)
                    parser_context.ProduceEvent(
                        event_object, parser_name=self.NAME,
                        file_entry=file_entry)
                    file_verified = True

                except errors.TimestampNotCorrectlyFormed as exception:
                    error_count += 1
                    if file_verified:
                        logging.debug(
                            u'[{0:s} VERIFIED] Error count: {1:d} and ERROR: '
                            u'{2:d}'.format(
                                path_spec_printable, error_count, self.error))
                        logging.warning(
                            u'[{0:s}] Unable to parse timestamp with error: '
                            u'{1:s}'.format(self.NAME, exception))

                    else:
                        logging.debug((
                            u'[{0:s} EVALUATING] Error count: {1:d} and '
                            u'ERROR: {2:d})').format(
                                path_spec_printable, error_count, self.error))

                        if error_count >= self.MAX_LINES:
                            raise errors.UnableToParseFile(
                                u'[{0:s}] unsupported file: {1:s}.'.format(
                                    self.NAME, path_spec_printable))

                finally:
                    self.ClearValues()

            if self.Empty():
                # Try to fill the buffer to prevent the parser from ending
                # prematurely.
                self.Feed()

                if self.Empty():
                    break

        if not file_verified:
            raise errors.UnableToParseFile(
                u'[{0:s}] unable to parser file: {1:s}.'.format(
                    self.NAME, path_spec_printable))

        file_offset = file_object.get_offset()
        if file_offset < file_object.get_size():
            logging.error((
                u'{0:s} prematurely terminated parsing: {1:s} at offset: '
                u'0x{2:08x}.').format(
                    self.NAME, path_spec_printable, file_offset))

    def ParseString(self, match=None, **unused_kwargs):
        """Appends the matched text to the body attribute.

        The first match group is used, or the complete match if the
        expression has no groups. Newline characters at the start and end
        of the text are removed.

        Args:
          match: The regular expression match object.
        """
        try:
            self.attributes['body'] += match.group(1).strip('\n')
        except IndexError:
            self.attributes['body'] += match.group(0).strip('\n')

    def PrintLine(self):
        """Return a string with combined values from the lexer.

        Returns:
          A string with the date, time, hostname, reporter and body that
          are so far stored in the attributes dict.
        """
        # The attributes are stored in a dict, hence dict.get() instead of
        # getattr() which would always fall back on the default value.
        year = self.attributes.get('iyear', None)
        month = self.attributes.get('imonth', None)
        day = self.attributes.get('iday', None)

        if None in (year, month, day):
            date_string = u'[DATE NOT SET]'
        else:
            try:
                # The values normally are integers already; int() with an
                # explicit base only accepts strings.
                date_string = u'{0:04d}-{1:02d}-{2:02d}'.format(
                    int(year), int(month), int(day))
            except (TypeError, ValueError):
                date_string = u'[DATE INVALID]'

        time_string = self.attributes.get('time', u'[TIME NOT SET]')
        hostname_string = self.attributes.get(
            'hostname', u'HOSTNAME NOT SET')
        reporter_string = self.attributes.get(
            'reporter', u'[REPORTER NOT SET]')
        body_string = self.attributes.get('body', u'[BODY NOT SET]')

        # TODO: this is a work in progress. The reason for the try-catch is
        # that the text parser is handed a non-text file and must deal with
        # converting arbitrary binary data.
        try:
            line = u'{0:s} {1:s} [{2:s}] {3:s} => {4:s}'.format(
                date_string, time_string, hostname_string, reporter_string,
                body_string)
        except UnicodeError:
            line = 'Unable to print line - due to encoding error.'

        return line

    def ParseLine(self, parser_context):
        """Return an event object extracted from the current line.

        Args:
          parser_context: A parser context object (instance of
              ParserContext).

        Returns:
          An event object (instance of TextEvent).

        Raises:
          TimestampNotCorrectlyFormed: if the time or year is not set or
              if the timestamp cannot be determined from the attributes.
        """
        if not self.attributes['time']:
            raise errors.TimestampNotCorrectlyFormed(
                u'Unable to parse timestamp, time not set.')

        if not self.attributes['iyear']:
            raise errors.TimestampNotCorrectlyFormed(
                u'Unable to parse timestamp, year not set.')

        times = self.attributes['time'].split(':')
        if self.local_zone:
            timezone = parser_context.timezone
        else:
            timezone = pytz.UTC

        if len(times) < 3:
            raise errors.TimestampNotCorrectlyFormed((
                u'Unable to parse timestamp, not of the format HH:MM:SS '
                u'[{0:s}]').format(self.PrintLine()))

        try:
            secs = times[2].split('.')
            if len(secs) == 2:
                sec, fraction = secs
                if not fraction.isdigit():
                    raise ValueError(
                        u'Invalid fraction of second: {0:s}'.format(fraction))
                # The digits are a decimal fraction of a second: ".5" is
                # 500000 microseconds, not 5. Scale to exactly six digits.
                us = int(fraction[:6].ljust(6, '0'))
            else:
                sec = times[2]
                us = 0

            timestamp = timelib.Timestamp.FromTimeParts(
                int(self.attributes['iyear']), self.attributes['imonth'],
                self.attributes['iday'], int(times[0]), int(times[1]),
                int(sec), microseconds=us, timezone=timezone)

        except ValueError as exception:
            raise errors.TimestampNotCorrectlyFormed(
                u'Unable to parse: {0:s} with error: {1:s}'.format(
                    self.PrintLine(), exception))

        return self.CreateEvent(
            timestamp, getattr(self, 'entry_offset', 0), self.attributes)

    # TODO: this is a rough initial implementation to get this working.
    def CreateEvent(self, timestamp, offset, attributes):
        """Creates an event.

        This function should be overwritten by text parsers that require
        to generate specific event object type, the default is TextEvent.

        Args:
          timestamp: The timestamp time value. The timestamp contains the
              number of microseconds since Jan 1, 1970 00:00:00 UTC.
          offset: The offset of the event.
          attributes: A dict that contains the events attributes.

        Returns:
          An event object (instance of TextEvent).
        """
        return text_events.TextEvent(timestamp, offset, attributes)
class SelectiveLexer(lexer.Lexer):
  """Lexer for a small SELECT-like query language.

  The token table recognizes the clauses SELECT <fields>, WHERE <filter>,
  LIMIT <number> and SEPARATED BY <separator>. The callbacks store what
  was recognized in the fields, lex_filter, limit and separator attributes.
  """

  tokens = [
      lexer.Token('INITIAL', r'SELECT', '', 'FIELDS'),
      lexer.Token('FIELDS', r'(.+) WHERE ', 'SetFields', 'FILTER'),
      lexer.Token('FIELDS', r'(.+) LIMIT', 'SetFields', 'LIMIT_END'),
      lexer.Token('FIELDS', r'(.+) SEPARATED BY', 'SetFields', 'SEPARATE'),
      lexer.Token('FIELDS', r'(.+)$', 'SetFields', 'END'),
      lexer.Token('FILTER', r'(.+) SEPARATED BY', 'SetFilter', 'SEPARATE'),
      lexer.Token('FILTER', r'(.+) LIMIT', 'SetFilter', 'LIMIT_END'),
      lexer.Token('FILTER', r'(.+)$', 'SetFilter', 'END'),
      lexer.Token('SEPARATE', r' ', '', ''),  # Ignore white space here.
      lexer.Token('SEPARATE', r'LIMIT', '', 'LIMIT_END'),
      lexer.Token(
          'SEPARATE', r'[\'"]([^ \'"]+)[\'"] LIMIT', 'SetSeparator',
          'LIMIT_END'),
      lexer.Token(
          'SEPARATE', r'[\'"]([^ \'"]+)[\'"]$', 'SetSeparator', 'END'),
      lexer.Token(
          'SEPARATE', r'(.+)$', 'SetSeparator', 'END'),
      lexer.Token(
          'LIMIT_END', r'SEPARATED BY [\'"]([^\'"]+)[\'"]', 'SetSeparator',
          ''),
      lexer.Token('LIMIT_END', r'(.+) SEPARATED BY', 'SetLimit', 'SEPARATE'),
      lexer.Token('LIMIT_END', r'(.+)$', 'SetLimit', 'END')]

  def __init__(self, data=''):
    """Initializes the lexer with its default query settings.

    Args:
      data: optional string containing the query to lex.
    """
    # Defaults are assigned before the parent initializer is invoked.
    self.separator = u','
    self.lex_filter = None
    self.limit = 0
    self.fields = []
    super(SelectiveLexer, self).__init__(data)

  def SetFilter(self, match, **_):
    """Stores the filter expression of the query.

    Args:
      match: the regular expression match object, group 1 holds the filter.
    """
    query = match.group(1)
    if 'LIMIT' not in query:
      self.lex_filter = query
      return

    # The filter swallowed a "LIMIT X" clause, which only happens for
    # "LIMIT X SEPARATED BY". Keep the part before the last LIMIT as the
    # filter and push the remainder back so the lexer handles it.
    filter_part, _separator, limit_part = query.rpartition('LIMIT')
    self.lex_filter = filter_part
    self.PushBack('LIMIT {} SEPARATED BY '.format(limit_part))

  def SetSeparator(self, match, **_):
    """Stores the output separator; only its first character is used.

    Args:
      match: the regular expression match object, group 1 holds the
          separator.
    """
    value = match.group(1)
    if not value:
      return
    self.separator = value[0]

  def SetLimit(self, match, **_):
    """Stores the row limit, falling back to 0 when it is not an integer.

    Args:
      match: the regular expression match object, group 1 holds the limit.
    """
    raw_limit = match.group(1)
    try:
      self.limit = int(raw_limit)
    except ValueError:
      self.Error('Invalid limit value, should be int [{}] = {}'.format(
          type(raw_limit), raw_limit))
      self.limit = 0

  def SetFields(self, match, **_):
    """Stores the list of selected field names.

    The field text is lower cased, anything from ' from ' onwards is
    dropped, spaces are removed and the remainder is split on commas.

    Args:
      match: the regular expression match object, group 1 holds the fields.
    """
    lowered = match.group(1).lower()
    selected = lowered.partition(' from ')[0]
    # Splitting text without a comma yields a single element list.
    self.fields = selected.replace(' ', '').split(',')
class SyslogParser(text_parser.SlowLexicalTextParser):
  """Parses text based syslog files.

  Syslog lines carry no year. The year is taken from the parser mediator or
  from the file entry stat information, and is increased by one whenever the
  month of a line is lower than the month of the previous line.
  """

  NAME = 'syslog'
  DESCRIPTION = u'Parser for syslog files.'

  # TODO: can we change this similar to SQLite where create an
  # event specific object for different lines using a callback function.
  # Define the tokens that make up the structure of a syslog file.
  tokens = [
      lexer.Token('INITIAL',
                  '(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ',
                  'SetMonth', 'DAY'),
      lexer.Token('DAY', r'\s?(\d{1,2})\s+', 'SetDay', 'TIME'),
      lexer.Token('TIME', r'([0-9:\.]+) ', 'SetTime', 'STRING_HOST'),
      lexer.Token('STRING_HOST', r'^--(-)', 'ParseHostname', 'STRING'),
      lexer.Token('STRING_HOST', r'([^\s]+) ', 'ParseHostname', 'STRING_PID'),
      lexer.Token('STRING_PID', r'([^\:\n]+)', 'ParsePid', 'STRING'),
      lexer.Token('STRING', r'([^\n]+)', 'ParseString', ''),
      lexer.Token('STRING', r'\n\t', None, ''),
      lexer.Token('STRING', r'\t', None, ''),
      lexer.Token('STRING', r'\n', 'ParseMessage', 'INITIAL'),
      lexer.Token('.', '([^\n]+)\n', 'ParseIncomplete', 'INITIAL'),
      lexer.Token('.', '\n[^\t]', 'ParseIncomplete', 'INITIAL'),
      lexer.Token('S[.]+', '(.+)', 'ParseString', ''),
  ]

  def __init__(self):
    """Initializes a syslog parser object."""
    super(SyslogParser, self).__init__(local_zone=True)
    # Set the initial year to 0 (fixed in the actual Parse method)
    self._year_use = 0
    self._last_month = 0

    # Set some additional attributes.
    self.attributes['reporter'] = ''
    self.attributes['pid'] = ''

  def _GetYear(self, stat, timezone):
    """Retrieves the year either from the input file or from the settings.

    Args:
      stat: the stat object of the file entry; its crtime attribute is
          preferred and ctime is used when crtime is missing or zero.
      timezone: the timezone passed to datetime.datetime.fromtimestamp.

    Returns:
      An integer containing the year of the stat time, or the current year
      when no usable stat time is available.
    """
    file_time = getattr(stat, 'crtime', 0)
    if not file_time:
      file_time = getattr(stat, 'ctime', 0)

    if not file_time:
      current_year = timelib.GetCurrentYear()
      logging.error(
          (u'Unable to determine year of syslog file.\nDefaulting to: '
           u'{0:d}').format(current_year))
      return current_year

    try:
      timestamp = datetime.datetime.fromtimestamp(file_time, timezone)
    except (OverflowError, OSError, ValueError) as exception:
      # Out of range values raise ValueError, OverflowError or OSError
      # depending on the Python version and platform. The exception is
      # formatted with !s since exception objects do not support the "s"
      # format specification on Python 3.
      current_year = timelib.GetCurrentYear()
      logging.error(
          u'Unable to determine year of syslog file with error: {0!s}\n'
          u'Defaulting to: {1:d}'.format(exception, current_year))
      return current_year

    return timestamp.year

  def ParseLine(self, parser_mediator):
    """Parses a single line from the syslog file.

    This method extends the one from the parent class by determining the
    year of the line and storing it in the iyear attribute before the
    parent class ParseLine is invoked.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).

    Returns:
      The return value of the parent class ParseLine.
    """
    # Note this an older comment applying to a similar approach previously
    # the init function.
    # TODO: this is a HACK to get the tests working let's discuss this.
    if not self._year_use:
      self._year_use = parser_mediator.year

    if not self._year_use:
      # TODO: Find a decent way to actually calculate the correct year
      # from the syslog file, instead of relying on stats object.
      # NOTE(review): self.file_entry is not assigned anywhere in this
      # class; presumably it is set elsewhere - confirm.
      stat = self.file_entry.GetStat()
      self._year_use = self._GetYear(stat, parser_mediator.timezone)

      if not self._year_use:
        # TODO: Make this sensible, not have the year permanent.
        self._year_use = 2012

    month = int(self.attributes['imonth'])
    # A month lower than that of the previous line means the log rolled
    # over into the next year.
    if month and self._last_month > month:
      self._year_use += 1

    self._last_month = month

    self.attributes['iyear'] = self._year_use

    return super(SyslogParser, self).ParseLine(parser_mediator)

  def ParseHostname(self, match=None, **unused_kwargs):
    """Parses the hostname.

    This is a callback function for the text parser (lexer) and is
    called by the STRING_HOST lexer state.

    Args:
      match: The regular expression match object.
    """
    self.attributes['hostname'] = match.group(1)

  def ParsePid(self, match=None, **unused_kwargs):
    """Parses the reporter and the process identifier (PID).

    This is a callback function for the text parser (lexer) and is
    called by the STRING_PID lexer state.

    Text of the form "reporter[pid]" is split on the last "[". A PID that
    is not an integer is stored as 0. Text that does not end with "]" is
    stored as the reporter and the PID is left untouched.

    Args:
      match: The regular expression match object.
    """
    # TODO: Change this logic and rather add more Tokens that
    # fully cover all variations of the various PID stages.
    line = match.group(1)
    if not line.endswith(']'):
      self.attributes['reporter'] = line
      return

    # Everything before the last "[" is the reporter, what follows is the
    # PID including the closing bracket.
    reporter, _, pid = line.rpartition('[')
    self.attributes['reporter'] = reporter

    try:
      self.attributes['pid'] = int(pid[:-1])
    except ValueError:
      self.attributes['pid'] = 0

  def ParseString(self, match=None, **unused_kwargs):
    """Parses a (body text) string and appends it to the body attribute.

    This is a callback function for the text parser (lexer) and is
    called by the STRING lexer state.

    Args:
      match: The regular expression match object.
    """
    self.attributes['body'] += utils.GetUnicodeString(match.group(1))

  def PrintLine(self):
    """Prints a log line.

    Returns:
      The return value of the parent class PrintLine.
    """
    # NOTE(review): this overwrites the iyear attribute with a hard-coded
    # year as a side effect of printing - confirm this is still wanted.
    self.attributes['iyear'] = 2012
    return super(SyslogParser, self).PrintLine()

  # TODO: this is a rough initial implementation to get this working.
  def CreateEvent(self, timestamp, offset, attributes):
    """Creates a syslog line event.

       This overrides the default function in TextParser to create
       syslog line events instead of text events.

    Args:
      timestamp: The timestamp time value. The timestamp contains the
                 number of microseconds since Jan 1, 1970 00:00:00 UTC.
      offset: The offset of the event.
      attributes: A dict that contains the events attributes.

    Returns:
      A text event (SyslogLineEvent).
    """
    return SyslogLineEvent(timestamp, offset, attributes)
class SlowLexicalTextParser(
    interface.FileObjectParser, lexer.SelfFeederMixIn):
  """Generic text based parser that uses lexer to assist with parsing.

  This text parser is based on a rather slow lexer, which makes the
  use of this interface highly discouraged. Parsers that already
  implement it will most likely all be rewritten to support faster
  text parsing implementations.

  This text based parser needs to be extended to provide an accurate
  list of tokens that define the structure of the log file that the
  parser is designed for.
  """

  _INITIAL_FILE_OFFSET = None

  # Define the max number of lines before we determine this is
  # not the correct parser.
  MAX_LINES = 15

  # List of tokens that describe the structure of the log file.
  tokens = [
      lexer.Token(u'INITIAL', r'(.+)\n', u'ParseString', u''),
  ]

  def __init__(self, local_zone=True):
    """Constructor for the SlowLexicalTextParser.

    Args:
      local_zone: a boolean value that determines if the entries
                  in the log file are stored in the local time
                  zone of the computer that stored it or in a fixed
                  timezone, like UTC.
    """
    # TODO: remove the multiple inheritance.
    lexer.SelfFeederMixIn.__init__(self)
    interface.FileObjectParser.__init__(self)
    self._file_verified = False
    self.attributes = {
        u'body': u'',
        u'iyear': 0,
        u'imonth': 0,
        u'iday': 0,
        u'time': u'',
        u'hostname': u'',
        u'username': u'',
    }
    self.entry_offset = None
    self.line_ready = False
    self.local_zone = local_zone
    self.next_entry_offset = 0

  def ClearValues(self):
    """Clears all the values inside the attributes dict.

    All values that start with the letter 'i' are considered
    to be an integer, otherwise string value is assumed.
    """
    self.line_ready = False
    # Only existing keys are reassigned, the dict does not change size.
    for attr in self.attributes:
      if attr.startswith(u'i'):
        self.attributes[attr] = 0
      else:
        self.attributes[attr] = u''

  def CreateEvent(self, timestamp, offset, attributes):
    """Creates an event.

    This function should be overwritten by text parsers that required
    the generation of specific event object type, the default event
    type is TextEvent.

    Args:
      timestamp: the timestamp time value. The timestamp contains the
                 number of microseconds since Jan 1, 1970 00:00:00 UTC.
      offset: the offset of the event.
      attributes: a dictionary that contains the event's attributes.

    Returns:
      An event object (instance of TextEvent).
    """
    return text_events.TextEvent(timestamp, offset, attributes)

  def ParseIncomplete(self, match=None, **unused_kwargs):
    """Parse a partial line match and append to the body attribute.

    The line is marked as ready to be parsed afterwards.

    Args:
      match: optional regular expression match object (instance of SRE_Match).
    """
    if not match:
      return

    try:
      self.attributes[u'body'] += match.group(0)
    except UnicodeDecodeError:
      # TODO: Support other encodings than UTF-8 here, read from the
      # knowledge base or parse from the file itself.
      self.attributes[u'body'] += u'{0:s}'.format(
          match.group(0).decode(u'utf-8', errors=u'replace'))

    self.line_ready = True

  def ParseMessage(self, **unused_kwargs):
    """Signal that a line is ready to be parsed."""
    self.line_ready = True

  def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses a text file-like object using a lexer.

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).
      file_object: a file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    file_entry = parser_mediator.GetFileEntry()
    path_spec_printable = u'{0:s}:{1:s}'.format(
        file_entry.path_spec.type_indicator, file_entry.name)

    # TODO: this is necessary since we inherit from lexer.SelfFeederMixIn.
    self.file_object = file_object

    self._file_verified = False

    # Start by checking, is this a text file or not? Before we proceed
    # any further.
    file_object.seek(0, os.SEEK_SET)
    if not utils.IsText(file_object.read(40)):
      raise errors.UnableToParseFile(u'Not a text file, unable to proceed.')

    file_object.seek(0, os.SEEK_SET)

    error_count = 0
    # We need to clear out few values in the Lexer before continuing.
    # There might be some leftovers from previous run.
    self.error = 0
    self.buffer = b''
    # The entry offsets are relative to the current file, hence values of
    # a previous run must not carry over.
    self.entry_offset = None
    self.next_entry_offset = 0

    while True:
      _ = self.NextToken()

      if self.state == u'INITIAL':
        self.entry_offset = self.next_entry_offset
        self.next_entry_offset = file_object.tell() - len(self.buffer)

      if not self._file_verified and self.error >= self.MAX_LINES * 2:
        logging.debug(
            u'Lexer error count: {0:d} and current state {1:s}'.format(
                self.error, self.state))
        raise errors.UnableToParseFile(
            u'[{0:s}] unsupported file: {1:s}.'.format(
                self.NAME, path_spec_printable))

      if self.line_ready:
        try:
          self.ParseLine(parser_mediator)
          self._file_verified = True

        except errors.TimestampError as exception:
          error_count += 1
          if self._file_verified:
            logging.debug(
                u'[{0:s} VERIFIED] Error count: {1:d} and ERROR: {2:d}'
                .format(path_spec_printable, error_count, self.error))
            # The exception is formatted with !s since exception objects
            # do not support the "s" format specification on Python 3.
            logging.warning(
                u'[{0:s}] Unable to parse timestamp with error: {1!s}'
                .format(self.NAME, exception))

          else:
            logging.debug((
                u'[{0:s} EVALUATING] Error count: {1:d} and ERROR: '
                u'{2:d})').format(
                    path_spec_printable, error_count, self.error))

            if error_count >= self.MAX_LINES:
              raise errors.UnableToParseFile(
                  u'[{0:s}] unsupported file: {1:s}.'.format(
                      self.NAME, path_spec_printable))

        finally:
          self.ClearValues()

      if self.Empty():
        # Try to fill the buffer to prevent the parser from ending prematurely.
        self.Feed()

      if self.Empty():
        break

    if not self._file_verified:
      raise errors.UnableToParseFile(
          u'[{0:s}] unable to parse file: {1:s}.'.format(
              self.NAME, path_spec_printable))

    file_offset = file_object.get_offset()
    if file_offset < file_object.get_size():
      parser_mediator.ProduceParseError((
          u'{0:s} prematurely terminated parsing: {1:s} at offset: '
          u'0x{2:08x}.').format(self.NAME, path_spec_printable, file_offset))

  def ParseString(self, match=None, **unused_kwargs):
    """Appends the matched text to the body attribute.

    The first group of the match is used, or the entire match when the
    expression has no groups. Newline characters are stripped from both
    ends of the text.

    Args:
      match: optional regular expression match object (instance of SRE_Match).
    """
    try:
      self.attributes[u'body'] += match.group(1).strip(u'\n')
    except IndexError:
      self.attributes[u'body'] += match.group(0).strip(u'\n')

  def PrintLine(self):
    """Return a string with combined values from the lexer.

    Values that are missing or empty in the attributes dict are replaced
    by a placeholder.

    Returns:
      A Unicode string of the form "date time [hostname] reporter => body".
    """
    # The attributes are stored in a dict, hence they need to be read with
    # dict.get() and not with getattr(), which would always yield the
    # default value.
    year = self.attributes.get(u'iyear')
    month = self.attributes.get(u'imonth')
    day = self.attributes.get(u'iday')

    if not (year and month and day):
      date_string = u'[DATE NOT SET]'
    else:
      try:
        # int() with an explicit base only accepts strings while the date
        # values are normally stored as integers.
        date_string = u'{0:04d}-{1:02d}-{2:02d}'.format(
            int(year), int(month), int(day))
      except (TypeError, ValueError):
        date_string = u'[DATE INVALID]'

    time_string = self.attributes.get(u'time') or u'[TIME NOT SET]'
    hostname_string = (
        self.attributes.get(u'hostname') or u'HOSTNAME NOT SET')
    reporter_string = (
        self.attributes.get(u'reporter') or u'[REPORTER NOT SET]')
    body_string = self.attributes.get(u'body') or u'[BODY NOT SET]'

    # TODO: this is a work in progress. The reason for the try-catch is that
    # the text parser is handed a non-text file and must deal with converting
    # arbitrary binary data.
    try:
      line = u'{0:s} {1:s} [{2:s}] {3:s} => {4:s}'.format(
          date_string, time_string, hostname_string, reporter_string,
          body_string)
    except UnicodeError:
      line = u'Unable to print line - due to encoding error.'

    return line

  def ParseLine(self, parser_mediator):
    """Parses the current log line for events.

    As long as the file has not been verified a line that cannot be parsed
    raises UnableToParseFile. Once verified a parse error is produced
    instead and the line is skipped.

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).

    Raises:
      UnableToParseFile: when the line cannot be parsed and the file has
                         not been verified yet.
    """
    year_string = self.attributes.get(u'iyear')
    if not year_string:
      if not self._file_verified:
        raise errors.UnableToParseFile(u'year missing in log line.')

      parser_mediator.ProduceParseError(
          u'year missing in log line: {0:s}'.format(self.PrintLine()))
      return

    time_string = self.attributes.get(u'time')
    if not time_string:
      if not self._file_verified:
        raise errors.UnableToParseFile(u'time values missing in log line.')

      parser_mediator.ProduceParseError(
          u'time values missing in log line: {0:s}'.format(
              self.PrintLine()))
      return

    time_values = time_string.split(u':')
    if len(time_values) < 3:
      if not self._file_verified:
        raise errors.UnableToParseFile(
            u'unsupported time format in log line.')

      parser_mediator.ProduceParseError(
          u'unsupported time format in log line: {0:s}'.format(
              self.PrintLine()))
      return

    # The seconds can carry a fraction, which is treated as microseconds.
    seconds_values = time_values[2].split(u'.')
    if len(seconds_values) == 2:
      seconds_string, microseconds_string = seconds_values
    else:
      seconds_string = time_values[2]
      microseconds_string = u'0'

    try:
      # TODO: fix the need to convert non string values into integers and
      # string to integer conversion without an explicit base.
      year = int(year_string)
      hours = int(time_values[0])
      minutes = int(time_values[1])
      seconds = int(seconds_string)
      microseconds = int(microseconds_string)
    except ValueError as exception:
      if not self._file_verified:
        raise errors.UnableToParseFile(
            u'unable to parse log line with error: {0!s}'.format(exception))

      parser_mediator.ProduceParseError(
          u'unable to parse log line: {0:s} with error: {1!s}'.format(
              self.PrintLine(), exception))
      return

    if self.local_zone:
      timezone = parser_mediator.timezone
    else:
      timezone = pytz.UTC

    try:
      timestamp = timelib.Timestamp.FromTimeParts(
          year, self.attributes[u'imonth'], self.attributes[u'iday'],
          hours, minutes, seconds, microseconds=microseconds,
          timezone=timezone)
    except errors.TimestampError as exception:
      # The event is still produced, but without a usable timestamp.
      timestamp = timelib.Timestamp.NONE_TIMESTAMP
      parser_mediator.ProduceParseError(
          u'unable to determine timestamp with error: {0!s}'.format(
              exception))

    event_object = self.CreateEvent(
        timestamp, getattr(self, u'entry_offset', 0), self.attributes)
    parser_mediator.ProduceEvent(event_object)

  def SetDay(self, match=None, **unused_kwargs):
    """Parses the day of the month.

    This is a callback function for the text parser (lexer) and is
    called by the corresponding lexer state.

    Args:
      match: optional regular expression match object (instance of SRE_Match).
    """
    self.attributes[u'iday'] = int(match.group(1))

  def SetMonth(self, match=None, **unused_kwargs):
    """Parses the month.

    This is a callback function for the text parser (lexer) and is
    called by the corresponding lexer state.

    The lower cased month text is looked up in timelib.MONTH_DICT, a value
    that is not found defaults to 1.

    Args:
      match: optional regular expression match object (instance of SRE_Match).
    """
    self.attributes[u'imonth'] = int(
        timelib.MONTH_DICT.get(match.group(1).lower(), 1))

  def SetTime(self, match=None, **unused_kwargs):
    """Set the time attribute.

    Args:
      match: optional regular expression match object (instance of SRE_Match).
    """
    self.attributes[u'time'] = match.group(1)

  def SetYear(self, match=None, **unused_kwargs):
    """Parses the year.

    This is a callback function for the text parser (lexer) and is
    called by the corresponding lexer state.

    Args:
      match: optional regular expression match object (instance of SRE_Match).
    """
    self.attributes[u'iyear'] = int(match.group(1))