def setBotName(newname):
    """Rebind the grammar's bot-name element to *newname* in place."""
    # ``botname`` is a forward/placeholder expression; ``<<`` injects a new
    # caseless literal so every grammar built on top of it picks up the change.
    botname << CL(newname)


# A command word: letter or underscore first, then alphanumerics/underscores.
identifier = P.Word(P.alphas + "_", P.alphanums + "_").setResultsName('identifier')

# Commands are introduced either by a leading "." ...
command_leader = L(".")

# ... or by hailing the bot by name: "Bot:", "Bot," or "Bot " (trailing space).
hail = (botname + P.oneOf(": ,")) | (botname + P.White())

# Everything after the command word is captured verbatim.
command_args = P.restOfLine.setResultsName('command_args')

command = (
    P.StringStart()
    + Sup(command_leader | hail)
    + identifier.setResultsName('command_identifier')
    + Sup(P.Optional(P.White()))
    + command_args.setResultsName('command_args')).setResultsName('command')

# (input line, expected token repr or expected exception type) pairs
# exercising the grammar above.
_test_commands = [
    (".hello", "['hello', '']"),  # {{{
    (".foo bar", "['foo', 'bar']"),
    (". foo", "['foo', '']"),
    ("..foo", P.ParseException),
    ("TestBot:foo", "['foo', '']"),
    ("tesTBot,foo", "['foo', '']"),
    ("TestBot foo", "['foo', '']"),
    ("TestBot: foo", "['foo', '']"),
    ("tesTBot, foo", "['foo', '']"),
    ("tesTBotfoo", P.ParseException),
]  # }}}
# possible operands:
#  - hexadecimal number
#  - decimal number
#  - identifier
#  - macro function, which is basically expanded via #define
#    to an expression

# Optional C numeric-literal suffixes; parsed but discarded from the result.
__numlitl = pypa.Literal('l').suppress() | pypa.Literal('L').suppress()
__numlitu = pypa.Literal('u').suppress() | pypa.Literal('U').suppress()

# Single-quoted constant with backslash as the escape character.
__string = pypa.QuotedString('\'', '\\')

# "0x..." literal normalised to its decimal string form; an optional
# unsigned suffix and up to two long suffixes (l / ll) may follow.
__hexadec = (
    pypa.Literal('0x').suppress()
    + pypa.Word(pypa.hexnums).setParseAction(lambda t: str(int(t[0], 16)))
    + pypa.Optional(__numlitu)
    + pypa.Optional(__numlitl)
    + pypa.Optional(__numlitl))

# Decimal integer (with optional '~' prefix and embedded '-' sign),
# normalised via int(); U/L/LL suffixes are stripped.
__integer = (
    pypa.Optional('~')
    + pypa.Word(pypa.nums + '-').setParseAction(lambda t: str(int(t[0])))
    + pypa.Optional(pypa.Suppress(pypa.Literal('U')))
    + pypa.Optional(pypa.Suppress(pypa.Literal('L')))
    + pypa.Optional(pypa.Suppress(pypa.Literal('L'))))

# Identifiers trigger the macro-collection callback as a parse action.
__identifier = pypa.Word(
    pypa.alphanums + '_' + '-' + '@' + '$').setParseAction(_collectDefines)

# Comma-separated macro-function argument list.
__arg = pypa.Word(pypa.alphanums + '_')
__args = __arg + pypa.ZeroOrMore(pypa.Literal(',').suppress() + __arg)
return pypar.Regex(r"[^\s\n" + re.escape(disallowed_delimiter) + r"]+") pypar.ParserElement.setDefaultWhitespaceChars(" \t") table_parser = pypar.NoMatch() table_cell_separators = ["|", "/", ","] for separator in table_cell_separators: value = pypar.Combine(word_token_regex(separator) * (0, 10), joinString=' ', adjacent=False) value.setParseAction(lambda start, tokens: (start, tokens[0])) empty = pypar.Empty() empty.setParseAction(lambda start, tokens: (start, tokens)) value = pypar.Group(value + empty) row = pypar.Group( pypar.Optional(separator).suppress() + (value + pypar.Literal(separator).suppress()) * (1, None) + pypar.Optional(value) + (pypar.StringEnd() | pypar.Literal("\n")).suppress() + pypar.Optional("\n").suppress()) table_parser ^= ( (pypar.LineStart() + pypar.Optional(pypar.White())).suppress() + # Allow line breaks for table headings row + pypar.Optional( pypar.Regex(r"[\-_=]{3,}") + pypar.Literal("\n") * (1, 2)).suppress() + row * (0, None)).setResultsName("delimiter:" + separator) table_parser.parseWithTabs() key_value_separators = [":", "-", ">"] key_value_list_parser = pypar.NoMatch()
import sys

import numpy as np
import pyparsing

import configuration_space

# Build pyparsing expressions for params (SMAC-style parameter lines).

# Parameter names may contain a wide range of punctuation characters.
pp_param_name = pyparsing.Word(
    pyparsing.alphanums + "_" + "-" + "@" + "." + ":" + ";" + "\\" + "/" +
    "?" + "!" + "$" + "%" + "&" + "*" + "+" + "<" + ">")

pp_digits = "0123456789"
pp_plusorminus = pyparsing.Literal('+') | pyparsing.Literal('-')

# Optionally signed integer, e.g. "42", "-17", "+3".
pp_int = pyparsing.Combine(
    pyparsing.Optional(pp_plusorminus) + pyparsing.Word(pp_digits))

# Float with a mandatory decimal point, e.g. "-0.5" or ".25".
pp_float = pyparsing.Combine(
    pyparsing.Optional(pp_plusorminus) + pyparsing.Optional(pp_int) + "." +
    pp_int)

pp_eorE = pyparsing.Literal('e') | pyparsing.Literal('E')

# Scientific notation, e.g. "1.5e-3".
pp_e_notation = pyparsing.Combine(pp_float + pp_eorE + pp_int)

# Most specific alternative first so e-notation is not truncated to a float,
# nor a float to an int.
pp_number = pp_e_notation | pp_float | pp_int
pp_numberorname = pp_number | pp_param_name

# Scale flags: "i" (integer) and/or "l" (log scale).
pp_il = pyparsing.Word("il")

# Comma-separated list of categorical choices.
pp_choices = pp_param_name + pyparsing.Optional(
    pyparsing.OneOrMore("," + pp_param_name))

# Continuous parameter:  name [lower, upper] [default] {il flags}
pp_cont_param = pp_param_name + "[" + pp_number + "," + pp_number + "]" + \
    "[" + pp_number + "]" + pyparsing.Optional(pp_il)

# Categorical parameter:  name {choice, ...} [default]
pp_cat_param = pp_param_name + "{" + pp_choices + "}" + "[" + pp_param_name + "]"

# Condition:  child | parent in {value, ...}
pp_condition = pp_param_name + "|" + pp_param_name + "in" + "{" + pp_choices + "}"
class MacOSSecuritydLogParser(text_parser.PyparsingSingleLineTextParser):
  """Parses the securityd file that contains logs from the security daemon."""

  NAME = 'mac_securityd'
  DESCRIPTION = 'Parser for MacOS securityd log files.'

  _ENCODING = 'utf-8'

  # Log lines carry no year; this fallback is used until VerifyStructure
  # seeds the real estimate from the parser mediator.
  _DEFAULT_YEAR = 2012

  # Timestamp prefix: three-letter month, day of month, time of day.
  DATE_TIME = pyparsing.Group(
      text_parser.PyparsingConstants.THREE_LETTERS.setResultsName('month') +
      text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName('day') +
      text_parser.PyparsingConstants.TIME_ELEMENTS)

  # Full line:
  #   <timestamp> <sender>[<pid>] <<level>> [<facility>{<api>} <caller>]: <msg>
  SECURITYD_LINE = (
      DATE_TIME.setResultsName('date_time') +
      pyparsing.CharsNotIn('[').setResultsName('sender') +
      pyparsing.Literal('[').suppress() +
      text_parser.PyparsingConstants.PID.setResultsName('sender_pid') +
      pyparsing.Literal(']').suppress() +
      pyparsing.Literal('<').suppress() +
      pyparsing.CharsNotIn('>').setResultsName('level') +
      pyparsing.Literal('>').suppress() +
      pyparsing.Literal('[').suppress() +
      pyparsing.CharsNotIn('{').setResultsName('facility') +
      pyparsing.Literal('{').suppress() +
      pyparsing.Optional(pyparsing.CharsNotIn(
          '}').setResultsName('security_api')) +
      pyparsing.Literal('}').suppress() +
      pyparsing.Optional(pyparsing.CharsNotIn(']:').setResultsName(
          'caller')) +
      pyparsing.Literal(']:').suppress() +
      pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('message'))

  # "--- last message repeated N time ---" continuation line.
  REPEATED_LINE = (
      DATE_TIME.setResultsName('date_time') +
      pyparsing.Literal('--- last message repeated').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName('times') +
      pyparsing.Literal('time ---').suppress())

  LINE_STRUCTURES = [
      ('logline', SECURITYD_LINE),
      ('repeated', REPEATED_LINE)]

  def __init__(self):
    """Initializes a parser object."""
    super(MacOSSecuritydLogParser, self).__init__()
    self._last_month = None
    self._previous_structure = None
    self._year_use = 0

  def _GetTimeElementsTuple(self, structure):
    """Retrieves a time elements tuple from the structure.

    Args:
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Returns:
      tuple: containing:
        year (int): year.
        month (int): month, where 1 represents January.
        day_of_month (int): day of month, where 1 is the first day of
            the month.
        hours (int): hours.
        minutes (int): minutes.
        seconds (int): seconds.
    """
    time_elements_tuple = self._GetValueFromStructure(structure, 'date_time')

    # TODO: what if time_elements_tuple is None.
    month, day, hours, minutes, seconds = time_elements_tuple

    # Note that dfdatetime_time_elements.TimeElements will raise ValueError
    # for an invalid month.
    month = timelib.MONTH_DICT.get(month.lower(), 0)

    # A month smaller than the previous one means the log rolled over into
    # a new year.
    if month != 0 and month < self._last_month:
      self._year_use += 1

    return (self._year_use, month, day, hours, minutes, seconds)

  def _ParseLogLine(self, parser_mediator, structure, key):
    """Parse a single log line and produce an event object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
      key (str): name of the parsed structure.
    """
    time_elements_tuple = self._GetTimeElementsTuple(structure)

    try:
      date_time = dfdatetime_time_elements.TimeElements(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'invalid date time value: {0!s}'.format(time_elements_tuple))
      return

    self._last_month = time_elements_tuple[1]

    if key == 'logline':
      self._previous_structure = structure
      message = self._GetValueFromStructure(structure, 'message')
    else:
      # A "repeated" line reuses everything but the count from the
      # previously parsed log line.
      repeat_count = self._GetValueFromStructure(structure, 'times')
      previous_message = self._GetValueFromStructure(
          self._previous_structure, 'message')
      message = 'Repeated {0:d} times: {1:s}'.format(
          repeat_count, previous_message)
      structure = self._previous_structure

    # It uses CarsNotIn structure which leaves whitespaces
    # at the beginning of the sender and the caller.
    caller = self._GetValueFromStructure(structure, 'caller')
    if caller:
      caller = caller.strip()

    # TODO: move this to formatter.
    if not caller:
      caller = 'unknown'

    sender = self._GetValueFromStructure(structure, 'sender')
    if sender:
      sender = sender.strip()

    event_data = MacOSSecuritydLogEventData()
    event_data.caller = caller
    event_data.facility = self._GetValueFromStructure(structure, 'facility')
    event_data.level = self._GetValueFromStructure(structure, 'level')
    event_data.message = message
    event_data.security_api = self._GetValueFromStructure(
        structure, 'security_api', default_value='unknown')
    event_data.sender_pid = self._GetValueFromStructure(structure, 'sender_pid')
    event_data.sender = sender

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_ADDED)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in ('logline', 'repeated'):
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    self._ParseLogLine(parser_mediator, structure, key)

  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is a securityd log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from a text file.

    Returns:
      bool: True if the line is in the expected format, False if not.
    """
    self._last_month = 0
    self._year_use = parser_mediator.GetEstimatedYear()

    try:
      structure = self.SECURITYD_LINE.parseString(line)
    except pyparsing.ParseException:
      logger.debug('Not a MacOS securityd log file')
      return False

    time_elements_tuple = self._GetTimeElementsTuple(structure)

    try:
      dfdatetime_time_elements.TimeElements(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      logger.debug(
          'Not a MacOS securityd log file, invalid date and time: {0!s}'.format(
              time_elements_tuple))
      return False

    self._last_month = time_elements_tuple[1]

    return True
class OptsSpec(Parser):
    """
    An OptsSpec is a string specification that describes an
    OptionTree. It is a list of tree path specifications (using dotted
    syntax) separated by keyword lists for any of the style, plotting
    or normalization options. These keyword lists are denoted
    'plot(..)', 'style(...)' and 'norm(...)' respectively. These three
    groups may be specified even more concisely using keyword lists
    delimited by square brackets, parentheses and braces respectively.
    All these sets are optional and may be supplied in any order.

    For instance, the following string:

    Image (interpolation=None) plot(show_title=False) Curve style(color='r')

    Would specify an OptionTree where Image has "interpolation=None"
    for style and 'show_title=False' for plot options. The Curve has a
    style set such that color='r'.

    The parser is fairly forgiving; commas between keywords are
    optional and additional spaces are often allowed. The only
    restriction is that keywords *must* be immediately followed by the
    '=' sign (no space).
    """

    # Plot options: "[...]" short form or "plot[...]" long form.
    plot_options_short = pp.nestedExpr(
        '[', ']',
        content=pp.OneOrMore(pp.Word(allowed) ^ pp.quotedString)
    ).setResultsName('plot_options')

    plot_options_long = pp.nestedExpr(
        opener='plot[', closer=']',
        content=pp.OneOrMore(pp.Word(allowed) ^ pp.quotedString)
    ).setResultsName('plot_options')

    plot_options = (plot_options_short | plot_options_long)

    # Style options: "(...)" short form or "style(...)" long form.
    style_options_short = pp.nestedExpr(
        opener='(', closer=')', ignoreExpr=None).setResultsName("style_options")

    style_options_long = pp.nestedExpr(
        opener='style(', closer=')',
        ignoreExpr=None).setResultsName("style_options")

    style_options = (style_options_short | style_options_long)

    # Normalization options: "{...}" short form or "norm{...}" long form.
    norm_options_short = pp.nestedExpr(
        opener='{', closer='}', ignoreExpr=None).setResultsName("norm_options")

    norm_options_long = pp.nestedExpr(
        opener='norm{', closer='}',
        ignoreExpr=None).setResultsName("norm_options")

    norm_options = (norm_options_short | norm_options_long)

    # Operation names registered with the Compositor.
    compositor_ops = pp.MatchFirst(
        [pp.Literal(el.group) for el in Compositor.definitions])

    # Dotted paths must start with a capitalized element type.
    dotted_path = pp.Combine(
        pp.Word(ascii_uppercase, exact=1) + pp.Word(pp.alphanums + '._'))

    pathspec = (dotted_path | compositor_ops).setResultsName("pathspec")

    # A path followed by any combination of the three option groups,
    # in any order ('&' is pyparsing's Each).
    spec_group = pp.Group(pathspec + (
        pp.Optional(norm_options) & pp.Optional(plot_options) &
        pp.Optional(style_options)))

    opts_spec = pp.OneOrMore(spec_group)

    # Aliases that map to the current option name for backward compatibility.
    # BUGFIX: 'figure_alpha' previously mapped to ' fig_alpha' (with a stray
    # leading space), which produced an invalid keyword name when the alias
    # was applied through Options(**{...}).
    aliases = {'horizontal_spacing': 'hspace',
               'vertical_spacing': 'vspace',
               'figure_alpha': 'fig_alpha',
               'figure_bounds': 'fig_bounds',
               'figure_inches': 'fig_inches',
               'figure_latex': 'fig_latex',
               'figure_rcparams': 'fig_rcparams',
               'figure_size': 'fig_size',
               'show_xaxis': 'xaxis',
               'show_yaxis': 'yaxis'}

    @classmethod
    def process_normalization(cls, parse_group):
        """
        Given a normalization parse group (i.e. the contents of the
        braces), validate the option list and compute the appropriate
        integer value for the normalization plotting option.

        Returns None when no normalization options were supplied,
        otherwise a dict with boolean 'axiswise' and 'framewise' keys.

        Raises:
            SyntaxError: if an option is repeated, unrecognized, or if
                mutually exclusive options (+X and -X) are combined.
        """
        if ('norm_options' not in parse_group):
            return None
        opts = parse_group['norm_options'][0].asList()
        if opts == []:
            return None

        options = ['+framewise', '-framewise', '+axiswise', '-axiswise']
        for normopt in options:
            if opts.count(normopt) > 1:
                raise SyntaxError("Normalization specification must not"
                                  " contain repeated %r" % normopt)

        if not all(opt in options for opt in opts):
            raise SyntaxError("Normalization option not one of %s"
                              % ", ".join(options))

        excluded = [('+framewise', '-framewise'), ('+axiswise', '-axiswise')]
        for pair in excluded:
            if all(exclude in opts for exclude in pair):
                raise SyntaxError("Normalization specification cannot"
                                  " contain both %s and %s"
                                  % (pair[0], pair[1]))

        # If unspecified, default is -axiswise and -framewise
        if len(opts) == 1 and opts[0].endswith('framewise'):
            axiswise = False
            framewise = True if '+framewise' in opts else False
        elif len(opts) == 1 and opts[0].endswith('axiswise'):
            framewise = False
            axiswise = True if '+axiswise' in opts else False
        else:
            axiswise = True if '+axiswise' in opts else False
            framewise = True if '+framewise' in opts else False

        return dict(axiswise=axiswise, framewise=framewise)

    @classmethod
    def parse(cls, line, ns={}):
        """
        Parse an options specification, returning a dictionary with
        path keys and {'plot':<options>, 'style':<options>} values.

        Args:
            line: the full specification string.
            ns: namespace dict used to evaluate keyword values.
                (Kept as a read-only mutable default for backward
                compatibility; it is never mutated here.)

        Raises:
            SyntaxError: if the line cannot be parsed in its entirety.
        """
        parses = [p for p in cls.opts_spec.scanString(line)]
        if len(parses) != 1:
            raise SyntaxError("Invalid specification syntax.")
        else:
            # scanString yields (tokens, start, end); verify the single
            # match consumed the whole (stripped) line.
            e = parses[0][2]
            processed = line[:e]
            if (processed.strip() != line.strip()):
                raise SyntaxError("Failed to parse remainder of string: %r"
                                  % line[e:])

        parse = {}
        for group in cls.opts_spec.parseString(line):
            options = {}

            normalization = cls.process_normalization(group)
            if normalization is not None:
                options['norm'] = Options(**normalization)

            if 'plot_options' in group:
                plotopts = group['plot_options'][0]
                opts = cls.todict(plotopts, 'brackets', ns=ns)
                options['plot'] = Options(
                    **{cls.aliases.get(k, k): v for k, v in opts.items()})

            if 'style_options' in group:
                styleopts = group['style_options'][0]
                opts = cls.todict(styleopts, 'parens', ns=ns)
                options['style'] = Options(
                    **{cls.aliases.get(k, k): v for k, v in opts.items()})

            if group['pathspec'] in parse:
                # Update in case same pathspec accidentally repeated by
                # the user.
                parse[group['pathspec']].update(options)
            else:
                parse[group['pathspec']] = options

        return parse
class SkyDriveOldLogParser(text_parser.PyparsingSingleLineTextParser):
  """Parse SkyDrive old log files."""

  NAME = u'skydrive_log_old'
  DESCRIPTION = u'Parser for OneDrive (or SkyDrive) old log files.'

  _ENCODING = u'UTF-8-SIG'

  # Common SDOL (SkyDriveOldLog) pyparsing objects.
  SDOL_COLON = pyparsing.Literal(u':')
  SDOL_EXCLAMATION = pyparsing.Literal(u'!')

  # Timestamp (08-01-2013 21:22:28.999).
  SDOL_TIMESTAMP = (
      text_parser.PyparsingConstants.DATE_REV +
      text_parser.PyparsingConstants.TIME_MSEC).setResultsName(
          u'sdol_timestamp')

  # Source-code reference: "<file>:<line>!<function>".
  SDOL_SOURCE_CODE = pyparsing.Combine(
      pyparsing.CharsNotIn(u':') +
      SDOL_COLON +
      text_parser.PyparsingConstants.INTEGER +
      SDOL_EXCLAMATION +
      pyparsing.Word(pyparsing.printables)).setResultsName(u'source_code')

  # Parenthesized log level, e.g. "(DETAIL)".
  SDOL_LOG_LEVEL = (
      pyparsing.Literal(u'(').suppress() +
      pyparsing.SkipTo(u')').setResultsName(u'log_level') +
      pyparsing.Literal(u')').suppress())

  # Full SkyDrive line structure.
  SDOL_LINE = (
      SDOL_TIMESTAMP + SDOL_SOURCE_CODE + SDOL_LOG_LEVEL +
      SDOL_COLON + pyparsing.SkipTo(pyparsing.lineEnd).setResultsName(u'text'))

  # Sometimes the timestamped log line is followed by an empty line,
  # then by a file name plus other data and finally by another empty
  # line. It could happen that a logline is split in two parts.
  # These lines will not be discarded and an event will be generated
  # ad-hoc (see source), based on the last one if available.
  SDOL_NO_HEADER_SINGLE_LINE = (
      pyparsing.Optional(pyparsing.Literal(u'->').suppress()) +
      pyparsing.SkipTo(pyparsing.lineEnd).setResultsName(u'text'))

  # Define the available log line structures.
  LINE_STRUCTURES = [
      (u'logline', SDOL_LINE),
      (u'no_header_single_line', SDOL_NO_HEADER_SINGLE_LINE),
  ]

  def __init__(self):
    """Initializes a parser object."""
    super(SkyDriveOldLogParser, self).__init__()
    self._last_event_object = None
    self.offset = 0

  def _ConvertToTimestamp(self, sdol_timestamp):
    """Converts the given parsed date and time to a timestamp.

    This is a sdol_timestamp object as returned by using
    text_parser.PyparsingConstants structures:
    [[month, day, year], [hours, minutes, seconds], milliseconds], for
    example [[8, 1, 2013], [21, 22, 28], 999].

    Args:
      sdol_timestamp: The pyparsing ParseResults object.

    Returns:
      The timestamp which is an integer containing the number of micro
      seconds since January 1, 1970, 00:00:00 UTC.
    """
    month, day, year = sdol_timestamp[0]
    hour, minute, second = sdol_timestamp[1]
    millisecond = sdol_timestamp[2]
    return timelib.Timestamp.FromTimeParts(
        year, month, day, hour, minute, second,
        microseconds=millisecond * 1000)

  def _ParseLogline(self, parser_mediator, structure):
    """Parse a logline and store appropriate attributes.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      structure: A pyparsing.ParseResults object from a line in the log file.
    """
    try:
      timestamp = self._ConvertToTimestamp(structure.sdol_timestamp)
    except errors.TimestampError as exception:
      parser_mediator.ProduceParseError(
          u'unable to determine timestamp with error: {0:s}'.format(
              exception))
      return

    event_object = SkyDriveOldLogEvent(
        timestamp, self.offset, structure.source_code, structure.log_level,
        structure.text)
    parser_mediator.ProduceEvent(event_object)

    # Remember the event so a following header-less line can reuse it.
    self._last_event_object = event_object

  def _ParseNoHeaderSingleLine(self, parser_mediator, structure):
    """Parse an isolated header line and store appropriate attributes.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      structure: A pyparsing.ParseResults object from an header line in the
                 log file.
    """
    if not self._last_event_object:
      logging.debug(u'SkyDrive, found isolated line with no previous events')
      return

    event_object = SkyDriveOldLogEvent(
        self._last_event_object.timestamp, self._last_event_object.offset,
        None, None, structure.text)
    parser_mediator.ProduceEvent(event_object)

    # TODO think to a possible refactoring for the non-header lines.
    self._last_event_object = None

  def ParseRecord(self, parser_mediator, key, structure):
    """Parse each record structure and return an EventObject if applicable.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      key: An identification string indicating the name of the parsed
           structure.
      structure: A pyparsing.ParseResults object from a line in the log file.
    """
    if key == u'logline':
      self._ParseLogline(parser_mediator, structure)
    elif key == u'no_header_single_line':
      self._ParseNoHeaderSingleLine(parser_mediator, structure)
    else:
      logging.warning(
          u'Unable to parse record, unknown structure: {0:s}'.format(key))

  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is a SkyDrive old log file.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      line: A single line from the text file.

    Returns:
      True if this is the correct parser, False otherwise.
    """
    try:
      parsed_structure = self.SDOL_LINE.parseString(line)
    except pyparsing.ParseException:
      logging.debug(u'Not a SkyDrive old log file')
      return False

    try:
      self._ConvertToTimestamp(parsed_structure.sdol_timestamp)
    except errors.TimestampError:
      logging.debug(
          u'Not a SkyDrive old log file, invalid timestamp {0:s}'.format(
              parsed_structure.sdol_timestamp))
      return False

    return True
pyparsing.alphanums + "_$.").setName("identifier") columnName = pyparsing.delimitedList(ident, ".", combine=True) columnNameList = pyparsing.Group(pyparsing.delimitedList(columnName)) tableName = pyparsing.delimitedList(ident, ".", combine=True) tableNameList = pyparsing.Group(pyparsing.delimitedList(tableName)) whereExpression = pyparsing.Forward() and_ = pyparsing.Keyword("and", caseless=True) or_ = pyparsing.Keyword("or", caseless=True) in_ = pyparsing.Keyword("in", caseless=True) E = pyparsing.CaselessLiteral("E") binop = pyparsing.oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) arithSign = pyparsing.Word("+-", exact=1) realNum = pyparsing.Combine( pyparsing.Optional(arithSign) + (pyparsing.Word(pyparsing.nums) + "." + pyparsing.Optional(pyparsing.Word(pyparsing.nums)) | ("." + pyparsing.Word(pyparsing.nums))) + pyparsing.Optional(E + pyparsing.Optional(arithSign) + pyparsing.Word(pyparsing.nums))) intNum = pyparsing.Combine( pyparsing.Optional(arithSign) + pyparsing.Word(pyparsing.nums) + pyparsing.Optional(E + pyparsing.Optional("+") + pyparsing.Word(pyparsing.nums))) columnRval = realNum | intNum | pyparsing.quotedString | columnName # need to add support for # alg expressions whereCondition = pyparsing.Group((columnName + binop + columnRval) | (columnName + in_ + "(" + pyparsing.delimitedList(columnRval) + ")")
key='original_document', value=content) extra.save() extra = HOExtra(object=harvest_object, key='original_format', value=document_format) extra.save() return True apache = parse.SkipTo(parse.CaselessLiteral("<a href="), include=True).suppress() \ + parse.quotedString.setParseAction(parse.removeQuotes).setResultsName('url') \ + parse.SkipTo("</a>", include=True).suppress() \ + parse.Optional(parse.Literal('</td><td align="right">')).suppress() \ + parse.Optional(parse.Combine( parse.Word(parse.alphanums+'-') + parse.Word(parse.alphanums+':') ,adjacent=False, joinString=' ').setResultsName('date') ) iis = parse.SkipTo("<br>").suppress() \ + parse.OneOrMore("<br>").suppress() \ + parse.Optional(parse.Combine( parse.Word(parse.alphanums+'/') + parse.Word(parse.alphanums+':') + parse.Word(parse.alphas) , adjacent=False, joinString=' ').setResultsName('date') ) \ + parse.Word(parse.nums).suppress() \
class SkyDriveLogErrorParser(text_parser.PyparsingMultiLineTextParser):
  """Parse SkyDrive error log files."""

  NAME = u'skydrive_log_error'
  DESCRIPTION = u'Parser for OneDrive (or SkyDrive) error log files.'

  ENCODING = u'utf-8'

  # Common SDE (SkyDriveError) structures.
  INTEGER_CAST = text_parser.PyParseIntCast
  HYPHEN = text_parser.PyparsingConstants.HYPHEN
  TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS
  TIME_MSEC = text_parser.PyparsingConstants.TIME_MSEC
  MSEC = pyparsing.Word(pyparsing.nums, max=3).setParseAction(INTEGER_CAST)
  COMMA = pyparsing.Literal(u',').suppress()
  DOT = pyparsing.Literal(u'.').suppress()
  IGNORE_FIELD = pyparsing.CharsNotIn(u',').suppress()

  # Header line timestamp (2013-07-25-160323.291).
  SDE_HEADER_TIMESTAMP = pyparsing.Group(
      text_parser.PyparsingConstants.DATE.setResultsName(u'date') + HYPHEN +
      TWO_DIGITS.setResultsName(u'hh') + TWO_DIGITS.setResultsName(u'mm') +
      TWO_DIGITS.setResultsName(u'ss') + DOT +
      MSEC.setResultsName(u'ms')).setResultsName(u'hdr_timestamp')

  # Line timestamp (07-25-13,16:06:31.820).
  SDE_TIMESTAMP = (
      TWO_DIGITS.setResultsName(u'month') + HYPHEN +
      TWO_DIGITS.setResultsName(u'day') + HYPHEN +
      TWO_DIGITS.setResultsName(u'year_short') + COMMA +
      TIME_MSEC.setResultsName(u'time')).setResultsName(u'timestamp')

  # Header start.
  SDE_HEADER_START = (
      pyparsing.Literal(u'######').suppress() +
      pyparsing.Literal(u'Logging started.').setResultsName(u'log_start'))

  # Multiline entry end marker, matched from right to left.
  SDE_ENTRY_END = pyparsing.StringEnd() | SDE_HEADER_START | SDE_TIMESTAMP

  # SkyDriveError line pyparsing structure.
  SDE_LINE = (
      SDE_TIMESTAMP + COMMA +
      IGNORE_FIELD + COMMA +
      IGNORE_FIELD + COMMA +
      IGNORE_FIELD + COMMA +
      pyparsing.CharsNotIn(u',').setResultsName(u'module') + COMMA +
      pyparsing.CharsNotIn(u',').setResultsName(u'source_code') + COMMA +
      IGNORE_FIELD + COMMA +
      IGNORE_FIELD + COMMA +
      IGNORE_FIELD + COMMA +
      pyparsing.Optional(
          pyparsing.CharsNotIn(u',').setResultsName(u'text')) +
      COMMA + pyparsing.SkipTo(SDE_ENTRY_END).setResultsName(u'detail') +
      pyparsing.lineEnd())

  # SkyDriveError header pyparsing structure.
  SDE_HEADER = (
      SDE_HEADER_START +
      pyparsing.Literal(u'Version=').setResultsName(u'ver_str') +
      pyparsing.Word(pyparsing.nums + u'.').setResultsName(u'ver_num') +
      pyparsing.Literal(u'StartSystemTime:').suppress() +
      SDE_HEADER_TIMESTAMP +
      pyparsing.Literal(u'StartLocalTime:').setResultsName(u'lt_str') +
      pyparsing.SkipTo(pyparsing.lineEnd()).setResultsName(u'details') +
      pyparsing.lineEnd())

  # Define the available log line structures.
  LINE_STRUCTURES = [
      (u'logline', SDE_LINE),
      (u'header', SDE_HEADER)]

  def __init__(self):
    """Initializes a parser object."""
    super(SkyDriveLogErrorParser, self).__init__()
    self.use_local_zone = False

  def _GetTimestampFromHeader(self, structure):
    """Gets a timestamp from the structure.

    The following is an example of the timestamp structure expected
    [[2013, 7, 25], 16, 3, 23, 291]

    Args:
      structure: The parsed structure, which should be a timestamp.

    Returns:
      timestamp: A plaso timelib timestamp event or 0.
    """
    year, month, day = structure.date
    hour = structure.get(u'hh', 0)
    minute = structure.get(u'mm', 0)
    second = structure.get(u'ss', 0)
    microsecond = structure.get(u'ms', 0) * 1000

    return timelib.Timestamp.FromTimeParts(
        year, month, day, hour, minute, second, microseconds=microsecond)

  def _GetTimestampFromLine(self, structure):
    """Gets a timestamp from string from the structure.

    The following is an example of the timestamp structure expected
    [7, 25, 13, [16, 3, 24], 649]

    Args:
      structure: The parsed structure.

    Returns:
      timestamp: A plaso timelib timestamp event or 0.
    """
    hour, minute, second = structure.time[0]
    microsecond = structure.time[1] * 1000

    # TODO: Verify if timestamps are locale dependent.
    year = structure.get(u'year_short', 0)
    month = structure.get(u'month', 0)
    day = structure.get(u'day', 0)
    if year < 0 or not month or not day:
      return 0

    # Log lines carry a two-digit year.
    year += 2000

    return timelib.Timestamp.FromTimeParts(
        year, month, day, hour, minute, second, microseconds=microsecond)

  def _ParseHeader(self, structure):
    """Parse header lines and store appropriate attributes.

    [u'Logging started.', u'Version=', u'17.0.2011.0627',
     [2013, 7, 25], 16, 3, 23, 291, u'StartLocalTime', u'<details>']

    Args:
      structure: The parsed structure.

    Returns:
      timestamp: The event or none.
    """
    timestamp = self._GetTimestampFromHeader(structure.hdr_timestamp)
    if not timestamp:
      # BUGFIX: hdr_timestamp is a pyparsing.ParseResults; the previous
      # u'{0:d}' format specifier raised an exception when this branch
      # was taken. Use the !s conversion instead.
      logging.debug(u'SkyDriveLogError invalid timestamp {0!s}'.format(
          structure.hdr_timestamp))
      return

    text = u'{0:s} {1:s} {2:s}'.format(
        structure.log_start, structure.ver_str, structure.ver_num)
    detail = u'{0:s} {1:s}'.format(structure.lt_str, structure.details)
    return SkyDriveLogErrorEvent(timestamp, None, None, text, detail)

  def _ParseLine(self, structure):
    """Parse a logline and store appropriate attributes."""
    timestamp = self._GetTimestampFromLine(structure.timestamp)
    if not timestamp:
      # structure.timestamp is a ParseResults; !s cannot fail to format.
      logging.debug(u'SkyDriveLogError invalid timestamp {0!s}'.format(
          structure.timestamp))
      return

    # Replace newlines with spaces in structure.detail to preserve output.
    return SkyDriveLogErrorEvent(
        timestamp, structure.module, structure.source_code, structure.text,
        structure.detail.replace(u'\n', u' '))

  def ParseRecord(self, parser_mediator, key, structure):
    """Parse each record structure and return an EventObject if applicable.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      key: An identification string indicating the name of the parsed
           structure.
      structure: A pyparsing.ParseResults object from a line in the log file.

    Returns:
      An event object (instance of EventObject) or None.
    """
    if key == u'logline':
      return self._ParseLine(structure)
    elif key == u'header':
      return self._ParseHeader(structure)
    else:
      logging.warning(
          u'Unable to parse record, unknown structure: {0:s}'.format(key))

  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is a SkyDrive Error log file.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      line: A single line from the text file.

    Returns:
      True if this is the correct parser, False otherwise.
    """
    try:
      parsed_structure = self.SDE_HEADER.parseString(line)
    except pyparsing.ParseException:
      logging.debug(u'Not a SkyDrive Error log file')
      return False

    timestamp = self._GetTimestampFromHeader(parsed_structure.hdr_timestamp)
    if not timestamp:
      # BUGFIX: previously referenced the non-existent 'timestamp' result
      # name (always empty on a parsed header) with a failing :s spec;
      # report the actual hdr_timestamp value via !s.
      logging.debug(
          u'Not a SkyDrive Error log file, invalid timestamp {0!s}'.format(
              parsed_structure.hdr_timestamp))
      return False

    return True
def build_num(strnum):
    """Convert a numeric token to float (if it contains '.') or int."""
    return float(strnum) if "." in strnum else int(strnum)


# JSON-style number grammar.
onenine = pp.Word("123456789", exact=1)  # single non-zero digit
digit = pp.Word(pp.nums, exact=1)        # any single digit
digits = pp.Word(pp.nums)                # one or more digits

fraction = pp.Literal(".") + digits

# Multi-digit integers must not start with 0; a lone digit may be 0.
integer = pp.Combine(onenine + digits) | digit

number = pp.Combine(
    pp.Optional(pp.Literal("-")) + integer + pp.Optional(fraction)
).setParseAction(lambda t: build_num(t[0]))

number.runTests("""
    1
    1.0
    0.1
    911
    01.0
    -119
    """)


class Null:
    """Empty placeholder class."""

    def __init__(self):
        pass
class XChatLogParser(text_parser.PyparsingSingleLineTextParser):
  """Parse XChat log files."""

  NAME = u'xchatlog'
  DESCRIPTION = u'Parser for XChat log files.'

  _ENCODING = u'UTF-8'

  # Common (header/footer/body) pyparsing structures.
  # TODO: Only English ASCII timestamp supported ATM, add support for others.
  _WEEKDAY = pyparsing.Group(
      pyparsing.Keyword(u'Sun') |
      pyparsing.Keyword(u'Mon') |
      pyparsing.Keyword(u'Tue') |
      pyparsing.Keyword(u'Wed') |
      pyparsing.Keyword(u'Thu') |
      pyparsing.Keyword(u'Fri') |
      pyparsing.Keyword(u'Sat'))

  # Header/footer pyparsing structures.
  # Sample: "**** BEGIN LOGGING AT Mon Dec 31 21:11:55 2011".
  # Note that "BEGIN LOGGING" text is localized (default, English) and can be
  # different if XChat locale is different.
  _HEADER_SIGNATURE = pyparsing.Keyword(u'****')
  _HEADER_DATE_TIME = pyparsing.Group(
      _WEEKDAY.setResultsName(u'weekday') +
      text_parser.PyparsingConstants.THREE_LETTERS.setResultsName(u'month') +
      text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName(
          u'day') +
      text_parser.PyparsingConstants.TIME_ELEMENTS +
      text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName(u'year'))
  # Three free-form words, e.g. "BEGIN LOGGING AT" (locale dependent).
  _LOG_ACTION = pyparsing.Group(
      pyparsing.Word(pyparsing.printables) +
      pyparsing.Word(pyparsing.printables) +
      pyparsing.Word(pyparsing.printables))
  _HEADER = (
      _HEADER_SIGNATURE.suppress() +
      _LOG_ACTION.setResultsName(u'log_action') +
      _HEADER_DATE_TIME.setResultsName(u'date_time'))

  # Body (nickname, text and/or service messages) pyparsing structures.
  # Sample: "dec 31 21:11:58 <fpi> ola plas-ing guys!".
  # Note: body timestamps carry no year; it is tracked via self._xchat_year.
  _DATE_TIME = pyparsing.Group(
      text_parser.PyparsingConstants.THREE_LETTERS.setResultsName(u'month') +
      text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName(
          u'day') +
      text_parser.PyparsingConstants.TIME_ELEMENTS)
  _NICKNAME = pyparsing.QuotedString(
      u'<', endQuoteChar=u'>').setResultsName(u'nickname')
  _LOG_LINE = (
      _DATE_TIME.setResultsName(u'date_time') +
      pyparsing.Optional(_NICKNAME) +
      pyparsing.SkipTo(pyparsing.lineEnd).setResultsName(u'text'))

  # Order matters: 'header_signature' is a fallback for localized headers and
  # must be tried after 'header' (see ParseRecord).
  LINE_STRUCTURES = [
      (u'logline', _LOG_LINE),
      (u'header', _HEADER),
      (u'header_signature', _HEADER_SIGNATURE),
  ]

  def __init__(self):
    """Initializes a parser object."""
    super(XChatLogParser, self).__init__()
    self._last_month = 0
    self._xchat_year = None
    self.offset = 0

  def _GetTimeElementsTuple(self, structure):
    """Retrieves a time elements tuple from the structure.

    Args:
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Returns:
      tuple: contains:
        year (int): year.
        month (int): month, where 1 represents January.
        day_of_month (int): day of month, where 1 is the first day of
            the month.
        hours (int): hours.
        minutes (int): minutes.
        seconds (int): seconds.
    """
    month, day, hours, minutes, seconds = structure.date_time

    month = timelib.MONTH_DICT.get(month.lower(), 0)

    # Log lines have no year: when the month goes backwards relative to the
    # previous line, assume the log rolled over into the next year.
    if month != 0 and month < self._last_month:
      # Gap detected between years.
      self._xchat_year += 1

    return (self._xchat_year, month, day, hours, minutes, seconds)

  def _ParseHeader(self, parser_mediator, structure):
    """Parses a log header.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
    # date_time is (weekday, month, day, hours, minutes, seconds, year);
    # the weekday is not used.
    _, month, day, hours, minutes, seconds, year = structure.date_time
    month = timelib.MONTH_DICT.get(month.lower(), 0)

    time_elements_tuple = (year, month, day, hours, minutes, seconds)

    try:
      date_time = dfdatetime_time_elements.TimeElements(
          time_elements_tuple=time_elements_tuple)
      # XChat timestamps are in the local timezone of the machine.
      date_time.is_local_time = True
    except ValueError:
      parser_mediator.ProduceExtractionError(
          u'invalid date time value: {0!s}'.format(structure.date_time))
      return

    self._last_month = month

    event_data = XChatLogEventData()

    if structure.log_action[0] == u'BEGIN':
      # A BEGIN header anchors the year used for subsequent log lines.
      self._xchat_year = year
      event_data.text = u'XChat start logging'

    elif structure.log_action[0] == u'END':
      self._xchat_year = None
      event_data.text = u'XChat end logging'

    else:
      logging.debug(u'Unknown log action: {0:s}.'.format(
          u' '.join(structure.log_action)))
      return

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_ADDED,
        time_zone=parser_mediator.timezone)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def _ParseLogLine(self, parser_mediator, structure):
    """Parses a log line.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
    # Without a known year (no valid header seen) log lines cannot be dated.
    if not self._xchat_year:
      return

    time_elements_tuple = self._GetTimeElementsTuple(structure)

    try:
      date_time = dfdatetime_time_elements.TimeElements(
          time_elements_tuple=time_elements_tuple)
      date_time.is_local_time = True
    except ValueError:
      parser_mediator.ProduceExtractionError(
          u'invalid date time value: {0!s}'.format(structure.date_time))
      return

    self._last_month = time_elements_tuple[1]

    event_data = XChatLogEventData()
    event_data.nickname = structure.nickname
    # The text string contains multiple unnecessary whitespaces that need to
    # be removed, thus the split and re-join.
    event_data.text = u' '.join(structure.text.split())

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_ADDED,
        time_zone=parser_mediator.timezone)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): identifier of the structure of tokens.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in (u'header', u'header_signature', u'logline'):
      raise errors.ParseError(
          u'Unable to parse record, unknown structure: {0:s}'.format(key))

    if key == u'logline':
      self._ParseLogLine(parser_mediator, structure)

    elif key == u'header':
      self._ParseHeader(parser_mediator, structure)

    elif key == u'header_signature':
      # If this key is matched (after others keys failed) we got a different
      # localized header and we should stop parsing until a new good header
      # is found. Stop parsing is done setting xchat_year to 0.
      # Note that the code assumes that LINE_STRUCTURES will be used in the
      # exact order as defined!
      # NOTE(review): __init__ uses None for "no year" while this path uses 0;
      # both are falsy so _ParseLogLine behaves the same — confirm intent.
      logging.warning(u'Unknown locale header.')
      self._xchat_year = 0

  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is a XChat log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (bytes): line from a text file.

    Returns:
      bool: True if the line is in the expected format, False if not.
    """
    try:
      structure = self._HEADER.parseString(line)
    except pyparsing.ParseException:
      logging.debug(u'Not a XChat log file')
      return False

    _, month, day, hours, minutes, seconds, year = structure.date_time
    month = timelib.MONTH_DICT.get(month.lower(), 0)

    time_elements_tuple = (year, month, day, hours, minutes, seconds)

    try:
      # Only validate the date and time; no event is produced here.
      dfdatetime_time_elements.TimeElements(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      logging.debug(
          u'Not a XChat log file, invalid date and time: {0!s}'.format(
              structure.date_time))
      return False

    return True
class PopularityContestParser(text_parser.PyparsingSingleLineTextParser):
  """Parse popularity contest log files."""

  NAME = u'popularity_contest'
  DESCRIPTION = u'Parser for popularity contest log files.'

  _ASCII_PRINTABLES = pyparsing.printables
  # Build the set of non-whitespace BMP characters; unichr/xrange exist only
  # on Python 2, hence the version gate.
  if sys.version_info[0] < 3:
    _UNICODE_PRINTABLES = u''.join(
        unichr(character) for character in xrange(65536)
        if not unichr(character).isspace())
  else:
    _UNICODE_PRINTABLES = u''.join(
        chr(character) for character in range(65536)
        if not chr(character).isspace())

  # Most-recently-used file path; may contain non-ASCII characters.
  MRU = pyparsing.Word(_UNICODE_PRINTABLES).setResultsName(u'mru')
  PACKAGE = pyparsing.Word(_ASCII_PRINTABLES).setResultsName(u'package')
  # Optional tag such as <NOFILES> or <RECENT-CTIME>.
  TAG = pyparsing.QuotedString(u'<', endQuoteChar=u'>').setResultsName(u'tag')
  TIMESTAMP = text_parser.PyparsingConstants.INTEGER.setResultsName(
      u'timestamp')

  HEADER = (
      pyparsing.Literal(u'POPULARITY-CONTEST-').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName(u'session') +
      pyparsing.Literal(u'TIME:').suppress() + TIMESTAMP +
      pyparsing.Literal(u'ID:').suppress() +
      pyparsing.Word(pyparsing.alphanums, exact=32).setResultsName(u'id') +
      pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName(u'details'))

  FOOTER = (
      pyparsing.Literal(u'END-POPULARITY-CONTEST-').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName(u'session') +
      pyparsing.Literal(u'TIME:').suppress() + TIMESTAMP)

  # A log line is: <atime> <ctime> <package> followed by either a tag only
  # or an MRU path with an optional tag.
  LOG_LINE = (
      TIMESTAMP.setResultsName(u'atime') +
      TIMESTAMP.setResultsName(u'ctime') +
      (PACKAGE + TAG | PACKAGE + MRU + pyparsing.Optional(TAG)))

  LINE_STRUCTURES = [
      (u'logline', LOG_LINE),
      (u'header', HEADER),
      (u'footer', FOOTER),
  ]

  _ENCODING = u'UTF-8'

  def _ParseLogLine(self, parser_mediator, structure):
    """Parses an event object from the log line.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      structure: the log line structure object (instance of
                 pyparsing.ParseResults).
    """
    # Required fields are <mru> and <atime> and we are not interested in
    # log lines without <mru>.
    if not structure.mru:
      return

    # The <atime> field (as <ctime>) is always present but could be 0.
    # In case of <atime> equal to 0, we are in <NOFILES> case, safely return
    # without logging.
    if structure.atime:
      # TODO: not doing any check on <tag> fields, even if only informative
      # probably it could be better to check for the expected values.
      event_object = PopularityContestEvent(
          structure.atime, eventdata.EventTimestamp.ACCESS_TIME,
          structure.package, structure.mru, tag=structure.tag)
      parser_mediator.ProduceEvent(event_object)

    if structure.ctime:
      event_object = PopularityContestEvent(
          structure.ctime, eventdata.EventTimestamp.ENTRY_MODIFICATION_TIME,
          structure.package, structure.mru, tag=structure.tag)
      parser_mediator.ProduceEvent(event_object)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      key: An identification string indicating the name of the parsed
           structure.
      structure: A pyparsing.ParseResults object from a line in the
                 log file.
    """
    # TODO: Add anomaly objects for abnormal timestamps, such as when the log
    # timestamp is greater than the session start.
    if key == u'logline':
      self._ParseLogLine(parser_mediator, structure)
    elif key == u'header':
      if not structure.timestamp:
        logging.debug(
            u'PopularityContestParser, header with invalid timestamp.')
        return

      session = u'{0!s}'.format(structure.session)
      event_object = PopularityContestSessionEvent(
          structure.timestamp, session, u'start', details=structure.details,
          hostid=structure.id)
      parser_mediator.ProduceEvent(event_object)
    elif key == u'footer':
      if not structure.timestamp:
        logging.debug(
            u'PopularityContestParser, footer with invalid timestamp.')
        return

      session = u'{0!s}'.format(structure.session)
      event_object = PopularityContestSessionEvent(
          structure.timestamp, session, u'end')
      parser_mediator.ProduceEvent(event_object)
    else:
      logging.warning(
          u'PopularityContestParser, unknown structure: {0:s}.'.format(key))

  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is a Popularity Contest log file.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      line: A single line from the text file.

    Returns:
      True if this is the correct parser, False otherwise.
    """
    try:
      header_struct = self.HEADER.parseString(line)
    except pyparsing.ParseException:
      logging.debug(u'Not a Popularity Contest log file, invalid header')
      return False

    if not timelib.Timestamp.FromPosixTime(header_struct.timestamp):
      logging.debug(u'Invalid Popularity Contest log file header timestamp.')
      return False
    return True
class StructDefine(object):
    """
    StructDefine is a decorator class used for defining structures
    by parsing a simple intermediate language input decorating
    a StructFormatter class.
    """

    # Registry of all decorated classes, keyed by class name.
    All = {}
    # Single-letter raw type codes (struct-module style) recognized by the DSL.
    rawtypes = (
        "x",
        "c",
        "b",
        "B",
        "h",
        "H",
        "i",
        "I",
        "l",
        "L",
        "f",
        "d",
        "s",
        "n",
        "N",
        "p",
        "P",
        "q",
        "Q",
    )
    # Natural alignment, in bytes, for each raw type code.
    alignments = {
        "x": 1,
        "c": 1,
        "b": 1,
        "B": 1,
        "s": 1,
        "h": 2,
        "H": 2,
        "i": 4,
        "I": 4,
        "l": 4,
        "L": 4,
        "f": 4,
        "q": 8,
        "Q": 8,
        "d": 8,
        "P": 8,
    }
    # --- pyparsing grammar for the intermediate language -------------------
    integer = pp.Regex(r"[0-9][0-9]*")
    integer.setParseAction(lambda r: int(r[0]))
    # "#<bytes>.<bits>" bit-slice length specifier.
    bitslen = pp.Group(pp.Suppress("#") + integer + pp.Suppress(".") + integer)
    symbol = pp.Regex(r"[A-Za-z_][A-Za-z0-9_]*")
    # ";" introduces a comment running to end of line.
    comment = pp.Suppress(";") + pp.restOfLine
    # ":" [> or <] <name> — optional byte-order marker, then field name.
    fieldname = pp.Suppress(":") + pp.Group(
        pp.Optional(pp.Literal(">") | pp.Literal("<"), default=None) + symbol)
    # "~[bBhHiI]?" marks a variable-length field, optionally counted.
    inf = pp.Regex(r"~[bBhHiI]?")
    length = integer | symbol | inf | bitslen
    # <type> ["*" <length>] — count defaults to 0 (scalar).
    typename = pp.Group(symbol + pp.Optional(pp.Suppress("*") + length, default=0))
    structfmt = pp.OneOrMore(
        pp.Group(typename + fieldname + pp.Optional(comment, default="")))

    def __init__(self, fmt, **kargs):
        """Parses the format string fmt into a list of field descriptors.

        Supported keyword arguments: packed, alignments, order, and
        per-type-name overrides (looked up via kargs.get(f_type, f_type)).
        """
        self.fields = []
        self.source = fmt
        self.packed = kargs.get("packed", False)
        if "alignments" in kargs:
            self.alignments = kargs["alignments"]
        for l in self.structfmt.parseString(fmt, True).asList():
            f_type, f_name, f_comment = l
            f_order, f_name = f_name
            f_type, f_count = f_type
            # A field-level order marker takes precedence over the global one.
            if f_order is None and "order" in kargs:
                f_order = kargs["order"]
            if f_type in self.rawtypes:
                f_cls = RawField
                # "~"-prefixed counts select a variable-size field; with a
                # count code (b/B/h/H/i/I) the length is itself encoded.
                if isinstance(f_count, str) and f_count.startswith("~"):
                    f_cls = VarField
                    if f_count[1:] in "bBhHiI":
                        f_cls = CntField
                f_align = self.alignments[f_type]
            else:
                # Non-raw types resolve to another StructFormatter class;
                # kargs may remap the type name.
                f_cls = Field
                f_type = kargs.get(f_type, f_type)
                f_align = 0
            self.fields.append(
                f_cls(f_type, f_count, f_name, f_order, f_align, f_comment))

    def __call__(self, cls):
        """Decorator entry point: attaches the parsed fields to cls."""
        self.All[cls.__name__] = cls
        cls.fields = self.fields
        cls.source = self.source
        cls.packed = self.packed
        cls.fkeys = defaultdict(default_formatter)
        return cls
    def __init__(self, cacheKey, aliases, stringFunctions, packageGenerator):
        """Builds the XPath-like query grammar.

        Args:
            cacheKey: opaque key stored for later use.
            aliases: name aliases stored for later use.
            stringFunctions: mapping of functions usable in string literals
                and function calls.
            packageGenerator: generator providing the package graph root.
        """
        self.__cacheKey = cacheKey
        self.__aliases = aliases
        self.__stringFunctions = stringFunctions
        self.__generator = packageGenerator
        self.__root = None
        self.__graph = None

        # create parsing grammar
        locationPath = pyparsing.Forward()
        relativeLocationPath = pyparsing.Forward()

        # Supported axes; matched before the '@' axis separator.
        axisName = \
            pyparsing.Keyword("descendant-or-self") \
            | pyparsing.Keyword("child") \
            | pyparsing.Keyword("descendant") \
            | pyparsing.Keyword("self")
        nodeTest = pyparsing.Word(pyparsing.alphanums + "_.:+-*")
        axisSpecifier = axisName + '@'
        abbreviatedStep = pyparsing.Keyword('.')

        # Single-quoted strings are taken verbatim; double-quoted strings
        # support backslash escapes (note the third StringLiteral argument).
        sQStringLiteral = pyparsing.QuotedString("'")
        sQStringLiteral.setParseAction(lambda s, loc, toks: StringLiteral(
            s, loc, toks, False, self.__stringFunctions,
            self.__getGraphIter))
        dQStringLiteral = pyparsing.QuotedString('"', '\\')
        dQStringLiteral.setParseAction(lambda s, loc, toks: StringLiteral(
            s, loc, toks, True, self.__stringFunctions,
            self.__getGraphIter))
        stringLiteral = sQStringLiteral | dQStringLiteral

        # name '(' [arg {',' arg}] ')' — arguments are strings or nested calls.
        functionCall = pyparsing.Forward()
        functionArg = stringLiteral | functionCall
        functionCall << pyparsing.Word(pyparsing.alphas, pyparsing.alphanums+'-') + \
            pyparsing.Suppress('(') + \
            pyparsing.Optional(functionArg +
                               pyparsing.ZeroOrMore(pyparsing.Suppress(',') + functionArg)) + \
            pyparsing.Suppress(')')
        functionCall.setParseAction(lambda s, loc, toks: FunctionCall(
            s, loc, toks, self.__stringFunctions, self.__getGraphIter))

        # Predicate expression with standard precedence: '!' binds tightest,
        # then comparisons, then '&&', then '||'.
        predExpr = pyparsing.infixNotation(
            locationPath ^ stringLiteral ^ functionCall,
            [('!', 1, pyparsing.opAssoc.RIGHT,
              lambda s, loc, toks: NotOperator(s, loc, toks,
                                               self.__getGraphRoot)),
             ('<', 2, pyparsing.opAssoc.LEFT,
              lambda s, loc, toks: BinaryStrOperator(s, loc, toks,
                                                     self.__getGraphIter)),
             ('<=', 2, pyparsing.opAssoc.LEFT,
              lambda s, loc, toks: BinaryStrOperator(s, loc, toks,
                                                     self.__getGraphIter)),
             ('>', 2, pyparsing.opAssoc.LEFT,
              lambda s, loc, toks: BinaryStrOperator(s, loc, toks,
                                                     self.__getGraphIter)),
             ('>=', 2, pyparsing.opAssoc.LEFT,
              lambda s, loc, toks: BinaryStrOperator(s, loc, toks,
                                                     self.__getGraphIter)),
             ('==', 2, pyparsing.opAssoc.LEFT,
              lambda s, loc, toks: BinaryStrOperator(s, loc, toks,
                                                     self.__getGraphIter)),
             ('!=', 2, pyparsing.opAssoc.LEFT,
              lambda s, loc, toks: BinaryStrOperator(s, loc, toks,
                                                     self.__getGraphIter)),
             ('&&', 2, pyparsing.opAssoc.LEFT,
              lambda s, loc, toks: BinaryBoolOperator(s, loc, toks)),
             ('||', 2, pyparsing.opAssoc.LEFT,
              lambda s, loc, toks: BinaryBoolOperator(s, loc, toks))])
        predicate = '[' + predExpr + ']'

        # A step is '.' or [axis@]nodeTest[predicate].
        step = abbreviatedStep | (pyparsing.Optional(axisSpecifier) +
                                  nodeTest + pyparsing.Optional(predicate))
        step.setParseAction(lambda s, loc, toks: LocationStep(s, loc, toks))
        # '//' is the abbreviated descendant-or-self separator.
        abbreviatedRelativeLocationPath = step + '//' + relativeLocationPath
        relativeLocationPath << (abbreviatedRelativeLocationPath |
                                 (step + '/' + relativeLocationPath) |
                                 step)
        abbreviatedAbsoluteLocationPath = '//' + relativeLocationPath
        absoluteLocationPath = abbreviatedAbsoluteLocationPath | \
            ('/' + relativeLocationPath)
        locationPath << (absoluteLocationPath | relativeLocationPath)
        locationPath.setParseAction(lambda s, loc, toks: LocationPath(
            s, loc, toks, self.__getGraphRoot))
        self.__pathGrammer = locationPath
    def __create(self):
        """Builds and returns the pyparsing grammar for the expression
        language (literals, SNMP OID expressions, if/try constructs and the
        usual arithmetic/logical operator precedence chain)."""
        START = pp.StringStart().suppress()
        END = pp.StringEnd().suppress()
        #----------------------------------------------------------------------#
        # LANGUAGE TOKENS
        #----------------------------------------------------------------------#
        TRUE = pp.Literal('True').setParseAction(lambda s, loc, toks: toks[0])
        FALSE = pp.Literal('False').setParseAction(
            lambda s, loc, toks: toks[0])
        AND = pp.Literal('and').setParseAction(lambda s, loc, toks: toks[0])
        OR = pp.Literal('or').setParseAction(lambda s, loc, toks: toks[0])
        NOT = pp.Literal('not').setParseAction(lambda s, loc, toks: toks[0])
        #
        # Expression's elements
        #
        LEFT_PAREN = pp.Literal('(')
        RIGHT_PAREN = pp.Literal(')')
        LEFT_SPAREN = pp.Literal('[')
        RIGHT_SPAREN = pp.Literal(']')
        COMMA = pp.Literal(',')
        SEMICOLON = pp.Literal(';')
        # OID's syntax elements
        COLUMN = pp.Literal(':')
        TYPE_NEW = pp.Literal('@')
        TYPE_OLD = pp.Literal('#')
        # Unescaped String prefix
        UNESCAPE_STR = pp.Literal('r')
        #
        # Operators
        #
        ASSIGN = pp.Literal('=')
        # OIDs concat operator
        DOT = pp.Literal('.')
        PLUS_PLUS = pp.Literal('++')
        MINUS_MINUS = pp.Literal('--')
        POWER = pp.Literal('**')
        PLUS = pp.Literal('+')
        MINUS = pp.Literal('-')
        MULTI = pp.Literal('*')
        DIV = pp.Literal('/')
        MOD = pp.Literal('%')
        EQ = pp.Literal('eq')
        EQUAL = pp.Literal('==')
        NEQUAL = pp.Literal('!=')
        REGEXPQUAL = pp.Literal('=~')
        GT = pp.Literal('>')
        LT = pp.Literal('<')
        GEQ = pp.Literal('>=')
        LEQ = pp.Literal('<=')
        LOGIC_NOT = pp.Literal('!')
        LOGIC_AND = pp.Literal('&&')
        LOGIC_OR = pp.Literal('||')
        BITAND = pp.Literal('&')
        BITOR = pp.Literal('|')
        BITXOR = pp.Literal('^')
        # One's complement operator
        BITONE = pp.Literal('~')
        IF = pp.Literal('if')
        THEN = pp.Literal('then')
        ELSE = pp.Literal('else')
        TRY = pp.Literal('try')
        CATCH = pp.Literal('catch')
        #---------------------------------------------------------------------------*/
        # Language Types
        #---------------------------------------------------------------------------*/
        #
        # Literals
        #
        QUOTED = pp.QuotedString('"', escChar='\\') | pp.QuotedString(
            "'", escChar='\\')
        STRING = pp.originalTextFor(QUOTED)
        RSTRING = pp.originalTextFor(UNESCAPE_STR + QUOTED)
        #
        # Variable identifiers ($a, $a1, $_a, $a_a123)
        #
        VAR_ID = pp.Word('$', pp.alphanums + '_', min=2)
        #
        # Function identifiers
        #
        FUNCTION_ID = pp.Word(pp.alphas, pp.alphanums + '_', min=1)
        #
        # Numbers
        #
        HEX = pp.originalTextFor(pp.Regex('[0][xX][0-9a-fA-F]+'))
        DEC = pp.originalTextFor(pp.Word('0') | pp.Regex('[1-9][0-9]*'))
        OCTAL = pp.originalTextFor(pp.Regex('[0][0-7]+'))
        FLOAT1 = pp.Regex('[0-9]+[\.][0-9]+([eE][+-]?[0-9]+)*')
        FLOAT2 = pp.Regex('[0-9]+[\.]([eE][+-]?[0-9]+)*')
        FLOAT = pp.originalTextFor(FLOAT1 | FLOAT2)
        #
        # Special identifiers { <name> (@|#) }
        #
        DATA_ID = pp.originalTextFor(
            pp.Combine(
                pp.Word('{') + pp.Word(pp.alphas, pp.alphanums + '_-.') +
                pp.Word('@#') + pp.Word('}')))
        #----------------------------------------------------------------------#
        # GRAMMAR SYNTAX
        #----------------------------------------------------------------------#
        # An OID sequence is at least three dot-separated numbers, so a
        # negative lookahead on it disambiguates FLOAT from OIDs below.
        OID_SEQUENCE = pp.Regex('[0-9]+[\.][0-9]+([\.][0-9]+)+')
        # NOTE(review): numeric parse actions index toks[1] while FLOAT uses
        # toks[0]; presumably originalTextFor prepends a token in this
        # pyparsing version — confirm against the pinned pyparsing release.
        constant = (
            TRUE.setParseAction(lambda s, loc, toks: self.f.createBool(True))
            | FALSE.setParseAction(
                lambda s, loc, toks: self.f.createBool(False))
            | HEX.setParseAction(
                lambda s, loc, toks: self.f.createInteger(int(toks[1], 16)))
            | (~(OID_SEQUENCE) + FLOAT).setParseAction(
                lambda s, loc, toks: self.f.createFloat(float(toks[0])))
            | OCTAL.setParseAction(
                lambda s, loc, toks: self.f.createInteger(int(toks[1], 8)))
            | DEC.setParseAction(
                lambda s, loc, toks: self.f.createInteger(int(toks[1], 10)))
            | STRING.setParseAction(
                lambda s, loc, toks: self.f.createString(toks, True))
            | RSTRING.setParseAction(
                lambda s, loc, toks: self.f.createString(toks[1:], True)))

        cond_expr = pp.Forward()

        #----------------------------------------------------------------------#
        # Primary Expr: parenthesized expression, variable, data identifier
        # or constant.
        #----------------------------------------------------------------------#
        primary_expr = (
            (LEFT_PAREN.suppress() + cond_expr + RIGHT_PAREN.suppress()
             ).setParseAction(lambda s, loc, toks: toks[0])
            | VAR_ID.setParseAction(
                lambda s, loc, toks: self.f.createIdentifier(toks[0]))
            | DATA_ID.setParseAction(
                lambda s, loc, toks: self.f.createDataIdentifier(toks[1]))
            | constant)
        #----------------------------------------------------------------------#
        # POSTFIX EXPRESSION
        # foo(), foo(a,b,...), $id(), $id, $id(a,b,...)
        #----------------------------------------------------------------------#
        #
        # Named argument
        #
        named_argument_value = pp.Forward()
        name_argument = (
            FUNCTION_ID + ASSIGN.suppress() + named_argument_value
        ).setParseAction(
            lambda s, loc, toks: self.f.createNamedArgument(toks[0], toks[1]))
        #
        # Simple argument
        #
        simple_argument_value = pp.Forward()
        #
        # 1, 2, 3, foo=10, bar=10234
        #
        argument = name_argument | simple_argument_value
        argument_expr_list = (argument +
                              pp.ZeroOrMore(COMMA.suppress() + argument))

        #----------------------------------------------------------------------#
        # ( ), (a,b,c,...)
        #----------------------------------------------------------------------#
        def _call_expr_callback(s, loc, toks):
            # Tag the call suffix; the actual AST node is built later in
            # _func_callback, once the callee is known.
            args = toks.get('args')
            if args is None:
                args = []
            else:
                args = list(args)
            return ('CALL', args)

        call_expr = (
            LEFT_PAREN.suppress() + pp.Optional(argument_expr_list('args')) +
            RIGHT_PAREN.suppress()).setParseAction(_call_expr_callback)

        #----------------------------------------------------------------------#
        # [], [;], [i], [i;], [;j] [i;j]
        #----------------------------------------------------------------------#
        def _range_expr_callback(s, loc, toks):
            # Tag the range suffix, analogous to _call_expr_callback.
            args = []
            start = toks.get('start')
            args.append(start)
            if 'end' in toks:
                end = toks.get('end')
                args.append(end)
            return ('RANGE', args)

        range_value = pp.Forward()
        range_expr = (
            LEFT_SPAREN.suppress() + pp.Optional(range_value)('start') +
            pp.Optional(SEMICOLON.suppress() +
                        pp.Optional(range_value)('end')) +
            RIGHT_SPAREN.suppress()).setParseAction(_range_expr_callback)
        #----------------------------------------------------------------------#
        call_or_range = range_expr | call_expr

        def _func_callback(s, loc, toks):
            # Fold a chain of CALL/RANGE suffixes left-to-right onto the
            # leading primary/function expression.
            if len(toks) == 1:
                return toks[0]
            current_t = toks[0]
            for t in toks[1:]:
                f_type, args = t
                if f_type == 'CALL':
                    current_t = self.f.createCallOp(current_t, args)
                elif f_type == 'RANGE':
                    current_t = self.f.createRangeOp(current_t, args)
                else:
                    raise Exception("ERROR")
            return current_t

        # A bare FUNCTION_ID requires at least one suffix (it is a call);
        # a primary expression may take zero or more suffixes.
        postfix_expr = (
            (FUNCTION_ID +
             pp.OneOrMore(call_or_range)).setParseAction(_func_callback)
            | (primary_expr +
               pp.ZeroOrMore(call_or_range)).setParseAction(_func_callback))
        #----------------------------------------------------------------------#
        # UNARY EXPRESSION
        # <expr>, <expr>(), <expr>[], +<expr>, -<expr>, ~<expr>, !<expr>
        #---------------------------------------------------------------------------*/
        unary_expr = pp.Forward()
        calc_expr = (
            postfix_expr
            | (PLUS_PLUS.suppress() + unary_expr).setParseAction(
                lambda s, loc, toks: self.f.createAddAddOp(toks[0]))
            | (MINUS_MINUS.suppress() + unary_expr).setParseAction(
                lambda s, loc, toks: self.f.createSubSubOp(toks[0]))
            # Unary plus is a no-op.
            | (PLUS.suppress() +
               unary_expr).setParseAction(lambda s, loc, toks: toks[0])
            | (MINUS.suppress() + unary_expr).setParseAction(
                lambda s, loc, toks: self.f.createMinusOp(toks[0]))
            | ((LOGIC_NOT | NOT).suppress() + unary_expr
               ).setParseAction(lambda s, loc, toks: self.f.createNotOp(toks[0]))
            | (BITONE.suppress() + unary_expr).setParseAction(
                lambda s, loc, toks: self.f.createBitOneOp(toks[0])))
        #---------------------------------------------------------------------------*/
        # OID Expressions
        #---------------------------------------------------------------------------*/
        # These expressions rappresent SNMP OID values:
        #
        # <oid expression> [':' <community-expr>] '@' [ <host-expr> [':' <port-expr>] ]
        #
        # where <oid expression> is:
        #
        # n.n.n '.' <exp-1> '.' <exp-2> '.' <exp-n>
        #
        #---------------------------------------------------------------------------*/
        #
        # The DOT ('.') operator is a bit tricky: expressions are converted
        # into strings and concatenated.
        #
        # This means that if i concatenate OID 1.2.3.4 with the float
        # literal 5.6 the result is 1.2.3.4.5.6
        #
        def _oid_compositon_callback(s, loc, toks):
            toks = list(toks)
            expr = toks.pop(0)
            while toks:
                expr = self.f.createConcatOID(expr, toks.pop(0))
            return expr

        def _oid_callback(s, loc, toks):
            return self.f.createOID(toks[1])

        oid_compositon = (
            pp.originalTextFor(OID_SEQUENCE).setParseAction(_oid_callback) +
            pp.ZeroOrMore(DOT.suppress() + (
                pp.originalTextFor(OID_SEQUENCE).setParseAction(_oid_callback)
                | postfix_expr))).setParseAction(_oid_compositon_callback)

        def _snmp_single_expr_callback(s, loc, toks):
            # community/node/port are optional; '@' selects new-style,
            # '#' old-style values.
            oid = toks['oid']
            community = toks['community'] if 'community' in toks else None
            t = toks['type']
            node = toks['node'] if 'node' in toks else None
            port = toks['port'] if 'port' in toks else None
            return self.f.createSnmpValue(oid, community, t, node, port)

        snmp_single_expr = (
            oid_compositon('oid') +
            pp.Optional(COLUMN.suppress() + postfix_expr)('community') +
            pp.originalTextFor(TYPE_OLD | TYPE_NEW)('type') + pp.Optional(
                postfix_expr('node') +
                pp.Optional(COLUMN.suppress() + postfix_expr)('port'))
        ).setParseAction(_snmp_single_expr_callback)

        #----------------------------------------------------------------------#
        # 1.3.6.1.2.1.1@ [ ]
        #----------------------------------------------------------------------#
        def _func_callback_x(s, loc, toks):
            # Apply an optional trailing range suffix to the SNMP value.
            toks = list(toks)
            if len(toks) == 1:
                return toks[0]
            expr = toks[0]
            range_args = toks[1][1]
            return self.f.createRangeOp(expr, range_args)

        snmp_value_expr = (
            snmp_single_expr +
            pp.Optional(range_expr)).setParseAction(_func_callback_x)

        #----------------------------------------------------------------------#
        # IF <expr> THEN <expr> ELSE <expr>
        #----------------------------------------------------------------------#
        def _if_callback(s, loc, toks):
            e1 = toks.get('e1')
            e2 = toks.get('e2')
            e3 = toks.get('e3')
            return self.f.createIf(e1, e2, e3)

        if_expr = (IF.suppress() + cond_expr("e1") + THEN.suppress() +
                   cond_expr("e2") + ELSE.suppress()
                   + cond_expr("e3")).setParseAction(_if_callback)

        #----------------------------------------------------------------------#
        # try <expr> catch [ <id> ] ( <expr> ) [ catch <id> ( <expr> ) ....]
        #----------------------------------------------------------------------#
        def _catch_expr_callback(s, loc, toks):
            ex_name = toks.get('exception')
            expr = toks.get('expr')
            return (ex_name, expr)

        def _try_expr_callback(s, loc, toks):
            body = toks['body']
            catch_list = list(toks['catch_list'])
            return self.f.createTry(body, catch_list)

        #
        # catch [ <expr> ] ( <expr> )
        #
        catch_expr_body = pp.Forward()
        catch_expr = (
            pp.Optional(FUNCTION_ID)('exception') + LEFT_PAREN.suppress() +
            pp.Optional(cond_expr)('expr') +
            RIGHT_PAREN.suppress()).setParseAction(_catch_expr_callback)
        #
        # try <expr> [ catch <expr> ( <expr> ) .... ]
        #
        catch_list = CATCH.suppress() + pp.OneOrMore(catch_expr)
        try_expr = (
            TRY.suppress() + cond_expr('body') +
            catch_list('catch_list')).setParseAction(_try_expr_callback)

        #----------------------------------------------------------------------#
        # UNARY EXPRESSION
        #----------------------------------------------------------------------#
        unary_expr <<= (if_expr | try_expr | snmp_value_expr | calc_expr)

        #----------------------------------------------------------------------#
        # OPERATORS
        # Keyed by the literal text (pp.Literal keeps it in .match) so
        # _op_callback can look up the factory from the matched token.
        #----------------------------------------------------------------------#
        OP_MAP = {
            str(POWER.match): self.f.createPowerOp,
            str(MULTI.match): self.f.createMultiOp,
            str(DIV.match): self.f.createDivOp,
            str(MOD.match): self.f.createModOp,
            str(PLUS.match): self.f.createAddOp,
            str(MINUS.match): self.f.createSubOp,
            str(LT.match): self.f.createLtOp,
            str(GT.match): self.f.createGtOp,
            str(LEQ.match): self.f.createLEqOp,
            str(GEQ.match): self.f.createGEqOp,
            str(EQUAL.match): self.f.createEqOp,
            str(EQ.match): self.f.createEqOp,
            str(NEQUAL.match): self.f.createNotEqOp,
            str(REGEXPQUAL.match): self.f.createRegExpEqOp,
            str(BITAND.match): self.f.createBitAndOp,
            str(BITXOR.match): self.f.createBitXOrOp,
            str(BITOR.match): self.f.createBitOrOp,
            str(AND.match): self.f.createAndOp,
            str(LOGIC_AND.match): self.f.createAndOp,
            str(OR.match): self.f.createOrOp,
            str(LOGIC_OR.match): self.f.createOrOp,
        }

        def _op_callback(s, loc, toks):
            # Left-associative fold: [a, op, b, op, c] -> op(op(a,b),c).
            l = list(toks)
            if len(l) == 1:
                return l
            expr = l.pop(0)
            while l:
                op, expr2 = l.pop(0), l.pop(0)
                op_callback = OP_MAP[op]
                expr = op_callback(expr, expr2)
            return expr

        # Precedence chain, tightest binding first; each level wraps the
        # previous one.
        expr = unary_expr
        #// a ** b
        expr = (expr +
                pp.ZeroOrMore(POWER + expr)).setParseAction(_op_callback)
        #// a * b, a / c, a % c
        expr = (expr + pp.ZeroOrMore((MULTI | DIV | MOD) +
                                     expr)).setParseAction(_op_callback)
        #// a + b, a - b
        expr = (
            expr +
            pp.ZeroOrMore((PLUS | MINUS) + expr)).setParseAction(_op_callback)
        #// a < b, a > b, a <= b, a >= b
        expr = (expr + pp.ZeroOrMore((LT | GT | LEQ | GEQ) +
                                     expr)).setParseAction(_op_callback)
        #// a == b, a != b, a =~ b
        expr = (expr + pp.ZeroOrMore((EQUAL | EQ | NEQUAL | REGEXPQUAL) +
                                     expr)).setParseAction(_op_callback)
        #// a & b
        expr = (expr +
                pp.ZeroOrMore(BITAND + expr)).setParseAction(_op_callback)
        #// a ^ b
        expr = (expr +
                pp.ZeroOrMore(BITXOR + expr)).setParseAction(_op_callback)
        #// a | b
        expr = (expr +
                pp.ZeroOrMore(BITOR + expr)).setParseAction(_op_callback)
        #// a && b
        expr = (expr + pp.ZeroOrMore((LOGIC_AND | AND) +
                                     expr)).setParseAction(_op_callback)
        #// a || b
        expr = (
            expr +
            pp.ZeroOrMore((LOGIC_OR | OR) + expr)).setParseAction(_op_callback)

        #----------------------------------------------------------------------#
        # Recursive rules
        #----------------------------------------------------------------------#
        cond_expr <<= expr
        simple_argument_value <<= cond_expr
        named_argument_value <<= cond_expr
        range_value <<= cond_expr

        #----------------------------------------------------------------------#
        # Initial RULE: a single full expression spanning the whole input.
        #----------------------------------------------------------------------#
        lang_expr = (START + cond_expr + END)
        return lang_expr
def act_parser_end(token):
    # Parse action fired by the trailing Empty() once a full statement has
    # matched; logs the (empty) token list for tracing.
    print("parser_end: " + str(token))


# A comment is either a '//' line comment or a C-style /* ... */ block;
# act_comment (defined elsewhere in this file) receives it.
comment_parser = pp.Group((pp.Literal("//") + pp.restOfLine)
                          | pp.cStyleComment).setParseAction(act_comment)

# Grammar tokens: the two keywords and the statement terminator.
pp_key1 = pp.Keyword("hoge")
pp_key2 = pp.Keyword("fuga")
pp_sc = pp.Literal(";")

# One statement: 'hoge fuga;' with comments ignored between the keywords.
# The two Empty() terms exist purely to trigger act_parser_start /
# act_parser_end at the statement boundaries.
statement = pp.Group(
    pp.Empty().setParseAction(act_parser_start)
    + (pp_key1.setParseAction(act_keyword)
       + pp_key2.setParseAction(act_keyword)).ignore(comment_parser)
    + (pp_sc.setParseAction(act_sc) + pp.Optional(comment_parser))
    + pp.Empty().setParseAction(act_parser_end))

# One or more statements (pyparsing 2.4.6+/3.x repetition syntax).
parser = statement[1, ...]

test_text = """\
hoge fuga; // comment1
hoge /* comment2-1 */ fuga; /* comment2-2 */
// comment3
hoge fuga; // comment4
"""

ret = parser.parseString(test_text)
print(ret)

# Expected-output transcript (this string continues beyond this chunk).
""" [result]
parser_start: []
# 'list(<type>)' container type; pyparsing's '-' operator commits the parse
# (no backtracking) once the 'list' keyword has been seen.
list_type = tag("list") + P.Combine(
    kw('list') - lit('(') - identifier - lit(')'))

# Any type usable in a member declaration.
any_type = P.Group(enum_type | array_type | list_type
                   | scalar_type).setName("type name")

# Structs
# 'pad(<n>)' explicit padding member.
pad_member = P.Group(kw('pad') - s('(') - integer - s(')'))
# '<type> <name> == ?' marks the discriminator field of a union-like struct.
discriminator_member = P.Group(
    tag('discriminator') + any_type + identifier + s('==') + s('?'))
# '<type> <name> == <n>' pins a field to a constant value.
type_member = P.Group(
    tag('type') + any_type + identifier + s('==') + integer)
# Plain data field.
data_member = P.Group(tag('data') + any_type - identifier)

# Struct-level parameters, e.g. '(align=4)'.
struct_param_name = kw("align")
struct_param = P.Group(struct_param_name - s('=') - word)
struct_param_list = P.Forward()
# Right-recursive comma-separated list; the trailing Optional allows a
# dangling comma.
struct_param_list << struct_param + P.Optional(
    s(',') - P.Optional(struct_param_list))

# NOTE(review): pad/type alternatives are tried before discriminator/data —
# presumably so their keywords are not consumed as identifiers; confirm.
struct_member = pad_member | type_member | discriminator_member | data_member
# Optional ': Parent' clause; tag(None) records the absence of a parent.
parent = (s(':') - identifier) | tag(None)
struct = kw('struct') - identifier - P.Group(P.Optional(
    s('(') - struct_param_list - s(')'))) - parent - s('{') + \
    P.Group(P.ZeroOrMore(struct_member - s(';'))) + \
    s('}') - s(';')

# Enums
enum_param_name = kw("wire_type") | kw("bitmask") | kw("complete")
enum_param = P.Group(enum_param_name - s('=') - word)
enum_param_list = P.Forward()
enum_param_list << enum_param + P.Optional(
    s(',') + P.Optional(enum_param_list))
enum_member_param_name = kw("virtual")
class CompositorSpec(Parser):
    """
    The syntax for defining a set of compositor is as follows:

    [ mode op(spec) [settings] value ]+

    The components are:

    mode      : Operation mode, either 'data' or 'display'.
    group     : Value identifier with capitalized initial letter.
    op        : The name of the operation to apply.
    spec      : Overlay specification of form (A * B) where A and B are
                dotted path specifications.
    settings  : Optional list of keyword arguments to be used as
                parameters to the operation (in square brackets).
    """

    # Operation mode token: 'data' or 'display'.
    mode = pp.Word(pp.alphas + pp.nums + '_').setResultsName("mode")

    # Name of the Compositor operation to apply.
    op = pp.Word(pp.alphas + pp.nums + '_').setResultsName("op")

    # Parenthesised overlay specification, e.g. '(A * B)'.
    overlay_spec = pp.nestedExpr(
        opener='(', closer=')', ignoreExpr=None).setResultsName("spec")

    # The value identifier the compositor produces.
    value = pp.Word(pp.alphas + pp.nums + '_').setResultsName("value")

    # Optional keyword settings in square brackets.
    op_settings = pp.nestedExpr(
        opener='[', closer=']', ignoreExpr=None).setResultsName("op_settings")

    # One or more compositor definitions on a line.
    compositor_spec = pp.OneOrMore(pp.Group(
        mode + op + overlay_spec + value + pp.Optional(op_settings)))

    @classmethod
    def parse(cls, line, ns=None):
        """
        Parse compositor specifications, returning a list of Compositors.

        Args:
            line: Specification string to parse.
            ns: Optional namespace dict used when evaluating settings.
                Defaults to an empty dict.

        Returns:
            list of Compositor definitions, one per parsed group.

        Raises:
            SyntaxError: if the line is not a single complete specification,
                the mode is missing or invalid, or the operation name is not
                available.
        """
        # Use None as the default to avoid the shared-mutable-default
        # pitfall (the previous default was a literal {}).
        ns = {} if ns is None else ns
        definitions = []
        parses = [p for p in cls.compositor_spec.scanString(line)]
        if len(parses) != 1:
            raise SyntaxError("Invalid specification syntax.")
        else:
            # scanString yields (tokens, start, end); verify the single
            # match consumed the whole line.
            e = parses[0][2]
            processed = line[:e]
            if processed.strip() != line.strip():
                raise SyntaxError("Failed to parse remainder of string: %r"
                                  % line[e:])

        opmap = {op.__name__: op for op in Compositor.operations}
        for group in cls.compositor_spec.parseString(line):
            if ('mode' not in group) or group['mode'] not in ['data', 'display']:
                raise SyntaxError(
                    "Either data or display mode must be specified.")
            mode = group['mode']

            # Validate the operation name BEFORE indexing opmap: the
            # original code looked the name up first, so an unknown
            # operation raised KeyError instead of the intended
            # SyntaxError below.
            if group['op'] not in opmap:
                raise SyntaxError(
                    "Operation %s not available for use with compositors."
                    % group['op'])
            operation = opmap[group['op']]
            spec = ' '.join(group['spec'].asList()[0])

            kwargs = {}
            if 'op_settings' in group:
                kwargs = cls.todict(group['op_settings'][0], 'brackets', ns=ns)
            definition = Compositor(
                str(spec), operation, str(group['value']), mode, **kwargs)
            definitions.append(definition)
        return definitions
def _build_tgrep_parser(set_parse_actions=True):
    '''
    Builds a pyparsing-based parser object for tokenizing and
    interpreting tgrep search strings.
    '''
    # A relation operator, optionally negated with '!'.
    tgrep_op = (pyparsing.Optional('!') +
                pyparsing.Regex('[$%,.<>][%,.<>0-9-\':]*'))
    # Quoted node literals and /regex/ node patterns (quotes retained in the
    # results for later interpretation).
    tgrep_qstring = pyparsing.QuotedString(quoteChar='"', escChar='\\',
                                           unquoteResults=False)
    tgrep_node_regex = pyparsing.QuotedString(quoteChar='/', escChar='\\',
                                              unquoteResults=False)
    # Case-insensitive variants, marked with a leading 'i@'.
    tgrep_qstring_icase = pyparsing.Regex(
        'i@\\"(?:[^"\\n\\r\\\\]|(?:\\\\.))*\\"')
    tgrep_node_regex_icase = pyparsing.Regex(
        'i@\\/(?:[^/\\n\\r\\\\]|(?:\\\\.))*\\/')
    # Bare node literal: any run of characters with no special meaning.
    tgrep_node_literal = pyparsing.Regex('[^][ \r\t\n;:.,&|<>()$!@%\'^=]+')
    tgrep_expr = pyparsing.Forward()
    tgrep_relations = pyparsing.Forward()
    tgrep_parens = pyparsing.Literal('(') + tgrep_expr + ')'
    # 'N(i,j,...)' addresses a node by its NLTK tree position.
    tgrep_nltk_tree_pos = (
        pyparsing.Literal('N(') +
        pyparsing.Optional(
            pyparsing.Word(pyparsing.nums) + ',' +
            pyparsing.Optional(
                pyparsing.delimitedList(
                    pyparsing.Word(pyparsing.nums), delim=',') +
                pyparsing.Optional(','))) + ')')
    tgrep_node_label = pyparsing.Regex('[A-Za-z0-9]+')
    # '=label' refers back to a node bound earlier in the pattern.
    tgrep_node_label_use = pyparsing.Combine('=' + tgrep_node_label)
    # see _tgrep_segmented_pattern_action
    tgrep_node_label_use_pred = tgrep_node_label_use.copy()
    # '@name' invokes a macro; no whitespace is allowed inside the name.
    macro_name = pyparsing.Regex('[^];:.,&|<>()[$!@%\'^=\r\t\n ]+')
    macro_name.setWhitespaceChars('')
    macro_use = pyparsing.Combine('@' + macro_name)
    tgrep_node_expr = (tgrep_node_label_use_pred |
                       macro_use |
                       tgrep_nltk_tree_pos |
                       tgrep_qstring_icase |
                       tgrep_node_regex_icase |
                       tgrep_qstring |
                       tgrep_node_regex |
                       '*' |
                       tgrep_node_literal)
    # Optional binding of the matched node to a label: 'expr=label'.
    # setWhitespaceChars('') makes the '=' and label adjacent-only.
    tgrep_node_expr2 = (
        (tgrep_node_expr +
         pyparsing.Literal('=').setWhitespaceChars('') +
         tgrep_node_label.copy().setWhitespaceChars('')) |
        tgrep_node_expr)
    tgrep_node = (tgrep_parens |
                  (pyparsing.Optional("'") +
                   tgrep_node_expr2 +
                   pyparsing.ZeroOrMore("|" + tgrep_node_expr)))
    # '[...]' groups relations; optionally negated with a leading '!'.
    tgrep_brackets = pyparsing.Optional('!') + '[' + tgrep_relations + ']'
    tgrep_relation = tgrep_brackets | (tgrep_op + tgrep_node)
    tgrep_rel_conjunction = pyparsing.Forward()
    # Conjunction: relations joined by an optional '&'.
    tgrep_rel_conjunction << (
        tgrep_relation +
        pyparsing.ZeroOrMore(pyparsing.Optional('&') +
                             tgrep_rel_conjunction))
    # Disjunction of conjunctions, joined by '|'.
    tgrep_relations << tgrep_rel_conjunction + pyparsing.ZeroOrMore(
        "|" + tgrep_relations)
    tgrep_expr << tgrep_node + pyparsing.Optional(tgrep_relations)
    # '=label relations' — a segmented pattern rooted at a bound node.
    tgrep_expr_labeled = tgrep_node_label_use + pyparsing.Optional(
        tgrep_relations)
    tgrep_expr2 = tgrep_expr + pyparsing.ZeroOrMore(':' + tgrep_expr_labeled)
    # '@ name expr ;' defines a macro (whitespace after '@' is required,
    # which distinguishes a definition from a macro use).
    macro_defn = (pyparsing.Literal('@') +
                  pyparsing.White().suppress() +
                  macro_name +
                  tgrep_expr2)
    # Whole input: optional macro definitions, then ';'-separated patterns.
    tgrep_exprs = (
        pyparsing.Optional(macro_defn +
                           pyparsing.ZeroOrMore(';' + macro_defn) + ';') +
        tgrep_expr2 +
        pyparsing.ZeroOrMore(';' + (macro_defn | tgrep_expr2)) +
        pyparsing.ZeroOrMore(';').suppress())
    if set_parse_actions:
        tgrep_node_label_use.setParseAction(_tgrep_node_label_use_action)
        tgrep_node_label_use_pred.setParseAction(
            _tgrep_node_label_pred_use_action)
        macro_use.setParseAction(_tgrep_macro_use_action)
        tgrep_node.setParseAction(_tgrep_node_action)
        tgrep_node_expr2.setParseAction(_tgrep_bind_node_label_action)
        tgrep_parens.setParseAction(_tgrep_parens_action)
        tgrep_nltk_tree_pos.setParseAction(_tgrep_nltk_tree_pos_action)
        tgrep_relation.setParseAction(_tgrep_relation_action)
        tgrep_rel_conjunction.setParseAction(_tgrep_conjunction_action)
        tgrep_relations.setParseAction(_tgrep_rel_disjunction_action)
        macro_defn.setParseAction(_macro_defn_action)
        # the whole expression is also the conjunction of two
        # predicates: the first node predicate, and the remaining
        # relation predicates
        tgrep_expr.setParseAction(_tgrep_conjunction_action)
        tgrep_expr_labeled.setParseAction(_tgrep_segmented_pattern_action)
        tgrep_expr2.setParseAction(
            functools.partial(_tgrep_conjunction_action, join_char=':'))
        tgrep_exprs.setParseAction(_tgrep_exprs_action)
    # '#' starts a comment running to end of line, anywhere in the input.
    return tgrep_exprs.ignore('#' + pyparsing.restOfLine)
# Initialize non-ascii unicode code points in the Basic Multilingual Plane. unicode_printables = u''.join( six.unichr(c) for c in range(128, 65536) if not six.unichr(c).isspace()) # Does not like comma. No Literals from above allowed. valid_identifier_chars = ((unicode_printables + pyparsing.alphanums + ".-_#!$%&'*+/:;?@[\\]^`|~")) metric_name = (pyparsing.Word(valid_identifier_chars, min=1, max=255)("metric_name")) dimension_name = pyparsing.Word(valid_identifier_chars + ' ', min=1, max=255) dimension_value = pyparsing.Word(valid_identifier_chars + ' ', min=1, max=255) MINUS = pyparsing.Literal('-') integer_number = pyparsing.Word(pyparsing.nums) decimal_number = (pyparsing.Optional(MINUS) + integer_number + pyparsing.Optional("." + integer_number)) decimal_number.setParseAction(lambda tokens: "".join(tokens)) max = pyparsing.CaselessLiteral("max") min = pyparsing.CaselessLiteral("min") avg = pyparsing.CaselessLiteral("avg") count = pyparsing.CaselessLiteral("count") sum = pyparsing.CaselessLiteral("sum") last = pyparsing.CaselessLiteral("last") func = (max | min | avg | count | sum | last)("func") less_than_op = ((pyparsing.CaselessLiteral("<") | pyparsing.CaselessLiteral("lt"))) less_than_eq_op = ((pyparsing.CaselessLiteral("<=") | pyparsing.CaselessLiteral("lte")))
LPAR, RPAR = map(pp.Suppress, "()")
# Unsigned number with optional fraction and exponent.
numvalue = pp.Regex(r"\d+(\.\d*)?([eE][+-]?\d+)?")
term = pp.Forward()
factor = pp.Forward()
addsub = pp.oneOf('+ -')
muldiv = pp.oneOf('* /')
compare = pp.Regex(">=|<=|!=|>|<|==").setName("compare")
NOT_ = pp.Keyword("NOT").setName("NOT")
AND_ = pp.Keyword("AND").setName("AND")
OR_ = pp.Keyword("OR").setName("OR")
symbol = pp.Word(pp.alphas).setName("symbol")
# Dotted property access such as 'obj.attr'.
propsymbol = pp.Group(symbol + "." + symbol).setName("propsymbol")
# Hand-written precedence ladder (formula/term/factor).
formula = pp.Optional(addsub) + term + pp.ZeroOrMore(addsub + term)
term << (factor + pp.ZeroOrMore(muldiv + factor))
factor << (numvalue | propsymbol | LPAR + formula + RPAR)
# NOTE(review): this rebinding abandons the Forward/recursive definition
# above; the infixNotation below only ever sees the simple form — confirm
# the hand-written ladder is intentionally dead code.
factor = numvalue | propsymbol
# condition = pp.Group(factor + compare + factor)
# Operator-precedence expression built from the simple factor.
# (The precedence table is truncated at the end of this chunk.)
formula = pp.infixNotation(factor, [
    (
        muldiv,
        2,
        pp.opAssoc.LEFT,
    ),
    (
        addsub,
        2,
class SyslogParser(text_parser.PyparsingMultiLineTextParser):
  """Parses syslog formatted log files"""

  NAME = 'syslog'

  DESCRIPTION = 'Syslog Parser'

  _ENCODING = 'utf-8'

  _plugin_classes = {}

  # The reporter and facility fields can contain any printable character, but
  # to allow for processing of syslog formats that delimit the reporter and
  # facility with printable characters, we remove certain common delimiters
  # from the set of printable characters.
  _REPORTER_CHARACTERS = ''.join(
      [c for c in pyparsing.printables if c not in [':', '[', '<']])
  _FACILITY_CHARACTERS = ''.join(
      [c for c in pyparsing.printables if c not in [':', '>']])

  _SYSLOG_SEVERITY = [
      'EMERG', 'ALERT', 'CRIT', 'ERR', 'WARNING', 'NOTICE', 'INFO', 'DEBUG']

  _OFFSET_PREFIX = ['-', '+']

  # Non-greedy body that stops (lookahead) at end of input, at the next
  # classic 'Mon dd hh:mm:ss' header or at the next ISO 8601 header.
  _BODY_CONTENT = (
      r'.*?(?=($|\n\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2})|'
      r'($|\n\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}'
      r'[\+|-]\d{2}:\d{2}\s))')

  _VERIFICATION_REGEX = re.compile(
      r'^\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}\s' + _BODY_CONTENT)

  # The Chrome OS syslog messages are of a format beginning with an
  # ISO 8601 combined date and time expression with timezone designator:
  #   2016-10-25T12:37:23.297265-07:00
  #
  # This will then be followed by the SYSLOG Severity which will be one of:
  #   EMERG,ALERT,CRIT,ERR,WARNING,NOTICE,INFO,DEBUG
  #
  # 2016-10-25T12:37:23.297265-07:00 INFO
  _CHROMEOS_VERIFICATION_REGEX = re.compile(
      r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.'
      r'\d{6}[\+|-]\d{2}:\d{2}\s'
      r'(EMERG|ALERT|CRIT|ERR|WARNING|NOTICE|INFO|DEBUG)' + _BODY_CONTENT)

  # Named pyparsing building blocks shared by all line grammars below.
  _PYPARSING_COMPONENTS = {
      'year': text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName(
          'year'),
      'two_digit_month': (
          text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
              'two_digit_month')),
      'month': text_parser.PyparsingConstants.MONTH.setResultsName('month'),
      'day': text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName(
          'day'),
      'hour': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          'hour'),
      'minute': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          'minute'),
      'second': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          'second'),
      'fractional_seconds': pyparsing.Word(pyparsing.nums).setResultsName(
          'fractional_seconds'),
      'hostname': pyparsing.Word(pyparsing.printables).setResultsName(
          'hostname'),
      'reporter': pyparsing.Word(_REPORTER_CHARACTERS).setResultsName(
          'reporter'),
      'pid': text_parser.PyparsingConstants.PID.setResultsName('pid'),
      'facility': pyparsing.Word(_FACILITY_CHARACTERS).setResultsName(
          'facility'),
      'severity': pyparsing.oneOf(_SYSLOG_SEVERITY).setResultsName(
          'severity'),
      'body': pyparsing.Regex(_BODY_CONTENT, re.DOTALL).setResultsName(
          'body'),
      'comment_body': pyparsing.SkipTo(' ---').setResultsName('body'),
      'iso_8601_offset': (
          pyparsing.oneOf(_OFFSET_PREFIX) +
          text_parser.PyparsingConstants.TWO_DIGITS +
          pyparsing.Optional(
              pyparsing.Literal(':') +
              text_parser.PyparsingConstants.TWO_DIGITS))
  }

  # Classic syslog timestamp: 'Mon dd hh:mm:ss[.ffffff]'.
  _PYPARSING_COMPONENTS['date'] = (
      _PYPARSING_COMPONENTS['month'] +
      _PYPARSING_COMPONENTS['day'] +
      _PYPARSING_COMPONENTS['hour'] + pyparsing.Suppress(':') +
      _PYPARSING_COMPONENTS['minute'] + pyparsing.Suppress(':') +
      _PYPARSING_COMPONENTS['second'] + pyparsing.Optional(
          pyparsing.Suppress('.') +
          _PYPARSING_COMPONENTS['fractional_seconds']))

  # ISO 8601 combined date/time with offset, combined into a single token.
  _PYPARSING_COMPONENTS['iso_8601_date'] = pyparsing.Combine(
      _PYPARSING_COMPONENTS['year'] + pyparsing.Literal('-') +
      _PYPARSING_COMPONENTS['two_digit_month'] + pyparsing.Literal('-') +
      _PYPARSING_COMPONENTS['day'] + pyparsing.Literal('T') +
      _PYPARSING_COMPONENTS['hour'] + pyparsing.Literal(':') +
      _PYPARSING_COMPONENTS['minute'] + pyparsing.Literal(':') +
      _PYPARSING_COMPONENTS['second'] + pyparsing.Literal('.') +
      _PYPARSING_COMPONENTS['fractional_seconds'] +
      _PYPARSING_COMPONENTS['iso_8601_offset'],
      joinString='', adjacent=True).setResultsName('iso_8601_date')

  _CHROMEOS_SYSLOG_LINE = (
      _PYPARSING_COMPONENTS['iso_8601_date'] +
      _PYPARSING_COMPONENTS['severity'] +
      _PYPARSING_COMPONENTS['reporter'] +
      pyparsing.Optional(pyparsing.Suppress(':')) +
      pyparsing.Optional(
          pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
          pyparsing.Suppress(']')) +
      pyparsing.Optional(pyparsing.Suppress(':')) +
      _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

  _SYSLOG_LINE = (
      _PYPARSING_COMPONENTS['date'] +
      _PYPARSING_COMPONENTS['hostname'] +
      _PYPARSING_COMPONENTS['reporter'] +
      pyparsing.Optional(
          pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
          pyparsing.Suppress(']')) +
      pyparsing.Optional(
          pyparsing.Suppress('<') + _PYPARSING_COMPONENTS['facility'] +
          pyparsing.Suppress('>')) +
      pyparsing.Optional(pyparsing.Suppress(':')) +
      _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

  # '... : --- comment ---' informational lines.
  _SYSLOG_COMMENT = (
      _PYPARSING_COMPONENTS['date'] + pyparsing.Suppress(':') +
      pyparsing.Suppress('---') + _PYPARSING_COMPONENTS['comment_body'] +
      pyparsing.Suppress('---') + pyparsing.LineEnd())

  # Kernel messages have no hostname and a fixed 'kernel' reporter.
  _KERNEL_SYSLOG_LINE = (
      _PYPARSING_COMPONENTS['date'] +
      pyparsing.Literal('kernel').setResultsName('reporter') +
      pyparsing.Suppress(':') +
      _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

  # Note: regular and kernel lines deliberately share the 'syslog_line' key
  # so ParseRecord handles them identically.
  LINE_STRUCTURES = [
      ('syslog_line', _SYSLOG_LINE),
      ('syslog_line', _KERNEL_SYSLOG_LINE),
      ('syslog_comment', _SYSLOG_COMMENT),
      ('chromeos_syslog_line', _CHROMEOS_SYSLOG_LINE)]

  _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

  def __init__(self):
    """Initializes a parser."""
    super(SyslogParser, self).__init__()
    self._last_month = 0
    self._maximum_year = 0
    self._plugin_by_reporter = {}
    self._year_use = 0

  def _UpdateYear(self, mediator, month):
    """Updates the year to use for events, based on last observed month.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      month (int): month observed by the parser, where January is 1.
    """
    if not self._year_use:
      self._year_use = mediator.GetEstimatedYear()
    if not self._maximum_year:
      self._maximum_year = mediator.GetLatestYear()

    if not self._last_month:
      self._last_month = month
      return

    # Some syslog daemons allow out-of-order sequences, so allow some leeway
    # to not cause Apr->May->Apr to cause the year to increment.
    # See http://bugzilla.adiscon.com/show_bug.cgi?id=527
    if self._last_month > (month + 1):
      if self._year_use != self._maximum_year:
        self._year_use += 1
    self._last_month = month

  def EnablePlugins(self, plugin_includes):
    """Enables parser plugins.

    Args:
      plugin_includes (list[str]): names of the plugins to enable, where
          None or an empty list represents all plugins. Note that the
          default plugin is handled separately.
    """
    super(SyslogParser, self).EnablePlugins(plugin_includes)

    # Index plugins by the syslog reporter they handle for fast dispatch.
    self._plugin_by_reporter = {}
    for plugin in self._plugins:
      self._plugin_by_reporter[plugin.REPORTER] = plugin

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a matching entry.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): elements parsed from the file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in self._SUPPORTED_KEYS:
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    if key == 'chromeos_syslog_line':
      timestamp = timelib.Timestamp.FromTimeString(
          structure.iso_8601_date[0])
    else:
      month = timelib.MONTH_DICT.get(structure.month.lower(), None)
      if not month:
        # Report the original token: 'month' is None on this path and
        # formatting None with '{0:s}' raises TypeError.
        parser_mediator.ProduceParserError(
            'Invalid month value: {0:s}'.format(structure.month))
        return

      self._UpdateYear(parser_mediator, month)
      timestamp = timelib.Timestamp.FromTimeParts(
          year=self._year_use, month=month, day=structure.day,
          hour=structure.hour, minutes=structure.minute,
          seconds=structure.second, timezone=parser_mediator.timezone)

    plugin = None
    if key == 'syslog_comment':
      event_data = SyslogCommentEventData()
      event_data.body = structure.body
      # TODO: pass line number to offset or remove.
      event_data.offset = 0

    else:
      event_data = SyslogLineEventData()
      event_data.body = structure.body
      event_data.hostname = structure.hostname or None
      # TODO: pass line number to offset or remove.
      event_data.offset = 0
      event_data.pid = structure.pid
      event_data.reporter = structure.reporter
      event_data.severity = structure.severity

      plugin = self._plugin_by_reporter.get(structure.reporter, None)
      if plugin:
        attributes = {
            'hostname': structure.hostname,
            'severity': structure.severity,
            'reporter': structure.reporter,
            'pid': structure.pid,
            'body': structure.body}

        try:
          # TODO: pass event_data instead of attributes.
          plugin.Process(parser_mediator, timestamp, attributes)

        except errors.WrongPlugin:
          # Fall through to producing a plain syslog event below.
          plugin = None

    if not plugin:
      event = time_events.TimestampEvent(
          timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyStructure(self, unused_parser_mediator, lines):
    """Verifies that this is a syslog-formatted file.

    Args:
      unused_parser_mediator (ParserMediator): mediates interactions between
          parsers and other components, such as storage and dfvfs.
      lines (str): one or more lines from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    return (re.match(self._VERIFICATION_REGEX, lines) or
            re.match(self._CHROMEOS_VERIFICATION_REGEX, lines)) is not None
# A '#' comment runs to end of line and produces no tokens.
_comment = pp.Regex (r'#.*').suppress ()
# Any bare identifier is treated as an operator reference.
_operator = _ident.copy ().setParseAction (_make_operator).setName ('operator')
_string = pp.quotedString.setParseAction (_make_string).setName ('string')
_open_brace = pp.Keyword ('{').suppress ()
_close_brace = pp.Keyword ('}').suppress ()
_boolean = (pp.Keyword ('true').setParseAction (_make_true).setName ('true')
            ^ pp.Keyword ('false').setParseAction (_make_false).setName ('false'))
_procedure = pp.Forward ()
# A sequence of operations; procedures nest recursively via _procedure.
operation = pp.ZeroOrMore (_comment ^ _boolean ^ _number ^ _procedure
                           ^ _string ^ _operator)
# '{ ... }' groups operations into a procedure value.
_procedure << pp.Group (
    _open_brace + operation + _close_brace).setName ('procedure').setParseAction (_make_procedure)
# 'name { ... }' binds a procedure to a name, with an optional comment
# between the name and the body.
_named_procedure = pp.Group (_ident.copy ().setName ('Procedure name')
                             + pp.Optional (_comment)
                             + _procedure).setName ('Named procedure')
# The whole translation unit: a Dict of named procedures.
_grammar = pp.Dict (pp.ZeroOrMore (_comment ^ _named_procedure))

_logger = logging.getLogger (__name__)

# A translation unit maps procedure names to Procedure objects.
TUType = Mapping[str, instruction.Procedure]


def front_end (options: Options) -> TUType:
    # (body continues beyond this chunk)
    global _source_file
    _source_file = options.source_file
    global _debug_info_enabled
# (tail of make_keyword -- its 'def' line precedes this chunk)
    return pp.Keyword(kwd_str).setParseAction(pp.replaceWith(kwd_value))


# Keywords that parse to Python constant values.
TRUE = make_keyword("true", True)
FALSE = make_keyword("false", False)
NULL = make_keyword("null", None)

# Structural punctuation is suppressed from the parse results.
LBRACK, RBRACK, LBRACE, RBRACE, COLON = map(pp.Suppress, "[]{}:")

jsonString = pp.dblQuotedString().setParseAction(pp.removeQuotes)
jsonNumber = ppc.number()

jsonObject = pp.Forward().setName("jsonObject")
jsonValue = pp.Forward().setName("jsonValue")
jsonElements = pp.delimitedList(jsonValue)
# Arrays default to [] when empty.
jsonArray = pp.Group(LBRACK + pp.Optional(jsonElements, []) + RBRACK)
jsonValue << (jsonString | jsonNumber | pp.Group(jsonObject)
              | jsonArray | TRUE | FALSE | NULL)
# '"key": value' pairs; pp.Dict turns them into named results.
memberDef = pp.Group(jsonString + COLON + jsonValue).setName("jsonMember")
jsonMembers = pp.delimitedList(memberDef)
jsonObject << pp.Dict(LBRACE + pp.Optional(jsonMembers) + RBRACE)

# Allow //- and /* */-style comments anywhere in the input.
jsonComment = pp.cppStyleComment
jsonObject.ignore(jsonComment)


if __name__ == "__main__":
    # Demo input (string continues beyond this chunk).
    testdata = """
    {
        "glossary": {
            "title": "example glossary",
            "GlossDiv": {
# parse utils natural = pp.Word(pp.nums) float_number = pp.Regex(r'(\-)?(\d+)?(\.)(\d*)?([eE][\-\+]\d+)?') skipLine = pp.Suppress(skip_supress('\n')) comment = pp.Suppress(pp.Literal(';')) + skipLine optional_comment = pp.ZeroOrMore(comment) word = pp.Word(pp.alphanums + "*") line = pp.Group( pp.OneOrMore(float_number | word) + pp.Optional(comment)) lines = pp.Group(pp.OneOrMore(line)) brackets = pp.Suppress("[") + word + pp.Suppress("]") # High level parsers section = brackets + optional_comment + lines many_sections = pp.Group(pp.OneOrMore(section)) # Parser for itp files itp_parser = optional_comment + many_sections # Parser for the atom section of mol2 files
    # (tail of a label-translation dict whose opening precedes this chunk)
    'SIGMA+g': 'Σ+g', 'PI': 'Π', 'PIu': 'Πu', 'PIg': 'Πg',
    'DELTA': 'Δ', 'DELTAu': 'Δu', 'DELTAg': 'Δg',
    'PHI': 'Φ', 'PHIg': 'Φg', 'PHIu': 'Φu'
}

integer = pp.Word(pp.nums)
# Spin multiplicity, 2S+1.
molecule_Smult = integer.setResultsName('Smult')
molecule_irrep = pp.oneOf(orbital_irrep_labels).setResultsName('irrep')
# J value: optionally signed integer, possibly a half-integer '<n>/2'.
molecule_Jstr = (pp.Combine(pp.Optional(pp.oneOf(('+', '-'))) + integer)
                 + pp.Optional(pp.Suppress('/') + '2')).setResultsName('Jstr')
# Term symbol: multiplicity, irrep label, optional '_J' suffix.
molecule_term = (molecule_Smult + molecule_irrep
                 + pp.Optional(pp.Suppress('_') + molecule_Jstr))
# Electronic-state label such as X, A, a' (optional prime/double-prime).
term_label = pp.Combine(
    pp.Word(pp.srange('[A-Za-z]'))
    + pp.Optional(pp.oneOf(("'", '"')))).setResultsName('term_label')
# Full term, optionally labelled and parenthesised; must consume the input.
molecule_term_with_label = (pp.Optional(term_label)
                            + pp.Suppress(pp.Optional('('))
                            + molecule_term
                            + pp.Suppress(pp.Optional(')'))
                            + pp.StringEnd())


class MolecularTermSymbol(State):
    def parse_state(self, state_str):
        # (body continues beyond this chunk)
        try:
        # (tail of an infixNotation precedence table whose head precedes
        # this chunk)
        pp.opAssoc.LEFT,
    ),
    (
        pp.oneOf("||"),
        2,
        pp.opAssoc.LEFT,
    ),
])

# Comma-separated list of strings/expressions for 'print'.
prt_list = pp.Group(pp.delimitedList(string | expr))
paren_expr = pp.Group(LPAR + expr + RPAR)

stmt = pp.Forward()
# 'name = expr;'
assignment_stmt = pp.Group(identifier + EQ + expr + SEMI)
# The '-' after each keyword disables backtracking past it, giving better
# error reporting once the keyword has matched.
while_stmt = pp.Group(WHILE - paren_expr + stmt)
if_stmt = pp.Group(IF - paren_expr + stmt + pp.Optional(ELSE + stmt))
print_stmt = pp.Group(PRINT - pp.Group(LPAR + prt_list + RPAR) + SEMI)
putc_stmt = pp.Group(PUTC - paren_expr + SEMI)
# '{ stmt* }' block (stmt[...] = zero or more).
stmt_list = pp.Group(LBRACE + stmt[...] + RBRACE)
stmt <<= (pp.Group(SEMI)
          | assignment_stmt
          | while_stmt
          | if_stmt
          | print_stmt
          | putc_stmt
          | stmt_list).setName("statement")

# A program is any number of statements; C/C++-style comments are ignored.
code = stmt[...]
code.ignore(pp.cppStyleComment)

# (test cases continue beyond this chunk)
tests = [
def _MathParser(math_stack):
  """Defines the entire math expression for BigQuery queries.

  Converts the expression into postfix notation. The stack is reversed
  (i.e. the last element acts the top of the stack).

  Actions do not occur unless parseString is called on the BNF returned.
  The actions will modify the original list that was passed when the BNF
  was generated. The <math_stack> will return the single expression
  converted to postfix.

  Arguments:
    math_stack: Returns postfix notation of one math expression.

  Returns:
    A BNF of an math/string expression.
  """

  def PushAggregation(tokens):
    """Pushes aggregation functions onto the stack.

    When the aggregation is pushed, the name is rewritten. The label is
    prepended with AGGREGATION_ to signal that an aggregation is occurring.
    Following this prefix is an integer, which represents the number of
    comma separated arguments that were provided. Finally, the name of the
    function is appended to the label.

    For most functions, the aggregation name is simply appended. However,
    there are special exceptions for COUNT. A normal count function is
    rewritten as AGGREGATION_i_COUNT. However, a count with the distinct
    keyword is rewritten to AGGREGATION_i_DISTINCTCOUNT.

    Args:
      tokens: The function name and arguments in a list object.
    """
    function_name = tokens[0]
    # Rename count with distinct keyword as distinctcount.
    if function_name == 'COUNT':
      if 'DISTINCT' in list(tokens):
        function_name = 'DISTINCTCOUNT'
    # Assume all aggregation functions have at least one argument.
    # If a function n commas, then it has n + 1 arguments.
    num_args = 1
    for token in tokens:
      if token == ',':
        num_args += 1
    math_stack.append(util.AggregationFunctionToken(function_name, num_args))

  def PushFunction(tokens):
    """Push a function token onto the stack.

    Args:
      tokens: list of all tokens, tokens[0] is the function name str.
    """
    math_stack.append(util.BuiltInFunctionToken(tokens[0]))

  def PushSingleToken(tokens):
    """Push the topmost token onto the stack."""
    # Classify the literal: numeric, quoted string, named BigQuery
    # constant, otherwise a field reference.
    if util.IsFloat(tokens[0]):
      try:
        token = int(tokens[0])
      except ValueError:
        token = float(tokens[0])
    elif tokens[0].startswith('\'') or tokens[0].startswith('"'):
      token = util.StringLiteralToken(tokens[0])
    elif tokens[0].lower() in util.BIGQUERY_CONSTANTS:
      token = util.LiteralToken(tokens[0].lower(),
                                util.BIGQUERY_CONSTANTS[tokens[0].lower()])
    else:
      token = util.FieldToken(tokens[0])
    math_stack.append(token)

  def PushCountStar(tokens):
    # Only '*' is a valid count-star argument.
    if tokens[0] != '*':
      raise ValueError('Not a count star argument.')
    math_stack.append(util.CountStarToken())

  def PushUnaryOperators(tokens):
    # The list must be reversed since unary operations are unwrapped in the
    # other direction. An example is ~-1. The negation occurs before the bit
    # inversion.
    for i in reversed(range(0, len(tokens))):
      if tokens[i] == '-':
        # Unary minus is encoded as multiplication by -1.
        math_stack.append(int('-1'))
        math_stack.append(util.OperatorToken('*', 2))
      elif tokens[i] == '~':
        math_stack.append(util.OperatorToken('~', 1))
      elif tokens[i].lower() == 'not':
        math_stack.append(util.OperatorToken('not', 1))

  def PushBinaryOperator(tokens):
    math_stack.append(util.OperatorToken(tokens[0], 2))

  # Miscellaneous symbols and keywords.
  comma = pp.Literal(',')
  decimal = pp.Literal('.')
  exponent_literal = pp.CaselessLiteral('E')
  lp = pp.Literal('(')
  rp = pp.Literal(')')
  count_star = pp.Literal('*')
  distinct_keyword = pp.CaselessKeyword('DISTINCT')

  # Any non-space containing sequence of characters that must begin with
  # an alphabetical character and contain alphanumeric characters
  # and underscores (i.e. function or variable names).
  label = pp.Word(pp.alphas, pp.alphas + pp.nums + '_' + '.')

  # A single/double quote surrounded string.
  string = pp.quotedString

  # Various number representations.
  integer = pp.Word(pp.nums)
  decimal_type1 = pp.Combine(integer + decimal + pp.Optional(integer))
  decimal_type2 = pp.Combine(decimal + integer)
  real = decimal_type1 | decimal_type2
  exponent = exponent_literal + pp.Word('+-' + pp.nums, pp.nums)
  number_without_exponent = real | integer
  number = pp.Combine(number_without_exponent + pp.Optional(exponent))

  integer_argument = pp.Word(pp.nums)
  integer_argument.setParseAction(PushSingleToken)

  # Forward declaration for recusive grammar. We assume that
  # full_expression can represent any expression that is valid.
  full_expression = pp.Forward()

  # Aggregation function definitions.
  avg_function = pp.CaselessKeyword('AVG') + lp + full_expression + rp
  count_star.setParseAction(PushCountStar)
  count_argument = ((pp.Optional(distinct_keyword) + full_expression)
                    | count_star)
  count_function = (pp.CaselessKeyword('COUNT') + lp + count_argument +
                    pp.Optional(comma + integer_argument) + rp)
  quantiles_function = (pp.CaselessKeyword('QUANTILES') + lp +
                        full_expression +
                        pp.Optional(comma + integer_argument) + rp)
  stddev_function = pp.CaselessKeyword('STDDEV') + lp + full_expression + rp
  variance_function = (pp.CaselessKeyword('VARIANCE') + lp +
                       full_expression + rp)
  last_function = pp.CaselessKeyword('LAST') + lp + full_expression + rp
  max_function = pp.CaselessKeyword('MAX') + lp + full_expression + rp
  min_function = pp.CaselessKeyword('MIN') + lp + full_expression + rp
  nth_function = (pp.CaselessKeyword('NTH') + lp + integer_argument + comma +
                  full_expression + rp)
  group_concat_function = (pp.CaselessKeyword('GROUP_CONCAT') + lp +
                           full_expression + rp)
  sum_function = pp.CaselessKeyword('SUM') + lp + full_expression + rp
  top_function = (pp.CaselessKeyword('TOP') + lp + full_expression +
                  pp.Optional(comma + integer_argument +
                              pp.Optional(comma + integer_argument)) + rp)

  aggregate_functions = (avg_function | count_function | quantiles_function |
                         stddev_function | variance_function |
                         last_function | max_function | min_function |
                         nth_function | group_concat_function |
                         sum_function | top_function)
  aggregate_functions.setParseAction(PushAggregation)

  # Generic function call: name(arg, arg, ...).
  functions_arguments = pp.Optional(
      full_expression + pp.ZeroOrMore(comma.suppress() + full_expression))
  functions = label + lp + functions_arguments + rp
  functions.setParseAction(PushFunction)

  literals = number | string | label
  literals.setParseAction(PushSingleToken)

  # Any expression that can be modified by an unary operator.
  # We include strings (even though they can't be modified by any unary
  # operator) since atoms do not necessitate modification by unary
  # operators. These errors will be caught by the interpreter.
  atom = ((lp + full_expression + rp) |
          aggregate_functions |
          functions |
          literals)

  unary_operators = (pp.CaselessLiteral('+') |
                     pp.CaselessLiteral('-') |
                     pp.CaselessLiteral('~') |
                     pp.CaselessKeyword('not'))

  # Take all unary operators preceding atom (possibly many).
  current_expression = (pp.ZeroOrMore(unary_operators) + atom.suppress())
  current_expression.setParseAction(PushUnaryOperators)

  # All operators in same set have same precedence. Precedence is top to
  # bottom.
  binary_operators = [
      (pp.CaselessLiteral('*') | pp.CaselessLiteral('/') |
       pp.CaselessLiteral('%')),
      pp.CaselessLiteral('+') | pp.CaselessLiteral('-'),
      pp.CaselessLiteral('>>') | pp.CaselessLiteral('<<'),
      (pp.CaselessLiteral('<=') | pp.CaselessLiteral('>=') |
       pp.CaselessLiteral('<') | pp.CaselessLiteral('>')),
      (pp.CaselessLiteral('==') | pp.CaselessLiteral('=') |
       pp.CaselessLiteral('!=')),
      pp.CaselessKeyword('is') | pp.CaselessKeyword('contains'),
      pp.CaselessLiteral('&'),
      pp.CaselessLiteral('^'),
      pp.CaselessLiteral('|'),
      pp.CaselessKeyword('and'),
      pp.CaselessKeyword('or'),
  ]

  # Take the operator set of the most precedence that has not been parsed.
  # Find and collapse all operators of the set. Thus, order of operations
  # is not broken. Equivalent to recursive descent parsing.
  # Below code is equivalent to:
  # expression = expression + pp.ZeroOrMore(op_level1 + expression)
  # expression = expression + pp.ZeroOrMore(op_level2 + expression)
  # ...
  for operator_set in binary_operators:
    # Represents _i-1 ai part of expression that is added to current
    # expression.
    operator_expression = operator_set + current_expression
    # Push only the operator, both atoms will have already been pushed.
    operator_expression.setParseAction(PushBinaryOperator)
    # pylint: disable=g-no-augmented-assignment
    current_expression = (current_expression +
                          pp.ZeroOrMore(operator_expression))

  # pylint: disable=pointless-statement
  full_expression << current_expression
  return full_expression
class SSHPlugin(interface.SyslogPlugin):
  """A plugin for creating events from syslog message produced by SSH."""

  NAME = 'ssh'
  DESCRIPTION = 'Parser for SSH syslog entries.'
  REPORTER = 'sshd'

  # sshd reports either password or public-key authentication.
  _AUTHENTICATION_METHOD = (
      pyparsing.Keyword('password') | pyparsing.Keyword('publickey'))

  # Named pyparsing building blocks shared by the message grammars below.
  _PYPARSING_COMPONENTS = {
      'address': text_parser.PyparsingConstants.IP_ADDRESS.setResultsName(
          'address'),
      'authentication_method': _AUTHENTICATION_METHOD.setResultsName(
          'authentication_method'),
      # Key fingerprint, e.g. 'RSA aa:bb:...'.
      'fingerprint': pyparsing.Combine(
          pyparsing.Literal('RSA ') +
          pyparsing.Word(':' + pyparsing.hexnums)).setResultsName(
              'fingerprint'),
      'port': pyparsing.Word(pyparsing.nums, max=5).setResultsName('port'),
      'protocol': pyparsing.Literal('ssh2').setResultsName('protocol'),
      'username': pyparsing.Word(pyparsing.alphanums).setResultsName(
          'username'),
  }

  # Successful login message.
  _LOGIN_GRAMMAR = (
      pyparsing.Literal('Accepted') +
      _PYPARSING_COMPONENTS['authentication_method'] +
      pyparsing.Literal('for') + _PYPARSING_COMPONENTS['username'] +
      pyparsing.Literal('from') + _PYPARSING_COMPONENTS['address'] +
      pyparsing.Literal('port') + _PYPARSING_COMPONENTS['port'] +
      _PYPARSING_COMPONENTS['protocol'] +
      pyparsing.Optional(
          pyparsing.Literal(':') + _PYPARSING_COMPONENTS['fingerprint']) +
      pyparsing.StringEnd())

  # Failed authentication attempt.
  _FAILED_CONNECTION_GRAMMAR = (
      pyparsing.Literal('Failed') +
      _PYPARSING_COMPONENTS['authentication_method'] +
      pyparsing.Literal('for') + _PYPARSING_COMPONENTS['username'] +
      pyparsing.Literal('from') + _PYPARSING_COMPONENTS['address'] +
      pyparsing.Literal('port') + _PYPARSING_COMPONENTS['port'] +
      pyparsing.StringEnd())

  # New TCP connection observed by sshd.
  _OPENED_CONNECTION_GRAMMAR = (
      pyparsing.Literal('Connection from') +
      _PYPARSING_COMPONENTS['address'] +
      pyparsing.Literal('port') + _PYPARSING_COMPONENTS['port'] +
      pyparsing.LineEnd())

  MESSAGE_GRAMMARS = [
      ('login', _LOGIN_GRAMMAR),
      ('failed_connection', _FAILED_CONNECTION_GRAMMAR),
      ('opened_connection', _OPENED_CONNECTION_GRAMMAR),
  ]

  def ParseMessage(self, parser_mediator, key, timestamp, tokens):
    """Produces an event from a syslog body that matched one of the grammars.

    Args:
      parser_mediator (ParserMediator): mediates interactions between
          parsers and other components, such as storage and dfvfs.
      key (str): name of the matching grammar.
      timestamp (int): the timestamp, which contains the number of micro
          seconds since January 1, 1970, 00:00:00 UTC or 0 on error.
      tokens (dict[str, str]): tokens derived from a syslog message based on
          the defined grammar.

    Raises:
      AttributeError: If an unknown key is provided.
    """
    # TODO: change AttributeError into ValueError or equiv.
    if key not in ('failed_connection', 'login', 'opened_connection'):
      raise AttributeError('Unknown grammar key: {0:s}'.format(key))

    if key == 'login':
      event_data = SSHLoginEventData()

    elif key == 'failed_connection':
      event_data = SSHFailedConnectionEventData()

    elif key == 'opened_connection':
      event_data = SSHOpenedConnectionEventData()

    # Copy whichever tokens the matched grammar produced; tokens not set by
    # the grammar default to None.
    event_data.address = tokens.get('address', None)
    event_data.authentication_method = tokens.get(
        'authentication_method', None)
    event_data.body = tokens.get('body', None)
    event_data.fingerprint = tokens.get('fingerprint', None)
    event_data.hostname = tokens.get('hostname', None)
    # TODO: pass line number to offset or remove.
    event_data.offset = 0
    event_data.pid = tokens.get('pid', None)
    event_data.protocol = tokens.get('protocol', None)
    event_data.port = tokens.get('port', None)
    event_data.reporter = tokens.get('reporter', None)
    event_data.severity = tokens.get('severity', None)
    event_data.username = tokens.get('username', None)

    event = time_events.TimestampEvent(
        timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)