class LSBInitLexer(lexer.Lexer):
  """Parse out upstart configurations from init scripts.

  Runlevels in /etc/init.d are defined in stanzas like:

  ### BEGIN INIT INFO
  # Provides:             sshd
  # Required-Start:       $remote_fs $syslog
  # Required-Stop:        $remote_fs $syslog
  # Default-Start:        2 3 4 5
  # Default-Stop:         1
  # Short-Description:    OpenBSD Secure Shell server
  ### END INIT INFO
  """

  tokens = [
      # Stay idle until the LSB header block begins, then switch states.
      lexer.Token("INITIAL", r"### BEGIN INIT INFO", None, "UPSTART"),
      # The footer terminates the header block; Finish discards the rest.
      lexer.Token("UPSTART", r"### END INIT INFO", "Finish", "INITIAL"),
      # A header entry looks like "# Key: value" - both parts are captured.
      lexer.Token("UPSTART", r"#\s+([-\w]+):\s+([^#\n]*)", "StoreEntry", None),
      # A non-comment word after a newline means we fell out of the header
      # block without seeing the END marker - stop parsing.
      lexer.Token("UPSTART", r"\n\s*\w+", "Finish", None),
      # Catch-all: consume any other character in any state.
      lexer.Token(".*", ".", None, None)
  ]

  # Header keys which must be present for ParseEntries to return a result.
  required = {"provides", "default-start"}

  def __init__(self):
    super(LSBInitLexer, self).__init__()
    # Maps lower-cased header keys to their raw (untrimmed) string values.
    self.entries = {}

  def StoreEntry(self, match, **_):
    """Record a single "# Key: value" header entry keyed by lowercased name."""
    key, val = match.groups()
    setting = key.strip().lower()
    if setting:
      self.entries[setting] = val

  def Finish(self, **_):
    """Stop further lexing by discarding the remaining input buffer."""
    self.buffer = []

  def ParseEntries(self, data):
    """Extract the LSB header entries from an init script.

    Args:
      data: The init script contents as a string.

    Returns:
      A dict of header entries if all required keys were found, otherwise
      None (implicit return).
    """
    self.entries = {}
    self.Reset()
    self.Feed(utils.SmartStr(data))
    self.Close()

    found = set(self.entries)
    if self.required.issubset(found):
      return self.entries
class StringInterpolator(lexer.Lexer):
  r"""Implements a lexer for the string interpolation language.

  Config files may specify nested interpolation codes:

  - The following form specifies an interpolation command:
      %(arg string|filter)

    Where arg string is an arbitrary string and filter is the name of a
    filter function which will receive the arg string. If filter is omitted,
    the arg string is interpreted as a section.parameter reference and
    expanded from within the config system.

  - Interpolation commands may be nested. In this case, the interpolation
    proceeds from innermost to outermost:
      e.g. %(arg1 %(arg2|filter2)|filter1)

      1. First arg2 is passed through filter2.
      2. The result of that is appended to arg1.
      3. The combined string is then filtered using filter1.

  - The following characters must be escaped by preceding them with a
    single \: - ()|
  """

  tokens = [
      # When in literal mode, only allow to escape }
      lexer.Token("Literal", r"\\[^{}]", "AppendArg", None),

      # Allow escaping of special characters
      lexer.Token(None, r"\\(.)", "Escape", None),

      # Literal sequence is %{....}. Literal states can not be nested further,
      # i.e. we include anything until the next }. It is still possible to
      # escape } if this character needs to be inserted literally.
      lexer.Token("Literal", r"\}", "EndLiteralExpression,PopState", None),
      lexer.Token("Literal", r"[^}\\]+", "AppendArg", None),
      lexer.Token(None, r"\%\{", "StartExpression,PushState", "Literal"),

      # Expansion sequence is %(....)
      lexer.Token(None, r"\%\(", "StartExpression", None),
      lexer.Token(None, r"\|([a-zA-Z_]+)\)", "Filter", None),
      lexer.Token(None, r"\)", "ExpandArg", None),

      # Glob up as much data as possible to increase efficiency here.
      lexer.Token(None, r"[^()%{}|\\]+", "AppendArg", None),
      lexer.Token(None, r".", "AppendArg", None),

      # Empty input is also ok.
      lexer.Token(None, "^$", None, None)
  ]

  # Translation table for recognized backslash escape sequences; anything
  # else passes through Escape() unchanged.
  STRING_ESCAPES = {
      "\\\\": "\\",
      "\\(": "(",
      "\\)": ")",
      "\\{": "{",
      "\\}": "}",
      "\\%": "%"
  }

  def __init__(self, data, config, default_section="", parameter=None,
               context=None):
    # The stack holds one partially-built string per open expression; the
    # bottom element accumulates the final interpolated result.
    self.stack = [""]
    self.default_section = default_section
    self.parameter = parameter
    self.config = config
    self.context = context
    super(StringInterpolator, self).__init__(data)

  def Escape(self, string="", **_):
    """Support standard string escaping."""
    # Translate special escapes; unrecognized sequences are kept verbatim.
    self.stack[-1] += self.STRING_ESCAPES.get(string, string)

  def Error(self, e):
    """Parse errors are fatal."""
    raise ConfigFormatError("While parsing %s: %s" % (self.parameter, e))

  def StartExpression(self, **_):
    """Start processing a new expression."""
    # Extend the stack for the new expression.
    self.stack.append("")

  def EndLiteralExpression(self, **_):
    """Close a %{...} literal and fold it into the enclosing expression."""
    if len(self.stack) <= 1:
      raise lexer.ParseError(
          "Unbalanced literal sequence: Can not expand '%s'" %
          self.processed_buffer)

    arg = self.stack.pop(-1)
    self.stack[-1] += arg

  def Filter(self, match=None, **_):
    """Filter the current expression."""
    arg = self.stack.pop(-1)

    # Filters can be specified as a comma separated list.
    for filter_name in match.group(1).split(","):
      filter_object = ConfigFilter.classes_by_name.get(filter_name)
      if filter_object is None:
        raise FilterError("Unknown filter function %r" % filter_name)

      logging.info("Applying filter %s for %s.", filter_name, arg)
      arg = filter_object().Filter(arg)

    self.stack[-1] += arg

  def ExpandArg(self, **_):
    """Expand the args as a section.parameter from the config."""
    # This function is called when we see close ) and the stack depth has to
    # exactly match the number of (.
    if len(self.stack) <= 1:
      raise lexer.ParseError(
          "Unbalanced parenthesis: Can not expand '%s'" %
          self.processed_buffer)

    # This is the full parameter name: e.g. Logging.path
    parameter_name = self.stack.pop(-1)

    # Bare names are resolved relative to the default section.
    if "." not in parameter_name:
      parameter_name = "%s.%s" % (self.default_section, parameter_name)

    final_value = self.config.Get(parameter_name, context=self.context)
    if final_value is None:
      final_value = ""

    type_info_obj = (self.config.FindTypeInfo(parameter_name) or
                     type_info.String())

    # Encode the interpolated string according to its type.
    self.stack[-1] += type_info_obj.ToString(final_value)

  def AppendArg(self, string="", **_):
    """Accumulate literal text onto the innermost open expression."""
    self.stack[-1] += string

  def Parse(self):
    """Run the lexer to completion and return the interpolated string."""
    self.Close()
    if len(self.stack) != 1:
      raise lexer.ParseError("Nested expression not balanced.")

    return self.stack[0]
def _AddToken(self, state_regex, regex, actions, next_state):
  """Append an additional token rule to this instance's token table."""
  new_rule = lexer.Token(state_regex, regex, actions, next_state)
  self._tokens.append(new_rule)
class Parser(lexer.SearchParser):
  """Parses and generates an AST for a query written in the described language.

  Examples of valid syntax:
    size is 40
    (name contains "Program Files" AND hash.md5 is "123abc")
    @imported_modules (num_symbols = 14 AND symbol.name is "FindWindow")
  """
  expression_cls = BasicExpression
  binary_expression_cls = BinaryExpression
  context_cls = ContextExpression
  identity_expression_cls = IdentityExpression

  tokens = [
      # Operators and related tokens
      lexer.Token("INITIAL", r"\@[\w._0-9]+", "ContextOperator,PushState",
                  "CONTEXTOPEN"),
      lexer.Token("INITIAL", r"[^\s\(\)]", "PushState,PushBack", "ATTRIBUTE"),
      lexer.Token("INITIAL", r"\(", "PushState,BracketOpen", None),
      lexer.Token("INITIAL", r"\)", "BracketClose", "BINARY"),

      # Context
      lexer.Token("CONTEXTOPEN", r"\(", "BracketOpen", "INITIAL"),

      # Double quoted string
      lexer.Token("STRING", "\"", "PopState,StringFinish", None),
      lexer.Token("STRING", r"\\x(..)", "HexEscape", None),
      lexer.Token("STRING", r"\\(.)", "StringEscape", None),
      lexer.Token("STRING", r"[^\\\"]+", "StringInsert", None),

      # Single quoted string
      lexer.Token("SQ_STRING", "'", "PopState,StringFinish", None),
      lexer.Token("SQ_STRING", r"\\x(..)", "HexEscape", None),
      lexer.Token("SQ_STRING", r"\\(.)", "StringEscape", None),
      lexer.Token("SQ_STRING", r"[^\\']+", "StringInsert", None),

      # Basic expression
      lexer.Token("ATTRIBUTE", r"[\w._0-9]+", "StoreAttribute", "OPERATOR"),
      lexer.Token("OPERATOR", r"(\w+|[<>!=]=?)", "StoreOperator", "ARG"),
      lexer.Token("ARG", r"(\d+\.\d+)", "InsertFloatArg", "ARG"),
      # BUGFIX: hex literals contain the digits a-f which \d does not match.
      # InsertInt16Arg parses with int(string, 16), so accept them here.
      lexer.Token("ARG", r"(0x[0-9a-fA-F]+)", "InsertInt16Arg", "ARG"),
      lexer.Token("ARG", r"(\d+)", "InsertIntArg", "ARG"),
      lexer.Token("ARG", "\"", "PushState,StringStart", "STRING"),
      lexer.Token("ARG", "'", "PushState,StringStart", "SQ_STRING"),
      # When the last parameter from arg_list has been pushed

      # State where binary operators are supported (AND, OR)
      lexer.Token("BINARY", r"(?i)(and|or|\&\&|\|\|)", "BinaryOperator",
                  "INITIAL"),
      # - We can also skip spaces
      lexer.Token("BINARY", r"\s+", None, None),
      # - But if it's not "and" or just spaces we have to go back
      lexer.Token("BINARY", ".", "PushBack,PopState", None),

      # Skip whitespace.
      lexer.Token(".", r"\s+", None, None),
  ]

  def InsertArg(self, string="", **_):
    """Insert an arg to the current expression."""
    logging.debug("Storing Argument %s", string)

    # This expression is complete
    if self.current_expression.AddArg(string):
      self.stack.append(self.current_expression)
      self.current_expression = self.expression_cls()
      # We go to the BINARY state, to find if there's an AND or OR operator
      return "BINARY"

  def InsertFloatArg(self, string="", **_):
    """Inserts a Float argument."""
    try:
      float_value = float(string)
      return self.InsertArg(float_value)
    except (TypeError, ValueError):
      raise ParseError("%s is not a valid float." % string)

  def InsertIntArg(self, string="", **_):
    """Inserts an Integer argument."""
    try:
      int_value = int(string)
      return self.InsertArg(int_value)
    except (TypeError, ValueError):
      raise ParseError("%s is not a valid integer." % string)

  def InsertInt16Arg(self, string="", **_):
    """Inserts an Integer in base16 argument."""
    try:
      int_value = int(string, 16)
      return self.InsertArg(int_value)
    except (TypeError, ValueError):
      raise ParseError("%s is not a valid base16 integer." % string)

  def StringFinish(self, **_):
    """Dispatch a completed quoted string to attribute or argument handling."""
    if self.state == "ATTRIBUTE":
      return self.StoreAttribute(string=self.string)

    elif self.state == "ARG":
      return self.InsertArg(string=self.string)

  def StringEscape(self, string, match, **_):
    """Escape backslashes found inside a string quote.

    Backslashes followed by anything other than [\'"rnbt] will raise an Error.

    Args:
      string: The string that matched.
      match: The match object (m.group(1) is the escaped code)

    Raises:
      ParseError: When the escaped string is not one of [\'"rnbt]
    """
    if match.group(1) in "\\'\"rnbt":
      # NOTE(review): "string_escape" is a Python 2 only codec; this call
      # needs porting (e.g. codecs.decode with "unicode_escape") for Python 3.
      self.string += string.decode("string_escape")
    else:
      raise ParseError("Invalid escape character %s." % string)

  def HexEscape(self, string, match, **_):
    """Converts a hex escaped string."""
    logging.debug("HexEscape matched %s", string)
    hex_string = match.group(1)
    try:
      self.string += binascii.unhexlify(hex_string)
    except TypeError:
      raise ParseError("Invalid hex escape %s" % string)

  def ContextOperator(self, string="", **_):
    """Push a context expression (e.g. @imported_modules) onto the stack."""
    self.stack.append(self.context_cls(string[1:]))

  def Reduce(self):
    """Reduce the token stack into an AST.

    Returns:
      The single root expression remaining on the stack.
    """
    # Check for sanity
    if self.state != "INITIAL" and self.state != "BINARY":
      self.Error("Premature end of expression")

    length = len(self.stack)
    while length > 1:
      # Precedence order
      self._CombineParenthesis()
      self._CombineBinaryExpressions("and")
      self._CombineBinaryExpressions("or")
      self._CombineContext()

      # No change
      if len(self.stack) == length:
        break

      length = len(self.stack)

    if length != 1:
      self.Error("Illegal query expression")

    return self.stack[0]

  def Error(self, message=None, _=None):
    """Raise a ParseError annotated with the current buffer position."""
    raise ParseError("%s in position %s: %s <----> %s )" %
                     (message, len(self.processed_buffer),
                      self.processed_buffer, self.buffer))

  def _CombineBinaryExpressions(self, operator):
    """Merge "<expr> <operator> <expr>" triples into one binary expression."""
    for i in range(1, len(self.stack) - 1):
      item = self.stack[i]
      if (isinstance(item, lexer.BinaryExpression) and
          item.operator.lower() == operator.lower() and
          isinstance(self.stack[i - 1], lexer.Expression) and
          isinstance(self.stack[i + 1], lexer.Expression)):
        lhs = self.stack[i - 1]
        rhs = self.stack[i + 1]

        self.stack[i].AddOperands(lhs, rhs)
        self.stack[i - 1] = None
        self.stack[i + 1] = None

    # BUGFIX: filter() returns a lazy iterator on Python 3; materialize it so
    # the stack stays a list (Reduce() relies on len() and indexing).
    self.stack = list(filter(None, self.stack))

  def _CombineContext(self):
    """Attach a trailing expression to the preceding context expression."""
    # Context can merge from item 0
    for i in range(len(self.stack) - 1, 0, -1):
      item = self.stack[i - 1]
      if (isinstance(item, ContextExpression) and
          isinstance(self.stack[i], lexer.Expression)):
        expression = self.stack[i]
        self.stack[i - 1].SetExpression(expression)
        self.stack[i] = None

    # BUGFIX: materialize the filter object (Python 3 compatibility).
    self.stack = list(filter(None, self.stack))
class PlistFilterParser(objectfilter.Parser):
  r"""Plist specific filter parser.

  Because we will be filtering dictionaries and the path components will be
  matched against dictionary keys, we must be more permissive with attribute
  names. This parser allows path components to be enclosed in double quotes to
  allow for spaces, dots or even raw hex-escaped data in them, such as:

    "My\x20first\x20path component".2nd."TH.IRD" contains "Google"

  We store the attribute name as a list of paths into the object instead of as
  a simple string that will be chunked in objectfilter.
  """

  tokens = [
      # Operators and related tokens
      lexer.Token("INITIAL", r"\@[\w._0-9]+", "ContextOperator,PushState",
                  "CONTEXTOPEN"),
      lexer.Token("INITIAL", r"[^\s\(\)]", "PushState,PushBack", "ATTRIBUTE"),
      lexer.Token("INITIAL", r"\(", "PushState,BracketOpen", None),
      lexer.Token("INITIAL", r"\)", "BracketClose", "BINARY"),

      # Context
      lexer.Token("CONTEXTOPEN", r"\(", "BracketOpen", "INITIAL"),

      # Double quoted string
      lexer.Token("STRING", "\"", "PopState,StringFinish", None),
      lexer.Token("STRING", r"\\x(..)", "HexEscape", None),
      lexer.Token("STRING", r"\\(.)", "StringEscape", None),
      lexer.Token("STRING", r"[^\\\"]+", "StringInsert", None),

      # Single quoted string
      lexer.Token("SQ_STRING", "'", "PopState,StringFinish", None),
      lexer.Token("SQ_STRING", r"\\x(..)", "HexEscape", None),
      lexer.Token("SQ_STRING", r"\\(.)", "StringEscape", None),
      lexer.Token("SQ_STRING", r"[^\\']+", "StringInsert", None),

      # Basic expression: attributes are dotted paths, with quoting allowed.
      lexer.Token("ATTRIBUTE", r"\.", "AddAttributePath", "ATTRIBUTE"),
      lexer.Token("ATTRIBUTE", r"\s+", "AddAttributePath", "OPERATOR"),
      lexer.Token("ATTRIBUTE", "\"", "PushState,StringStart", "STRING"),
      lexer.Token("ATTRIBUTE", r"[\w_0-9\-]+", "StringStart,StringInsert",
                  "ATTRIBUTE"),
      lexer.Token("OPERATOR", r"(\w+|[<>!=]=?)", "StoreOperator", "ARG"),
      lexer.Token("ARG", r"(\d+\.\d+)", "InsertFloatArg", "ARG"),
      # BUGFIX: hex literals contain the digits a-f which \d does not match.
      # InsertInt16Arg parses with int(string, 16), so accept them here.
      lexer.Token("ARG", r"(0x[0-9a-fA-F]+)", "InsertInt16Arg", "ARG"),
      lexer.Token("ARG", r"(\d+)", "InsertIntArg", "ARG"),
      lexer.Token("ARG", "\"", "PushState,StringStart", "STRING"),
      lexer.Token("ARG", "'", "PushState,StringStart", "SQ_STRING"),
      # When the last parameter from arg_list has been pushed

      # State where binary operators are supported (AND, OR)
      lexer.Token("BINARY", r"(?i)(and|or|\&\&|\|\|)", "BinaryOperator",
                  "INITIAL"),
      # - We can also skip spaces
      lexer.Token("BINARY", r"\s+", None, None),
      # - But if it's not "and" or just spaces we have to go back
      lexer.Token("BINARY", ".", "PushBack,PopState", None),

      # Skip whitespace.
      lexer.Token(".", r"\s+", None, None),
  ]

  def StringFinish(self, **_):
    """StringFinish doesn't act on ATTRIBUTEs here."""
    if self.state == "ARG":
      return self.InsertArg(string=self.string)

  def AddAttributePath(self, **_):
    """Adds a path component to the current attribute."""
    attribute_path = self.current_expression.attribute
    if not attribute_path:
      attribute_path = []

    attribute_path.append(self.string)
    self.current_expression.SetAttribute(attribute_path)
class YamlConfigLexer(lexer.Lexer):
  """A rough parser that breaks the config into distinct stanzas.

  NOTE: This is _NOT_ a yaml parser. It simply breaks the yaml file into
  chunks which may be rearranged to normalize the file (i.e. sort options).
  This is used to increase readability of the yaml file.
  """

  # Diagnostic flag inherited/used by the lexer machinery.
  verbose = False

  tokens = [
      # Inside an option: a full line (capture its leading indent).
      lexer.Token("OPTION", r"( *)[^\n]+\n", "OptionData", None),
      # Inside an option: bare indentation with no content.
      lexer.Token("OPTION", r" +", "OptionIndent", None),
      # A comment line ends the previous stanza.
      lexer.Token(None, r" *#[^\n]*\n", "Comment", None),
      lexer.Token(None, r"\n", "NewLine", None),
      # A capitalized "Name:" line starts a new option or context.
      lexer.Token(None, r"( *)([A-Z][^\n]+?:[ \n])", "Option", "OPTION"),
      # YAML document separator.
      lexer.Token(None, "---\n", "StartDocument", None),
  ]

  def __init__(self, data):
    super(YamlConfigLexer, self).__init__(data)
    # Root of the context tree; it is its own parent so indent walks stop.
    self.root = Context()
    self.current_context = self.root
    self.current_context.parent = self.root
    # The option currently being accumulated, if any.
    self.current_option = None
    # Comment/blank lines collected since the last stanza boundary; they are
    # attached to whatever stanza starts next.
    self.current_comments = []

  def StartDocument(self, string=None, **_):
    """Attach the document separator and pending comments to the context."""
    self.current_comments.append(string)
    self.current_context.comments = self.current_comments
    self.current_comments = []

  def PushOptionToContext(self):
    """Finalize the pending option into the current context, if any."""
    if self.current_option:
      self.current_context.options.append(self.current_option)
      self.current_option.parent = self.current_context
      self.current_option = None

  def Comment(self, string=None, **_):
    # A comment represents the end of the previous stanza and the start of the
    # new stanza.
    self.PushOptionToContext()
    self.current_comments.append(string)

  def NewLine(self, string=None, **_):
    # New lines are allowed between a comment and its following stanza.
    if self.current_option is None:
      self.current_comments.append(string)
    else:
      # Otherwise its considered part of the previous option - for options with
      # multiple lines.
      self.current_option.lines.append(string)

  def Option(self, match=None, string=None):
    """A New option is detected."""
    # Current line indent.
    indent = match.group(1)

    # Push the previous option to the current context.
    self.PushOptionToContext()

    # Current indent is smaller than the current context, this line belongs
    # to a parent context. We find the context this line belongs to.
    # NOTE(review): indents are all-space strings, so <= compares by prefix,
    # which matches length comparison here.
    if indent <= self.current_context.indent:
      # Switch the current context to match the indent.
      self.current_context = self.FindContextForOption(indent)

    # Currently we tell the difference between an option and a context name by
    # the inclusion of a "." in the name. This means contexts can not have a
    # "." in them.
    if "." in string:
      # Regular option.
      self.current_option = Option()
      self.current_option.name = match.group(2)
      self.current_option.indent = match.group(1)
      self.current_option.comments = self.current_comments
      self.current_comments = []
      logging.debug("Added Option %s to context %s", string,
                    self.current_context.name)

    else:
      # This is a new context.
      context = Context()
      context.name = match.group(2)
      context.comments = self.current_comments
      context.indent = match.group(1)

      # This context is a sibling to the previous one.
      if indent == self.current_context.indent:
        context.parent = self.current_context.parent

      # This context is deeper than the previous one
      else:
        context.parent = self.current_context

      self.current_context.subcontexts.append(context)
      self.current_context = context
      self.current_comments = []

    return "INITIAL"

  def Error(self, message):
    """Lexing errors are fatal."""
    raise RuntimeError(message)

  def FindContextForOption(self, indent):
    """Returns the context which contains this option's indent."""
    context = self.current_context
    # Walk up the tree until we find a strictly shallower context (or root).
    while indent <= context.indent and context != self.root:
      context = context.parent

    return context

  def OptionData(self, string=None, match=None):
    """Accumulate a data line belonging to the current option."""
    # Current line indent is the same as the option name.
    indent = match.group(1)

    # This data is on the same line as the option name, it must belong to the
    # current option.
    # NOTE(review): assumes current_option is set - the OPTION state is only
    # entered via Option(), which creates it for "." names. Verify for
    # context-only names.
    if not self.current_option.lines:
      self.current_option.lines.append(string)

    # Current indent is less or equal to the option indent - it can not belong
    # to the present option.
    elif indent <= self.current_option.indent:
      self.PushBack(string)
      return "INITIAL"

    # Indent is bigger than this option - it represents data in this option.
    else:
      self.current_option.lines.append(string)

  def OptionIndent(self, string=None, **_):
    """Handle a bare run of spaces while inside an option."""
    if not self.current_option:
      self.current_comments.append(string)
    else:
      # An indent was found with the same indent as last option - this
      # represents the end of this option and the start of the next option.
      if string == self.current_option.indent:
        self.PushBack(string)
        return "INITIAL"
      else:
        self.current_option.lines.append(string)

  def Close(self):
    """Flush the final pending option when input ends."""
    super(YamlConfigLexer, self).Close()
    self.PushOptionToContext()