Example #1
0
class LSBInitLexer(lexer.Lexer):
    """Parse out upstart configurations from init scripts.

  Runlevels in /etc/init.d are defined in stanzas like:
  ### BEGIN INIT INFO
  # Provides:             sshd
  # Required-Start:       $remote_fs $syslog
  # Required-Stop:        $remote_fs $syslog
  # Default-Start:        2 3 4 5
  # Default-Stop:         1
  # Short-Description:    OpenBSD Secure Shell server
  ### END INIT INFO
  """

    tokens = [
        lexer.Token("INITIAL", r"### BEGIN INIT INFO", None, "UPSTART"),
        lexer.Token("UPSTART", r"### END INIT INFO", "Finish", "INITIAL"),
        lexer.Token("UPSTART", r"#\s+([-\w]+):\s+([^#\n]*)", "StoreEntry",
                    None),
        lexer.Token("UPSTART", r"\n\s*\w+", "Finish", None),
        lexer.Token(".*", ".", None, None)
    ]

    # Keys that must appear in the stanza for it to be considered usable.
    required = {"provides", "default-start"}

    def __init__(self):
        super(LSBInitLexer, self).__init__()
        self.entries = {}

    def StoreEntry(self, match, **_):
        """Record a single '# Key: value' line from the INIT INFO stanza."""
        raw_key, value = match.groups()
        normalized_key = raw_key.strip().lower()
        if not normalized_key:
            return
        self.entries[normalized_key] = value

    def Finish(self, **_):
        """Stop lexing by draining the remaining input buffer."""
        self.buffer = []

    def ParseEntries(self, data):
        """Lex data and return the stanza entries, or None if incomplete."""
        self.entries = {}
        self.Reset()
        self.Feed(utils.SmartStr(data))
        self.Close()
        # Only report the stanza if every mandatory key was found.
        if self.required.issubset(self.entries):
            return self.entries
        return None
Example #2
0
class StringInterpolator(lexer.Lexer):
    r"""Implements a lexer for the string interpolation language.

  Config files may specify nested interpolation codes:

  - The following form specifies an interpolation command:
      %(arg string|filter)

    Where arg string is an arbitrary string and filter is the name of a filter
    function which will receive the arg string. If filter is omitted, the arg
    string is interpreted as a section.parameter reference and expanded from
    within the config system.

  - Interpolation commands may be nested. In this case, the interpolation
    proceeds from innermost to outermost:

    e.g. %(arg1 %(arg2|filter2)|filter1)

      1. First arg2 is passed through filter2.
      2. The result of that is appended to arg1.
      3. The combined string is then filtered using filter1.

  - The following characters must be escaped by preceeding them with a single \:
     - ()|
  """

    tokens = [
        # When in literal mode, only allow to escape }
        lexer.Token("Literal", r"\\[^{}]", "AppendArg", None),

        # Allow escaping of special characters
        lexer.Token(None, r"\\(.)", "Escape", None),

        # Literal sequence is %{....}. Literal states can not be nested further,
        # i.e. we include anything until the next }. It is still possible to
        # escape } if this character needs to be inserted literally.
        lexer.Token("Literal", r"\}", "EndLiteralExpression,PopState", None),
        lexer.Token("Literal", r"[^}\\]+", "AppendArg", None),
        lexer.Token(None, r"\%\{", "StartExpression,PushState", "Literal"),

        # Expansion sequence is %(....)
        lexer.Token(None, r"\%\(", "StartExpression", None),
        lexer.Token(None, r"\|([a-zA-Z_]+)\)", "Filter", None),
        lexer.Token(None, r"\)", "ExpandArg", None),

        # Glob up as much data as possible to increase efficiency here.
        lexer.Token(None, r"[^()%{}|\\]+", "AppendArg", None),
        lexer.Token(None, r".", "AppendArg", None),

        # Empty input is also ok.
        lexer.Token(None, "^$", None, None)
    ]

    # Maps backslash escape sequences to the literal character they produce.
    STRING_ESCAPES = {
        "\\\\": "\\",
        "\\(": "(",
        "\\)": ")",
        "\\{": "{",
        "\\}": "}",
        "\\%": "%"
    }

    def __init__(self,
                 data,
                 config,
                 default_section="",
                 parameter=None,
                 context=None):
        # The stack accumulates partial strings; one level per open expression.
        self.stack = [""]
        self.config = config
        self.default_section = default_section
        self.parameter = parameter
        self.context = context
        super(StringInterpolator, self).__init__(data)

    def Escape(self, string="", **_):
        """Support standard string escaping."""
        # Translate known escapes; unknown escapes pass through unchanged.
        translated = self.STRING_ESCAPES.get(string, string)
        self.stack[-1] += translated

    def Error(self, e):
        """Parse errors are fatal."""
        raise ConfigFormatError("While parsing %s: %s" % (self.parameter, e))

    def StartExpression(self, **_):
        """Start processing a new expression."""
        # Extend the stack for the new expression.
        self.stack.append("")

    def EndLiteralExpression(self, **_):
        """Close a %{...} literal, folding it into the enclosing level."""
        if len(self.stack) < 2:
            raise lexer.ParseError(
                "Unbalanced literal sequence: Can not expand '%s'" %
                self.processed_buffer)

        literal = self.stack.pop()
        self.stack[-1] += literal

    def Filter(self, match=None, **_):
        """Filter the current expression."""
        value = self.stack.pop()

        # Filters can be specified as a comma separated list.
        for name in match.group(1).split(","):
            filter_cls = ConfigFilter.classes_by_name.get(name)
            if filter_cls is None:
                raise FilterError("Unknown filter function %r" % name)

            logging.info("Applying filter %s for %s.", name, value)
            value = filter_cls().Filter(value)

        self.stack[-1] += value

    def ExpandArg(self, **_):
        """Expand the args as a section.parameter from the config."""
        # Called when we see a closing ); the stack depth must exactly match
        # the number of opening parentheses.
        if len(self.stack) < 2:
            raise lexer.ParseError(
                "Unbalanced parenthesis: Can not expand '%s'" %
                self.processed_buffer)

        # This is the full parameter name: e.g. Logging.path
        parameter_name = self.stack.pop()
        if "." not in parameter_name:
            parameter_name = "%s.%s" % (self.default_section, parameter_name)

        expanded = self.config.Get(parameter_name, context=self.context)
        if expanded is None:
            expanded = ""

        type_info_obj = (self.config.FindTypeInfo(parameter_name)
                         or type_info.String())

        # Encode the interpolated string according to its type.
        self.stack[-1] += type_info_obj.ToString(expanded)

    def AppendArg(self, string="", **_):
        """Append raw text to the innermost expression being built."""
        self.stack[-1] += string

    def Parse(self):
        """Finish lexing and return the fully interpolated string."""
        self.Close()
        if len(self.stack) != 1:
            raise lexer.ParseError("Nested expression not balanced.")

        return self.stack[0]
Example #3
0
 def _AddToken(self, state_regex, regex, actions, next_state):
   """Register one lexer token rule on this instance's token list."""
   token = lexer.Token(state_regex, regex, actions, next_state)
   self._tokens.append(token)
Example #4
0
class Parser(lexer.SearchParser):
    """Parses and generates an AST for a query written in the described language.

  Examples of valid syntax:
    size is 40
    (name contains "Program Files" AND hash.md5 is "123abc")
    @imported_modules (num_symbols = 14 AND symbol.name is "FindWindow")
  """
    # AST node factories; subclasses may override to change node types.
    expression_cls = BasicExpression
    binary_expression_cls = BinaryExpression
    context_cls = ContextExpression
    identity_expression_cls = IdentityExpression

    tokens = [
        # Operators and related tokens
        lexer.Token("INITIAL", r"\@[\w._0-9]+", "ContextOperator,PushState",
                    "CONTEXTOPEN"),
        lexer.Token("INITIAL", r"[^\s\(\)]", "PushState,PushBack",
                    "ATTRIBUTE"),
        lexer.Token("INITIAL", r"\(", "PushState,BracketOpen", None),
        lexer.Token("INITIAL", r"\)", "BracketClose", "BINARY"),

        # Context
        lexer.Token("CONTEXTOPEN", r"\(", "BracketOpen", "INITIAL"),

        # Double quoted string
        lexer.Token("STRING", "\"", "PopState,StringFinish", None),
        lexer.Token("STRING", r"\\x(..)", "HexEscape", None),
        lexer.Token("STRING", r"\\(.)", "StringEscape", None),
        lexer.Token("STRING", r"[^\\\"]+", "StringInsert", None),

        # Single quoted string
        lexer.Token("SQ_STRING", "'", "PopState,StringFinish", None),
        lexer.Token("SQ_STRING", r"\\x(..)", "HexEscape", None),
        lexer.Token("SQ_STRING", r"\\(.)", "StringEscape", None),
        lexer.Token("SQ_STRING", r"[^\\']+", "StringInsert", None),

        # Basic expression
        lexer.Token("ATTRIBUTE", r"[\w._0-9]+", "StoreAttribute", "OPERATOR"),
        lexer.Token("OPERATOR", r"(\w+|[<>!=]=?)", "StoreOperator", "ARG"),
        lexer.Token("ARG", r"(\d+\.\d+)", "InsertFloatArg", "ARG"),
        lexer.Token("ARG", r"(0x\d+)", "InsertInt16Arg", "ARG"),
        lexer.Token("ARG", r"(\d+)", "InsertIntArg", "ARG"),
        lexer.Token("ARG", "\"", "PushState,StringStart", "STRING"),
        lexer.Token("ARG", "'", "PushState,StringStart", "SQ_STRING"),
        # When the last parameter from arg_list has been pushed

        # State where binary operators are supported (AND, OR)
        lexer.Token("BINARY", r"(?i)(and|or|\&\&|\|\|)", "BinaryOperator",
                    "INITIAL"),
        # - We can also skip spaces
        lexer.Token("BINARY", r"\s+", None, None),
        # - But if it's not "and" or just spaces we have to go back
        lexer.Token("BINARY", ".", "PushBack,PopState", None),

        # Skip whitespace.
        lexer.Token(".", r"\s+", None, None),
    ]

    def InsertArg(self, string="", **_):
        """Insert an arg to the current expression.

    Returns:
      "BINARY" when the current expression is complete, to transition the
      lexer into the binary-operator state; None otherwise.
    """
        logging.debug("Storing Argument %s", string)

        # This expression is complete
        if self.current_expression.AddArg(string):
            self.stack.append(self.current_expression)
            self.current_expression = self.expression_cls()
            # We go to the BINARY state, to find if there's an AND or OR operator
            return "BINARY"

    def InsertFloatArg(self, string="", **_):
        """Inserts a Float argument."""
        try:
            float_value = float(string)
            return self.InsertArg(float_value)
        except (TypeError, ValueError):
            raise ParseError("%s is not a valid float." % string)

    def InsertIntArg(self, string="", **_):
        """Inserts an Integer argument."""
        try:
            int_value = int(string)
            return self.InsertArg(int_value)
        except (TypeError, ValueError):
            raise ParseError("%s is not a valid integer." % string)

    def InsertInt16Arg(self, string="", **_):
        """Inserts an Integer in base16 argument."""
        try:
            int_value = int(string, 16)
            return self.InsertArg(int_value)
        except (TypeError, ValueError):
            raise ParseError("%s is not a valid base16 integer." % string)

    def StringFinish(self, **_):
        """Dispatch a completed quoted string to attribute or argument."""
        if self.state == "ATTRIBUTE":
            return self.StoreAttribute(string=self.string)

        elif self.state == "ARG":
            return self.InsertArg(string=self.string)

    def StringEscape(self, string, match, **_):
        """Escape backslashes found inside a string quote.

    Backslashes followed by anything other than [\'"rnbt] will raise an Error.

    Args:
      string: The string that matched.
      match: The match object (m.group(1) is the escaped code)

    Raises:
      ParseError: When the escaped string is not one of [\'"rnbt]
    """
        if match.group(1) in "\\'\"rnbt":
            # NOTE(review): str.decode("string_escape") only exists on
            # Python 2; under Python 3 this needs
            # codecs.decode(string, "unicode_escape") — confirm target runtime.
            self.string += string.decode("string_escape")
        else:
            raise ParseError("Invalid escape character %s." % string)

    def HexEscape(self, string, match, **_):
        """Converts a hex escaped string."""
        logging.debug("HexEscape matched %s", string)
        hex_string = match.group(1)
        try:
            # NOTE(review): on Python 3 unhexlify returns bytes and raises
            # binascii.Error (a ValueError) rather than TypeError — confirm
            # target runtime before relying on this handler.
            self.string += binascii.unhexlify(hex_string)
        except TypeError:
            raise ParseError("Invalid hex escape %s" % string)

    def ContextOperator(self, string="", **_):
        """Push a context expression for an @context token (sans the '@')."""
        self.stack.append(self.context_cls(string[1:]))

    def Reduce(self):
        """Reduce the token stack into an AST."""
        # Check for sanity
        if self.state != "INITIAL" and self.state != "BINARY":
            self.Error("Premature end of expression")

        length = len(self.stack)
        while length > 1:
            # Precendence order
            self._CombineParenthesis()
            self._CombineBinaryExpressions("and")
            self._CombineBinaryExpressions("or")
            self._CombineContext()

            # No change
            if len(self.stack) == length: break
            length = len(self.stack)

        if length != 1:
            self.Error("Illegal query expression")

        return self.stack[0]

    def Error(self, message=None, _=None):
        """Raise a ParseError showing the position within the input."""
        raise ParseError("%s in position %s: %s <----> %s )" %
                         (message, len(self.processed_buffer),
                          self.processed_buffer, self.buffer))

    def _CombineBinaryExpressions(self, operator):
        """Fold expr OP expr triples on the stack into one binary node."""
        for i in range(1, len(self.stack) - 1):
            item = self.stack[i]
            if (isinstance(item, lexer.BinaryExpression)
                    and item.operator.lower() == operator.lower()
                    and isinstance(self.stack[i - 1], lexer.Expression)
                    and isinstance(self.stack[i + 1], lexer.Expression)):
                lhs = self.stack[i - 1]
                rhs = self.stack[i + 1]

                self.stack[i].AddOperands(lhs, rhs)
                self.stack[i - 1] = None
                self.stack[i + 1] = None

        # Materialize a list: bare filter() returns an iterator on Python 3,
        # which would break the len()/indexing done by Reduce() above.
        self.stack = [item for item in self.stack if item]

    def _CombineContext(self):
        """Attach each context node to the expression that follows it."""
        # Context can merge from item 0
        for i in range(len(self.stack) - 1, 0, -1):
            item = self.stack[i - 1]
            if (isinstance(item, ContextExpression)
                    and isinstance(self.stack[i], lexer.Expression)):
                expression = self.stack[i]
                self.stack[i - 1].SetExpression(expression)
                self.stack[i] = None

        # Materialize a list (see _CombineBinaryExpressions).
        self.stack = [item for item in self.stack if item]
Example #5
0
class PlistFilterParser(objectfilter.Parser):
  """Plist specific filter parser.

  Because we will be filtering dictionaries and the path components will be
  matched against dictionary keys, we must be more permissive with attribute
  names.

  This parser allows path components to be enclosed in double quotes to allow
  for spaces, dots or even raw hex-escaped data in them, such as:

    "My\x20first\x20path component".2nd."TH.IRD" contains "Google"

  We store the attribute name as a list of paths into the object instead of as
  a simple string that will be chunked in objectfilter.
  """

  tokens = [
      # Operators and related tokens
      lexer.Token("INITIAL", r"\@[\w._0-9]+",
                  "ContextOperator,PushState", "CONTEXTOPEN"),
      lexer.Token("INITIAL", r"[^\s\(\)]", "PushState,PushBack", "ATTRIBUTE"),
      lexer.Token("INITIAL", r"\(", "PushState,BracketOpen", None),
      lexer.Token("INITIAL", r"\)", "BracketClose", "BINARY"),

      # Context
      lexer.Token("CONTEXTOPEN", r"\(", "BracketOpen", "INITIAL"),

      # Double quoted string
      lexer.Token("STRING", "\"", "PopState,StringFinish", None),
      lexer.Token("STRING", r"\\x(..)", "HexEscape", None),
      lexer.Token("STRING", r"\\(.)", "StringEscape", None),
      lexer.Token("STRING", r"[^\\\"]+", "StringInsert", None),

      # Single quoted string
      lexer.Token("SQ_STRING", "'", "PopState,StringFinish", None),
      lexer.Token("SQ_STRING", r"\\x(..)", "HexEscape", None),
      lexer.Token("SQ_STRING", r"\\(.)", "StringEscape", None),
      lexer.Token("SQ_STRING", r"[^\\']+", "StringInsert", None),

      # Basic expression
      lexer.Token("ATTRIBUTE", r"\.", "AddAttributePath", "ATTRIBUTE"),
      lexer.Token("ATTRIBUTE", r"\s+", "AddAttributePath", "OPERATOR"),
      lexer.Token("ATTRIBUTE", "\"", "PushState,StringStart", "STRING"),
      lexer.Token("ATTRIBUTE",
                  r"[\w_0-9\-]+",
                  "StringStart,StringInsert",
                  "ATTRIBUTE"),

      lexer.Token("OPERATOR", r"(\w+|[<>!=]=?)", "StoreOperator", "ARG"),

      lexer.Token("ARG", r"(\d+\.\d+)", "InsertFloatArg", "ARG"),
      lexer.Token("ARG", r"(0x\d+)", "InsertInt16Arg", "ARG"),
      lexer.Token("ARG", r"(\d+)", "InsertIntArg", "ARG"),
      lexer.Token("ARG", "\"", "PushState,StringStart", "STRING"),
      lexer.Token("ARG", "'", "PushState,StringStart", "SQ_STRING"),
      # When the last parameter from arg_list has been pushed

      # State where binary operators are supported (AND, OR)
      lexer.Token("BINARY", r"(?i)(and|or|\&\&|\|\|)",
                  "BinaryOperator", "INITIAL"),
      # - We can also skip spaces
      lexer.Token("BINARY", r"\s+", None, None),
      # - But if it's not "and" or just spaces we have to go back
      lexer.Token("BINARY", ".", "PushBack,PopState", None),

      # Skip whitespace.
      lexer.Token(".", r"\s+", None, None),
  ]

  def StringFinish(self, **_):
    """StringFinish doesn't act on ATTRIBUTEs here."""
    if self.state != "ARG":
      return None
    return self.InsertArg(string=self.string)

  def AddAttributePath(self, **_):
    """Adds a path component to the current attribute."""
    # The attribute is kept as a list of path components rather than one
    # dotted string; start a fresh list if none has been stored yet.
    components = self.current_expression.attribute or []
    components.append(self.string)
    self.current_expression.SetAttribute(components)
Example #6
0
class YamlConfigLexer(lexer.Lexer):
    """A rough parser that breaks the config into distinct stanzas.

  NOTE: This is _NOT_ a yaml parser. It simply breaks the yaml file into chunks
  which may be rearranged to normalize the file (i.e. sort options). This is
  used to increase readability of the yaml file.
  """
    verbose = False

    tokens = [
        lexer.Token("OPTION", r"( *)[^\n]+\n", "OptionData", None),
        lexer.Token("OPTION", r" +", "OptionIndent", None),
        lexer.Token(None, r" *#[^\n]*\n", "Comment", None),
        lexer.Token(None, r"\n", "NewLine", None),
        lexer.Token(None, r"( *)([A-Z][^\n]+?:[ \n])", "Option", "OPTION"),
        lexer.Token(None, "---\n", "StartDocument", None),
    ]

    def __init__(self, data):
        super(YamlConfigLexer, self).__init__(data)
        self.root = Context()

        # The root is its own parent, which guarantees that the upward walk
        # in FindContextForOption terminates.
        self.current_context = self.root
        self.current_context.parent = self.root
        # The option stanza currently being accumulated, if any.
        self.current_option = None
        # Comment lines collected since the last stanza boundary; they get
        # attached to whatever stanza starts next.
        self.current_comments = []

    def StartDocument(self, string=None, **_):
        """Attach the document separator plus pending comments to the context."""
        self.current_comments.append(string)
        self.current_context.comments = self.current_comments
        self.current_comments = []

    def PushOptionToContext(self):
        """Finalize the in-progress option into the current context, if any."""
        if self.current_option:
            self.current_context.options.append(self.current_option)
            self.current_option.parent = self.current_context

            self.current_option = None

    def Comment(self, string=None, **_):
        # A comment represents the end of the previous stanza and the start of the
        # new stanza.
        self.PushOptionToContext()
        self.current_comments.append(string)

    def NewLine(self, string=None, **_):
        """Route a blank line to either pending comments or the open option."""
        # New lines are allowed between a comment and its following stanza.
        if self.current_option is None:
            self.current_comments.append(string)

        else:
            # Otherwise its considered part of the previous option - for options with
            # multiple lines.
            self.current_option.lines.append(string)

    def Option(self, match=None, string=None):
        """A New option is detected."""
        # Current line indent.  Indents are runs of spaces, so lexicographic
        # comparison below is equivalent to comparing indent depth.
        indent = match.group(1)

        # Push the previous option to the current context.
        self.PushOptionToContext()

        # Current indent is smaller than the current context, this line belongs to a
        # parent context. We find the context this line belongs to.
        if indent <= self.current_context.indent:
            # Switch the current context to match the indent.
            self.current_context = self.FindContextForOption(indent)

        # Currently we tell the difference between an option and a context name by
        # the inclusion of a "." in the name. This means contexts can not have a
        # . in them,
        if "." in string:  # Regular option.
            self.current_option = Option()
            self.current_option.name = match.group(2)
            self.current_option.indent = match.group(1)
            self.current_option.comments = self.current_comments
            self.current_comments = []
            logging.debug("Added Option %s to context %s", string,
                          self.current_context.name)

        else:  # This is a new context.
            context = Context()
            context.name = match.group(2)
            context.comments = self.current_comments
            context.indent = match.group(1)

            # This context is a sibling to the previous one.
            if indent == self.current_context.indent:
                context.parent = self.current_context.parent

                # This context is deeper than the previous one
            else:
                context.parent = self.current_context

            self.current_context.subcontexts.append(context)
            self.current_context = context
            self.current_comments = []

            # Leave the OPTION state: a context header has no inline data.
            return "INITIAL"

    def Error(self, message):
        """Parse errors are fatal for this lexer."""
        raise RuntimeError(message)

    def FindContextForOption(self, indent):
        """Returns the context which contains this option's indent."""
        context = self.current_context
        # Walk up until a strictly shallower context (or the root) is found.
        while indent <= context.indent and context != self.root:
            context = context.parent

        return context

    def OptionData(self, string=None, match=None):
        """Classify a data line inside an option stanza by its indent."""
        # Current line indent is the same as the option name.
        # NOTE(review): assumes Option() has already created current_option;
        # a data line reaching OPTION state without one would raise here.
        indent = match.group(1)

        # This data is on the same line as the option name, it must belong to the
        # current option.
        if not self.current_option.lines:
            self.current_option.lines.append(string)

        # Current indent is less or equal to the option indent - it can not belong
        # to the present option.
        elif indent <= self.current_option.indent:
            self.PushBack(string)
            return "INITIAL"

        # Indent is bigger than this option - it represents data in this option.
        else:
            self.current_option.lines.append(string)

    def OptionIndent(self, string=None, **_):
        """Handle a run of leading spaces seen while inside an option."""
        if not self.current_option:
            self.current_comments.append(string)

        else:
            # An indent was found with the same indent as last option - this
            # represents the end of this option and the start of the next option.
            if string == self.current_option.indent:
                self.PushBack(string)
                return "INITIAL"

            else:
                self.current_option.lines.append(string)

    def Close(self):
        """Finish lexing, flushing any still-open option into its context."""
        super(YamlConfigLexer, self).Close()
        self.PushOptionToContext()