Example #1
class LSBInitLexer(lexer.Lexer):
    """Parse out upstart configurations from init scripts.

    Runlevels in /etc/init.d are defined in stanzas like:
    ### BEGIN INIT INFO
    # Provides:             sshd
    # Required-Start:       $remote_fs $syslog
    # Required-Stop:        $remote_fs $syslog
    # Default-Start:        2 3 4 5
    # Default-Stop:         1
    # Short-Description:    OpenBSD Secure Shell server
    ### END INIT INFO
    """

    tokens = [
        lexer.Token("INITIAL", r"### BEGIN INIT INFO", None, "UPSTART"),
        lexer.Token("UPSTART", r"### END INIT INFO", "Finish", "INITIAL"),
        lexer.Token("UPSTART", r"#\s+([-\w]+):\s+([^#\n]*)", "StoreEntry",
                    None),
        lexer.Token("UPSTART", r"\n\s*\w+", "Finish", None),
        lexer.Token(".*", ".", None, None)
    ]

    required = {"provides", "default-start"}

    def __init__(self):
        super(LSBInitLexer, self).__init__()
        self.entries = {}

    def StoreEntry(self, match, **_):
        key, val = match.groups()
        setting = key.strip().lower()
        if setting:
            self.entries[setting] = val

    def Finish(self, **_):
        self.buffer = []

    def ParseEntries(self, data):
        precondition.AssertType(data, Text)
        self.entries = {}
        self.Reset()
        self.Feed(data)
        self.Close()
        found = set(self.entries)
        if self.required.issubset(found):
            return self.entries
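
A minimal usage sketch (hypothetical, not part of the original module; the
input reuses the stanza from the class docstring). ParseEntries returns the
parsed dict only when the required "provides" and "default-start" keys are
present, and None otherwise:

INIT_SNIPPET = """\
### BEGIN INIT INFO
# Provides:             sshd
# Required-Start:       $remote_fs $syslog
# Default-Start:        2 3 4 5
### END INIT INFO
"""

entries = LSBInitLexer().ParseEntries(INIT_SNIPPET)
# entries == {"provides": "sshd",
#             "required-start": "$remote_fs $syslog",
#             "default-start": "2 3 4 5"}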
Example #2
class Parser(lexer.SearchParser):
  """Parses and generates an AST for a query written in the described language.

  Examples of valid syntax:
    size is 40
    (name contains "Program Files" AND hash.md5 is "123abc")
    @imported_modules (num_symbols = 14 AND symbol.name is "FindWindow")
  """
  expression_cls = BasicExpression
  binary_expression_cls = BinaryExpression
  context_cls = ContextExpression
  identity_expression_cls = IdentityExpression

  list_args = []

  tokens = [
      # Operators and related tokens
      lexer.Token("INITIAL", r"\@[\w._0-9]+", "ContextOperator,PushState",
                  "CONTEXTOPEN"),
      lexer.Token("INITIAL", r"[^\s\(\)]", "PushState,PushBack", "ATTRIBUTE"),
      lexer.Token("INITIAL", r"\(", "PushState,BracketOpen", None),
      lexer.Token("INITIAL", r"\)", "BracketClose", "BINARY"),

      # Context
      lexer.Token("CONTEXTOPEN", r"\(", "BracketOpen", "INITIAL"),

      # Double quoted string
      lexer.Token("STRING", "\"", "PopState,StringFinish", None),
      lexer.Token("STRING", r"\\x(..)", "HexEscape", None),
      lexer.Token("STRING", r"\\(.)", "StringEscape", None),
      lexer.Token("STRING", r"[^\\\"]+", "StringInsert", None),

      # Single quoted string
      lexer.Token("SQ_STRING", "'", "PopState,StringFinish", None),
      lexer.Token("SQ_STRING", r"\\x(..)", "HexEscape", None),
      lexer.Token("SQ_STRING", r"\\(.)", "StringEscape", None),
      lexer.Token("SQ_STRING", r"[^\\']+", "StringInsert", None),

      # List processing.
      lexer.Token("LIST_ARG", r"]", "PopState,ListFinish", None),
      lexer.Token("LIST_ARG", r"(\d+\.\d+)", "InsertFloatArg", "LIST_ARG"),
      lexer.Token("LIST_ARG", r"(0x[a-f\d]+)", "InsertInt16Arg", "LIST_ARG"),
      lexer.Token("LIST_ARG", r"(\d+)", "InsertIntArg", "LIST_ARG"),
      lexer.Token("LIST_ARG", "\"", "PushState,StringStart", "STRING"),
      lexer.Token("LIST_ARG", "'", "PushState,StringStart", "SQ_STRING"),
      lexer.Token("LIST_ARG", r",", None, None),

      # Basic expression
      lexer.Token("ATTRIBUTE", r"[\w._0-9]+", "StoreAttribute", "OPERATOR"),
      lexer.Token("OPERATOR", r"(\w+|[<>!=]=?)", "StoreOperator", "ARG"),
      lexer.Token("ARG", r"(\d+\.\d+)", "InsertFloatArg", "ARG"),
      lexer.Token("ARG", r"(0x[a-f\d]+)", "InsertInt16Arg", "ARG"),
      lexer.Token("ARG", r"(\d+)", "InsertIntArg", "ARG"),
      lexer.Token("ARG", r"\[", "PushState,ListStart", "LIST_ARG"),
      lexer.Token("ARG", "\"", "PushState,StringStart", "STRING"),
      lexer.Token("ARG", "'", "PushState,StringStart", "SQ_STRING"),
      # When the expression is complete, InsertArg returns "BINARY" to switch
      # the lexer into the binary-operator state below.

      # State where binary operators are supported (AND, OR)
      lexer.Token("BINARY", r"(?i)(and|or|\&\&|\|\|)", "BinaryOperator",
                  "INITIAL"),
      # - We can also skip spaces
      lexer.Token("BINARY", r"\s+", None, None),
      # - But if it's not a binary operator or whitespace, push it back and
      #   return to the previous state
      lexer.Token("BINARY", ".", "PushBack,PopState", None),

      # Skip whitespace.
      lexer.Token(".", r"\s+", None, None),
  ]

  def InsertArg(self, string="", **_):
    """Insert an arg to the current expression."""
    if self.state == "LIST_ARG":
      self.list_args.append(string)
    elif self.current_expression.AddArg(string):
      # This expression is complete
      self.stack.append(self.current_expression)
      self.current_expression = self.expression_cls()
      # We go to the BINARY state, to find if there's an AND or OR operator
      return "BINARY"

  def InsertFloatArg(self, string="", **_):
    """Inserts a Float argument."""
    try:
      float_value = float(string)
      return self.InsertArg(float_value)
    except (TypeError, ValueError):
      raise ParseError("%s is not a valid float." % string)

  def InsertIntArg(self, string="", **_):
    """Inserts an Integer argument."""
    try:
      int_value = int(string)
      return self.InsertArg(int_value)
    except (TypeError, ValueError):
      raise ParseError("%s is not a valid integer." % string)

  def InsertInt16Arg(self, string="", **_):
    """Inserts an Integer in base16 argument."""
    try:
      int_value = int(string, 16)
      return self.InsertArg(int_value)
    except (TypeError, ValueError):
      raise ParseError("%s is not a valid base16 integer." % string)

  def ListStart(self, **_):
    self.list_args = []

  def ListFinish(self, **_):
    return self.InsertArg(string=self.list_args)

  def StringFinish(self, **_):
    if self.state == "ATTRIBUTE":
      return self.StoreAttribute(string=self.string)

    elif self.state == "ARG" or "LIST_ARG":
      return self.InsertArg(string=self.string)

  def StringEscape(self, string, match, **_):
    """Escape backslashes found inside a string quote.

    Backslashes followed by anything other than [\'"rnbt] will raise an Error.

    Args:
      string: The string that matched.
      match: The match object (m.group(1) is the escaped code)

    Raises:
      ParseError: For strings other than those used to define a regexp, raise an
        error if the escaped string is not one of [\'"rnbt].
    """
    precondition.AssertType(string, Text)

    # Allow unfiltered strings for regexp operations so that escaped special
    # characters (e.g. \*) or special sequences (e.g. \w) can be used in
    # objectfilter.
    if self.current_expression.operator == "regexp":
      self.string += compatibility.UnescapeString(string)
    elif match.group(1) in "\\'\"rnbt":
      self.string += compatibility.UnescapeString(string)
    else:
      raise ParseError("Invalid escape character %s." % string)

  def HexEscape(self, string, match, **_):
    """Converts a hex escaped string."""
    hex_string = match.group(1)
    try:
      self.string += binascii.unhexlify(hex_string).decode("utf-8")
    # TODO: In Python 2 `binascii` throws `TypeError` for invalid
    # input values (for whatever reason). This behaviour is fixed in Python 3
    # where `binascii.Error` (a subclass of `ValueError`) is raised. Once we do
    # not have to support Python 2 anymore, this `TypeError` catch should be
    # removed.
    except (binascii.Error, TypeError) as error:
      raise ParseError("Invalid hex escape '{}': {}".format(hex_string, error))

  def ContextOperator(self, string="", **_):
    self.stack.append(self.context_cls(string[1:]))

  def Reduce(self):
    """Reduce the token stack into an AST."""
    # Check for sanity
    if self.state != "INITIAL" and self.state != "BINARY":
      self.Error("Premature end of expression")

    length = len(self.stack)
    while length > 1:
      # Precedence order
      self._CombineParenthesis()
      self._CombineBinaryExpressions("and")
      self._CombineBinaryExpressions("or")
      self._CombineContext()

      # No change
      if len(self.stack) == length:
        break
      length = len(self.stack)

    if length != 1:
      self.Error("Illegal query expression")

    return self.stack[0]

  def Error(self, message=None, _=None):
    raise ParseError("%s in position %s: %s <----> %s )" %
                     (message, len(self.processed_buffer),
                      self.processed_buffer, self.buffer))

  def _CombineBinaryExpressions(self, operator):
    for i in range(1, len(self.stack) - 1):
      item = self.stack[i]
      if (isinstance(item, lexer.BinaryExpression) and
          item.operator.lower() == operator.lower() and
          isinstance(self.stack[i - 1], lexer.Expression) and
          isinstance(self.stack[i + 1], lexer.Expression)):
        lhs = self.stack[i - 1]
        rhs = self.stack[i + 1]

        self.stack[i].AddOperands(lhs, rhs)  # pytype: disable=attribute-error
        self.stack[i - 1] = None
        self.stack[i + 1] = None

    self.stack = list(filter(None, self.stack))

  def _CombineContext(self):
    # A context expression absorbs the expression to its right. Scan right to
    # left so that chained contexts merge from the innermost outwards.
    for i in range(len(self.stack) - 1, 0, -1):
      item = self.stack[i - 1]
      if (isinstance(item, ContextExpression) and
          isinstance(self.stack[i], lexer.Expression)):
        expression = self.stack[i]
        item.SetExpression(expression)
        self.stack[i] = None

    self.stack = list(filter(None, self.stack))
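
A minimal usage sketch (hedged: it assumes lexer.SearchParser accepts the
query text in its constructor and exposes a Parse() entry point that feeds
the data and calls Reduce(), as is usual for this interface). The query is
taken from the docstring examples above:

query = '(name contains "Program Files" AND hash.md5 is "123abc")'
ast = Parser(query).Parse()  # root node of the resulting expression AST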
Example #3
def _AddToken(self, state_regex, regex, actions, next_state):
  self._tokens.append(
      lexer.Token(state_regex, regex, actions, next_state))
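
A sketch of how such a helper might be used (the class and token names below
are hypothetical, assuming the lexer reads its token table from the instance
attribute self._tokens that the helper appends to):

class ConfigurableLexer(lexer.Lexer):
  """Builds its token table at runtime instead of a static `tokens` list."""

  def __init__(self):
    super(ConfigurableLexer, self).__init__()
    self._tokens = []
    self._AddToken("INITIAL", r"\s+", None, None)        # skip whitespace
    self._AddToken("INITIAL", r"(\d+)", "Number", None)  # capture integers

  def _AddToken(self, state_regex, regex, actions, next_state):
    self._tokens.append(
        lexer.Token(state_regex, regex, actions, next_state))

  def Number(self, string="", **_):
    """Action invoked for every integer token."""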
Example #4
class PlistFilterParser(objectfilter.Parser):
  """Plist specific filter parser.

  Because we will be filtering dictionaries and the path components will be
  matched against dictionary keys, we must be more permissive with attribute
  names.

  This parser allows path components to be enclosed in double quotes to allow
  for spaces, dots or even raw hex-escaped data in them, such as:

    "My\x20first\x20path component".2nd."TH.IRD" contains "Google"

  We store the attribute name as a list of paths into the object instead of as
  a simple string that will be chunked in objectfilter.
  """

  tokens = [
      # Operators and related tokens
      lexer.Token("INITIAL", r"\@[\w._0-9]+", "ContextOperator,PushState",
                  "CONTEXTOPEN"),
      lexer.Token("INITIAL", r"[^\s\(\)]", "PushState,PushBack", "ATTRIBUTE"),
      lexer.Token("INITIAL", r"\(", "PushState,BracketOpen", None),
      lexer.Token("INITIAL", r"\)", "BracketClose", "BINARY"),

      # Context
      lexer.Token("CONTEXTOPEN", r"\(", "BracketOpen", "INITIAL"),

      # Double quoted string
      lexer.Token("STRING", "\"", "PopState,StringFinish", None),
      lexer.Token("STRING", r"\\x(..)", "HexEscape", None),
      lexer.Token("STRING", r"\\(.)", "StringEscape", None),
      lexer.Token("STRING", r"[^\\\"]+", "StringInsert", None),

      # Single quoted string
      lexer.Token("SQ_STRING", "'", "PopState,StringFinish", None),
      lexer.Token("SQ_STRING", r"\\x(..)", "HexEscape", None),
      lexer.Token("SQ_STRING", r"\\(.)", "StringEscape", None),
      lexer.Token("SQ_STRING", r"[^\\']+", "StringInsert", None),

      # Basic expression
      lexer.Token("ATTRIBUTE", r"\.", "AddAttributePath", "ATTRIBUTE"),
      lexer.Token("ATTRIBUTE", r"\s+", "AddAttributePath", "OPERATOR"),
      lexer.Token("ATTRIBUTE", "\"", "PushState,StringStart", "STRING"),
      lexer.Token("ATTRIBUTE", r"[\w_0-9\-]+", "StringStart,StringInsert",
                  "ATTRIBUTE"),
      lexer.Token("OPERATOR", r"(\w+|[<>!=]=?)", "StoreOperator", "ARG"),
      lexer.Token("ARG", r"(\d+\.\d+)", "InsertFloatArg", "ARG"),
      lexer.Token("ARG", r"(0x\d+)", "InsertInt16Arg", "ARG"),
      lexer.Token("ARG", r"(\d+)", "InsertIntArg", "ARG"),
      lexer.Token("ARG", "\"", "PushState,StringStart", "STRING"),
      lexer.Token("ARG", "'", "PushState,StringStart", "SQ_STRING"),
      # When the expression is complete, InsertArg returns "BINARY" to switch
      # the lexer into the binary-operator state below.

      # State where binary operators are supported (AND, OR)
      lexer.Token("BINARY", r"(?i)(and|or|\&\&|\|\|)", "BinaryOperator",
                  "INITIAL"),
      # - We can also skip spaces
      lexer.Token("BINARY", r"\s+", None, None),
      # - But if it's not a binary operator or whitespace, push it back and
      #   return to the previous state
      lexer.Token("BINARY", ".", "PushBack,PopState", None),

      # Skip whitespace.
      lexer.Token(".", r"\s+", None, None),
  ]

  def StringFinish(self, **_):
    """StringFinish doesn't act on ATTRIBUTEs here."""
    if self.state == "ARG":
      return self.InsertArg(string=self.string)

  def AddAttributePath(self, **_):
    """Adds a path component to the current attribute."""
    attribute_path = self.current_expression.attribute
    if not attribute_path:
      attribute_path = []

    attribute_path.append(self.string)
    self.current_expression.SetAttribute(attribute_path)
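
A minimal usage sketch (hypothetical; it reuses the query from the class
docstring and assumes the inherited Parse() entry point). Because of
AddAttributePath, the attribute is stored as a list of path components such
as ["My first path component", "2nd", "TH.IRD"] rather than one dotted
string:

query = r'"My\x20first\x20path component".2nd."TH.IRD" contains "Google"'
ast = PlistFilterParser(query).Parse()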