Exemple #1
0
    def consume(cls, ctx, tokens, breakset=None):
        """
        Drain `tokens` from the front and assemble a tree of body nodes.

        Whitespace, comments, format on/off sentinels, flow-control blocks,
        statements and at-word statements each become one child of the
        returned tree. A byte-order mark is dropped without creating a node.

        Parsing stops when `tokens` is exhausted, or when the next WORD token
        (upper-cased) is found in `breakset`; in that case the token is left
        in `tokens` for the caller. Any other token type raises
        InternalError.
        """
        stop_words = () if breakset is None else breakset
        tree = cls()
        add_child = tree.children.append

        while tokens:
            head = tokens[0]
            kind = head.type

            if kind in WHITESPACE_TOKENS:
                add_child(WhitespaceNode.consume(ctx, tokens))
                continue

            if kind in COMMENT_TOKENS:
                add_child(CommentNode.consume(ctx, tokens))
                continue

            if kind in ONOFF_TOKENS:
                add_child(OnOffNode.consume(ctx, tokens))
                continue

            if kind == lexer.TokenType.BRACKET_COMMENT:
                add_child(CommentNode.consume(ctx, tokens))
                continue

            if kind == lexer.TokenType.WORD:
                keyword = head.spelling.upper()
                # A word in the breakset terminates this body; the enclosing
                # parser is responsible for consuming it.
                if keyword in stop_words:
                    return tree
                if FlowType.get(keyword) is None:
                    add_child(StatementNode.consume(ctx, tokens))
                else:
                    add_child(FlowControlNode.consume(ctx, tokens))
                continue

            if kind == lexer.TokenType.ATWORD:
                add_child(AtWordStatementNode.consume(ctx, tokens))
                continue

            if kind == lexer.TokenType.BYTEORDER_MARK:
                # Discarded: the byte-order mark gets no node in the tree
                tokens.pop(0)
                continue

            raise InternalError("Unexpected {} token at {}:{}".format(
                head.type.name, head.begin.line, head.begin.col))

        return tree
Exemple #2
0
    def consume(cls, ctx, tokens):
        """
        Consume a complete statement, removing tokens from the input list and
        returning the statement node (an instance of `cls`).

        The node's children are, in order: the function name node, any
        whitespace tokens, an LPAREN node, any whitespace tokens, the argument
        subtree, any trailing whitespace/comment nodes, and an RPAREN node.
        The argument subtree is produced by the parser registered in
        `ctx.parse_db` under the lower-cased function name, falling back to
        the "_default" entry and finally to `StandardParser()`.

        `tokens` must be non-empty on entry; its first token supplies the
        function name.

        Raises:
          ValueError: if the first non-whitespace token after the function
            name is not a left parenthesis.
          UserError: if the token stream ends before the statement is closed,
            or if a token other than whitespace or a comment is found where
            the right parenthesis is expected.
        """
        node = cls()

        # Consume the function name. The spelling is read before delegating to
        # FunctionNameNode.parse because that call is handed the token list.
        fnname = tokens[0].spelling.lower()
        node.funnode = funnode = FunctionNameNode.parse(ctx, tokens)
        node.children.append(funnode)

        # Consume whitespace up to the parenthesis
        while tokens and tokens[0].type in WHITESPACE_TOKENS:
            node.children.append(tokens.pop(0))

        # A statement truncated right after its name would otherwise surface
        # as a bare IndexError from the tokens[0] lookup below.
        if not tokens:
            raise UserError(
                "Unexpected end of token stream while parsing statement:\n {}".
                format(tree_string([node])))

        # TODO(josh): should the parens belong to the statement node or the
        # group node?
        if tokens[0].type != lexer.TokenType.LEFT_PAREN:
            raise ValueError(
                "Unexpected {} token at {}, expecting l-paren, got {}".format(
                    tokens[0].type.name, tokens[0].get_location(),
                    repr(tokens[0].content)))

        lparen = TreeNode(NodeType.LPAREN)
        lparen.children.append(tokens.pop(0))
        node.children.append(lparen)

        while tokens and tokens[0].type in WHITESPACE_TOKENS:
            node.children.append(tokens.pop(0))

        breakstack = [ParenBreaker()]

        parse_fun = ctx.parse_db.get(fnname, None)
        if parse_fun is None:
            # If the parse_db provides a "_default" then use that. Otherwise use the
            # standard parser with no kwargs or flags.
            parse_fun = ctx.parse_db.get("_default", StandardParser())
        node.argtree = subtree = parse_fun(ctx, tokens, breakstack)
        node.children.append(subtree)

        # NOTE(josh): technically we may have a statement specification with
        # an exact number of arguments. At this point we have broken out of that
        # statement but we might have some comments or whitespace to consume
        while tokens and tokens[0].type != lexer.TokenType.RIGHT_PAREN:
            if tokens[0].type in WHITESPACE_TOKENS:
                node.children.append(tokens.pop(0))
                continue

            if tokens[0].type in COMMENT_TOKENS:
                cnode = CommentNode.consume(ctx, tokens)
                node.children.append(cnode)
                continue

            raise UserError(
                "Unexpected {} token at {}, expecting r-paren, got {}".format(
                    tokens[0].type.name, tokens[0].get_location(),
                    repr(tokens[0].content)))

        # The loop above only exits normally when the stream is exhausted or
        # the next token is the right parenthesis, so an empty stream is the
        # one remaining failure mode.
        if not tokens:
            raise UserError(
                "Unexpected end of token stream while parsing statement:\n {}".
                format(tree_string([node])))

        rparen = TreeNode(NodeType.RPAREN)
        rparen.children.append(tokens.pop(0))
        node.children.append(rparen)
        CommentNode.consume_trailing(ctx, tokens, node)

        return node
  def parse(cls, ctx, tokens, npargs, flags, breakstack, sortable=False):
    """
    Parse one contiguous group of positional arguments from the head of
    `tokens`, popping whatever is consumed.

    `npargs` is either an integer, meaning exactly that many arguments are
    consumed, or one of the strings:

    * "?": zero or one
    * "*": zero or more
    * "+": one or more

    Arguments whose normalized spelling appears in `flags` are stored as FLAG
    nodes, all others as ARGUMENT nodes. Whitespace, comments and
    parenthetical groups met along the way are added as children without
    counting toward `npargs`. Returns the new group node.
    """
    group = cls(sortable=sortable)
    group.spec = PositionalSpec(npargs, flags)
    argcount = 0

    # Leading whitespace has usually been stripped by the caller already, but
    # not always (e.g. the kwarg subparser), so absorb it here.
    while tokens and tokens[0].type in WHITESPACE_TOKENS:
      group.children.append(tokens.pop(0))

    # A leading cmake-format tag may annotate sortability. Only the annotation
    # is recorded at this point; the token itself is left in place.
    if tokens:
      tag = get_tag(tokens[0])
      if tag in ("sortable", "sort"):
        group.sortable = True
      elif tag in ("unsortable", "unsort"):
        group.sortable = False

    # Keep going until the input runs dry or enough arguments were collected
    while tokens and not pargs_are_full(npargs, argcount):
      head = tokens[0]
      kind = head.type

      # Stop when the next token belongs to a parent parser, i.e. it matches
      # a keyword argument of something higher in the stack or closes a
      # parent group.
      # NOTE(josh): with an exact argument count a parent's kwarg match must
      # not stop us; the token is consumed instead. This is a hack for
      # ``install(RUNTIME COMPONENT runtime)``, where the second "runtime"
      # should not be read as the ``RUNTIME`` keyword. A closing parenthesis
      # still always stops the group.
      # TODO(josh): this is kind of hacky and makes a missing positional
      # argument lead to parse errors that are difficult for the user to
      # debug.
      if should_break(head, breakstack) and (
          not npargs_is_exact(npargs)
          or kind == lexer.TokenType.RIGHT_PAREN):
        break

      # NOTE(josh): a parenthetical group probably shouldn't be allowed here
      # syntactically, but cmake seems to accept it so we do too.
      if kind == lexer.TokenType.LEFT_PAREN:
        group.children.append(ParenGroupNode.parse(ctx, tokens, breakstack))
        continue

      # Whitespace goes straight into the tree at the current depth
      if kind in WHITESPACE_TOKENS:
        group.children.append(tokens.pop(0))
        continue

      # So does a comment that is not attached to an argument
      if kind in (lexer.TokenType.COMMENT,
                  lexer.TokenType.BRACKET_COMMENT):
        remaining = len(tokens)
        comment = CommentNode.consume(ctx, tokens)
        assert len(tokens) < remaining, \
            "consume_comment didn't consume any tokens"
        group.children.append(comment)
        continue

      # Anything else is a positional argument (or a flag, if listed)
      argnode = TreeNode(
          NodeType.FLAG if get_normalized_kwarg(head) in flags
          else NodeType.ARGUMENT)
      argnode.children.append(tokens.pop(0))
      CommentNode.consume_trailing(ctx, tokens, argnode)
      group.children.append(argnode)
      argcount += 1

    return group
    def parse2(cls, ctx, tokens, cmdspec, kwargs, breakstack):
        """
        Standard parser for the commands in the form of::

            command_name(parg1 parg2 parg3...
                         KEYWORD1 kwarg1 kwarg2...
                         KEYWORD2 kwarg3 kwarg4...
                         FLAG1 FLAG2 FLAG3)

        The parser starts off as a positional parser. If a keyword or flag is
        encountered the positional parser is popped off the parse stack. If it
        was a keyword then the keyword parser is pushed on the parse stack. If
        it was a flag then a new flag parser is pushed onto the stack.

        Positional groups are parsed against the specifications in
        `cmdspec.pargs`, one specification per group, in order; once those are
        used up `default_spec` is applied to any further group. `kwargs` maps
        each normalized keyword to the value handed to
        `KeywordGroupNode.parse` for that keyword. Consumed tokens are popped
        from `tokens`; `cmdspec.pargs` itself is not modified.

        Returns the new tree, with each parsed group also recorded in
        `tree.kwarg_groups` or `tree.parg_groups`.

        Raises:
          InternalError: if a sub-parser returns without consuming any token.
        """

        # NOTE(josh): we will pop things off this list, so let's make a copy
        pargspecs = list(cmdspec.pargs)
        tree = cls()
        tree.cmdspec = cmdspec

        # Leading whitespace goes directly in the parse tree at the current
        # depth
        while tokens and tokens[0].type in WHITESPACE_TOKENS:
            tree.children.append(tokens.pop(0))

        # NOTE(josh): if there is only one non-exact legacy specification then we
        # reuse that specification for any additional positional arguments that we
        # pick up. This is to maintain the current/legacy behavior of simple
        # positional argument specifications
        # TODO(josh): double check the reasoning for this. I think it might be
        # mistaken and unnecessary
        default_spec = DEFAULT_PSPEC
        if (len(pargspecs) == 1 and pargspecs[0].legacy
                and not npargs_is_exact(pargspecs[0].nargs)):
            default_spec = pargspecs.pop(0)

        all_flags = list(default_spec.flags)
        for spec in pargspecs:
            all_flags.extend(spec.flags)

        kwarg_breakstack = breakstack + [
            KwargBreaker(list(kwargs.keys()) + all_flags)
        ]

        while tokens:
            # If it is a whitespace token then put it directly in the parse tree at
            # the current depth
            if tokens[0].type in WHITESPACE_TOKENS:
                tree.children.append(tokens.pop(0))
                continue

            # If it's a comment, then add it at the current depth
            if tokens[0].type in (lexer.TokenType.COMMENT,
                                  lexer.TokenType.BRACKET_COMMENT):
                if comment_belongs_up_tree(ctx, tokens, tree, breakstack):
                    break
                tree.children.append(CommentNode.consume(ctx, tokens))
                continue

            # If it's a sentinel comment, then add it at the current depth
            if tokens[0].type in (lexer.TokenType.FORMAT_OFF,
                                  lexer.TokenType.FORMAT_ON):
                tree.children.append(OnOffNode.consume(ctx, tokens))
                continue

            # Break if the next token belongs to a parent parser, i.e. if it
            # matches a keyword argument of something higher in the stack, or if
            # it closes a parent group.
            if should_break(tokens[0], breakstack):
                # NOTE(josh): if spec.nargs is an exact number of arguments, then we
                # shouldn't break on kwarg match from a parent parser. Instead, we
                # should consume that many tokens. This is a hack to deal with
                # ```install(RUNTIME COMPONENT runtime)``. In this case the second
                # occurance of "runtime" should not match the ``RUNTIME`` keyword
                # and should not break the positional parser.
                # TODO(josh): this is kind of hacky because it will force the positional
                # parser to consume a right parenthesis and will lead to parse errors
                # in the event of a missing positional argument. Such errors will be
                # difficult to debug for the user.
                # Only peek at the specification that would apply next; it
                # is popped further down if a positional group really starts.
                upcoming = pargspecs[0] if pargspecs else default_spec
                if not npargs_is_exact(upcoming.nargs) or upcoming.nargs == 0:
                    break

            ntokens = len(tokens)
            word = get_normalized_kwarg(tokens[0])
            if word in kwargs:
                # No positional specification drives a keyword group. Binding
                # the name here keeps the diagnostic below from failing with
                # an unbound local or reporting a stale specification.
                pspec = None
                with ctx.pusharg(tree):
                    subtree = KeywordGroupNode.parse(ctx, tokens, word,
                                                     kwargs[word],
                                                     kwarg_breakstack)
                tree.kwarg_groups.append(subtree)
            else:
                pspec = pargspecs.pop(0) if pargspecs else default_spec

                # Flags of the specifications still pending terminate this
                # group, unless the current specification claims them too.
                other_flags = [
                    flag
                    for otherspec in pargspecs
                    for flag in otherspec.flags
                    if flag not in pspec.flags
                ]
                positional_breakstack = breakstack + [
                    KwargBreaker(list(kwargs.keys()) + other_flags)
                ]

                with ctx.pusharg(tree):
                    subtree = PositionalGroupNode.parse2(
                        ctx, tokens, pspec, positional_breakstack)
                    tree.parg_groups.append(subtree)

            # A sub-parser that consumed nothing would make this loop spin
            # forever on the same token.
            if len(tokens) >= ntokens:
                raise InternalError(
                    "parsed an empty subtree at {}:\n  {}\n pspec: {}".format(
                        tokens[0], dump_tree_tostr([tree]), pspec))
            tree.children.append(subtree)
        return tree
  def parse2(cls, ctx, tokens, pargspecs, kwargs, breakstack):
    """
    Standard parser for the commands in the form of::

        command_name(parg1 parg2 parg3...
                     KEYWORD1 kwarg1 kwarg2...
                     KEYWORD2 kwarg3 kwarg4...
                     FLAG1 FLAG2 FLAG3)

    The parser starts off as a positional parser. If a keyword or flag is
    encountered the positional parser is popped off the parse stack. If it was
    a keyword then the keyword parser is pushed on the parse stack. If it was
    a flag then a new flag parser is pushed onto the stack.

    Positional groups are parsed against the specifications in `pargspecs`,
    one specification per group, in order; once those are used up
    `default_spec` is applied to any further group. `kwargs` maps each
    normalized keyword to the value handed to `KeywordGroupNode.parse` for
    that keyword. Consumed tokens are popped from `tokens`; the caller's
    `pargspecs` list is left untouched.

    Returns the new tree, with each parsed group also recorded in
    `tree.kwarg_groups` or `tree.parg_groups`.
    """

    # Specifications are popped off as they are used. Work on a private copy
    # so the caller's list is not emptied by the first statement parsed with
    # it.
    pargspecs = list(pargspecs)
    tree = cls()

    # Leading whitespace goes directly in the parse tree at the current depth
    while tokens and tokens[0].type in WHITESPACE_TOKENS:
      tree.children.append(tokens.pop(0))

    # NOTE(josh): if there is only one legacy specification then we reuse that
    # specification for any additional positional arguments that we pick up.
    # This is to maintain the current/legacy behavior of simple positional
    # argument specifications
    default_spec = DEFAULT_PSPEC
    if len(pargspecs) == 1 and pargspecs[0].legacy:
      default_spec = pargspecs.pop(0)

    all_flags = list(default_spec.flags)
    for pspec in pargspecs:
      all_flags.extend(pspec.flags)
    kwarg_breakstack = breakstack + [
        KwargBreaker(list(kwargs.keys()) + all_flags)]

    while tokens:
      # Break if the next token belongs to a parent parser, i.e. if it
      # matches a keyword argument of something higher in the stack, or if
      # it closes a parent group.
      if should_break(tokens[0], breakstack):
        break

      # If it is a whitespace token then put it directly in the parse tree at
      # the current depth
      if tokens[0].type in WHITESPACE_TOKENS:
        tree.children.append(tokens.pop(0))
        continue

      # If it's a comment, then add it at the current depth
      if tokens[0].type in (lexer.TokenType.COMMENT,
                            lexer.TokenType.BRACKET_COMMENT):
        if comment_belongs_up_tree(ctx, tokens, tree, breakstack):
          break
        tree.children.append(CommentNode.consume(ctx, tokens))
        continue

      # If it's a sentinel comment, then add it at the current depth
      if tokens[0].type in (lexer.TokenType.FORMAT_OFF,
                            lexer.TokenType.FORMAT_ON):
        tree.children.append(OnOffNode.consume(ctx, tokens))
        continue

      ntokens = len(tokens)
      word = get_normalized_kwarg(tokens[0])
      if word in kwargs:
        with ctx.pusharg(tree):
          subtree = KeywordGroupNode.parse(
              ctx, tokens, word, kwargs[word], kwarg_breakstack)
        tree.kwarg_groups.append(subtree)
      else:
        pspec = pargspecs.pop(0) if pargspecs else default_spec

        # Flags of the specifications still pending terminate this group,
        # unless the current specification claims them too.
        other_flags = [
            flag
            for otherspec in pargspecs
            for flag in otherspec.flags
            if flag not in pspec.flags]
        positional_breakstack = breakstack + [
            KwargBreaker(list(kwargs.keys()) + other_flags)]

        with ctx.pusharg(tree):
          subtree = PositionalGroupNode.parse(
              ctx, tokens, pspec.nargs, pspec.flags, positional_breakstack)
          subtree.tags.extend(pspec.tags)
          tree.parg_groups.append(subtree)

      # A sub-parser that consumed nothing would make this loop spin forever
      # on the same token.
      assert len(tokens) < ntokens, "parsed an empty subtree"
      tree.children.append(subtree)
    return tree