Beispiel #1
0
  def ReadWord(self, lex_mode):
    # type: (lex_mode_t) -> word_t
    """Return the next word, lexing in the given mode.

    If a word is waiting in self.buffered_word it is handed out first and the
    buffer is cleared.  Otherwise the reader that matches lex_mode is called
    repeatedly until it reports that it does not need more input.

    Side effects: the result is stored in self.cursor, and
    self.cursor_was_newline records whether its command Id is Id.Op_Newline.

    Raises:
      AssertionError: no word was buffered and lex_mode is not one of Arith,
        ShCommand, DBracket or BashRegex.
    """
    # For integration with pgen2
    pending = self.buffered_word
    if pending:
      self.buffered_word = None
      next_word = pending
    else:
      # Implementation note: this is a stateful/iterative driver around the
      # "stateless" _ReadWord / _ReadArithWord functions.
      need_more = True
      while need_more:
        if lex_mode == lex_mode_e.Arith:
          # TODO: Can this be unified?
          next_word, need_more = self._ReadArithWord()
        elif lex_mode in (
            lex_mode_e.ShCommand, lex_mode_e.DBracket, lex_mode_e.BashRegex):
          next_word, need_more = self._ReadWord(lex_mode)
        else:
          raise AssertionError('Invalid lex state %s' % lex_mode)

    self.cursor = next_word

    # TODO: Do consolidation of newlines in the lexer?
    # Note that there can be an infinite (Id.Ignored_Comment Id.Op_Newline
    # Id.Ignored_Comment Id.Op_Newline) sequence, so we have to keep track of
    # the last non-ignored token.
    is_newline = (word_.CommandId(self.cursor) == Id.Op_Newline)
    self.cursor_was_newline = is_newline
    return self.cursor
Beispiel #2
0
  def testReadArith(self):
    # Smoke test: every expression below is read word by word in Arith mode
    # until an Eof_Real or Unknown_Tok word shows up.  Each word is
    # pretty-printed; the only check is that ReadWord never returns None.
    arith_sources = [
        '1 + 2',
        'a + b',
        '$a * $b',
        '${a} * ${b}',
        '$(echo 1) * $(echo 2)',
        '`echo 1` + 2',
        '$((1 + 2)) * $((3 + 4))',
        "'single quoted'",  # Allowed by oil but not bash
        '"${a}" + "${b}"',  # Ditto
        '$# + $$',
        # This doesn't work but does in bash -- should be 15
        #'$(( $(echo 1)$(echo 2) + 3 ))',

        '$(( x[0] < 5 ))',
        '$(( ++i ))',
        '$(( i++ ))',

        '$(( x -= 1))',
        '$(( x |= 1))',

        '$(( x[0] = 1 ))',

        '$(( 1 | 0 ))',

        '$((0x$size))',
    ]

    for source in arith_sources:
      print('---')
      print(source)
      print()

      parser = test_lib.InitWordParser(source)
      parser._Next(lex_mode_e.Arith)  # Can we remove this requirement?

      finished = False
      while not finished:
        current = parser.ReadWord(lex_mode_e.Arith)
        assert current is not None
        current.PrettyPrint()
        finished = word_.CommandId(current) in (Id.Eof_Real, Id.Unknown_Tok)
Beispiel #3
0
  def testRead(self):
    # Smoke test: every snippet below is read word by word in ShCommand mode
    # until an Eof_Real word shows up.  Each word is pretty-printed; the only
    # check is that ReadWord never returns None.
    sources = [
        'ls "foo"',
        '$(( 1 + 2 ))',

        '$(echo $(( 1 )) )',  # OLD BUG: arith sub within command sub

        'echo ${#array[@]} b',  # Had a bug here
        'echo $(( ${#array[@]} ))',  # Bug here

        # Had a bug: unary minus
        #'${mounted_disk_regex:0:-1}',

        'echo ${@%suffix}',  # had a bug here

        '${@}',

        'echo ${var,,}',
        'echo ${var,,?}',

        # Line continuation tests
        '${\\\nfoo}',  # VSub_1
        '${foo\\\n}',  # VSub_2
        '${foo#\\\nyo}',  # VS_ARG_UNQ
        '"${foo#\\\nyo}"',  # VS_ARG_DQ

    ]
    for source in sources:
      print('---')
      print(source)
      print()

      parser = test_lib.InitWordParser(source)

      reached_eof = False
      while not reached_eof:
        current = parser.ReadWord(lex_mode_e.ShCommand)
        assert current is not None

        current.PrettyPrint()

        reached_eof = (word_.CommandId(current) == Id.Eof_Real)
Beispiel #4
0
def _PushOilTokens(parse_ctx, gr, p, lex):
  # type: (ParseContext, Grammar, parse.Parser, Lexer) -> token
  """Push tokens onto pgen2's parser.

  Tokens are read from `lex` in the current lexer mode and handed to
  p.addtoken() one at a time; the function returns as soon as addtoken()
  returns a true value.  Tokens that open a shell construct -- @( $( " ${ and
  the single-quote openers -- are pushed first, and then the body of the
  construct is parsed by a word/command parser created from `parse_ctx`.  The
  resulting node is pushed as an Id.Expr_CastedDummy token.

  Args:
    parse_ctx: supplies the arena and creates the word/command sub-parsers.
    gr: the grammar; passed to _Classify() and indexed through gr.tokens.
    p: the pgen2 parser that receives the tokens.
    lex: the lexer; it is shared with the sub-parsers created here.

  Returns the last token so it can be reused/seen by the CommandParser.

  Raises:
    AssertionError: if a token's id.enum_id is 256 or more.
  """
  #log('keywords = %s', gr.keywords)
  #log('tokens = %s', gr.tokens)

  # Lexer mode currently in effect, plus the stack of enclosing modes that a
  # POP action returns to.
  mode = lex_mode_e.Expr
  mode_stack = [mode]
  # Token handed back by a sub-parser; when set, it is processed on the next
  # iteration instead of reading from the lexer.
  last_token = None

  # Nesting counter: while it is positive, Op_Newline tokens are skipped.
  balance = 0

  from core.util import log
  while True:
    if last_token:  # e.g. left over from WordParser
      tok = last_token
      #log('last_token = %s', last_token)
      last_token = None
    else:
      tok = lex.Read(mode)
      #log('tok = %s', tok)

    # Comments and whitespace.  Newlines aren't ignored.
    if meta.LookupKind(tok.id) == Kind.Ignored:
      continue

    # For var x = {
    #   a: 1, b: 2
    # }
    if balance > 0 and tok.id == Id.Op_Newline:
      #log('*** SKIPPING NEWLINE')
      continue

    # The table maps (current mode, token id) to POP, to a new mode to push,
    # or to nothing (None) when the token doesn't change the mode.
    action = _MODE_TRANSITIONS.get((mode, tok.id))
    if action == POP:
      mode_stack.pop()
      mode = mode_stack[-1]
      balance -= 1
      #log('POPPED to %s', mode)
    elif action:  # it's an Id
      new_mode = action
      mode_stack.append(new_mode)
      mode = new_mode
      balance += 1  # e.g. var x = $/ NEWLINE /
      #log('PUSHED to %s', mode)
    else:
      # If we didn't already do something with the balance, look at another table.
      balance += _OTHER_BALANCE.get(tok.id, 0)
      #log('BALANCE after seeing %s = %d', tok.id, balance)

    #if tok.id == Id.Expr_Name and tok.val in KEYWORDS:
    #  tok.id = KEYWORDS[tok.val]
    #  log('Replaced with %s', tok.id)

    # Only ids below 256 may be pushed.
    # NOTE(review): presumably a limit of the pgen2 token numbering -- confirm.
    if tok.id.enum_id >= 256:
      raise AssertionError(str(tok))

    ilabel = _Classify(gr, tok)
    #log('tok = %s, ilabel = %d', tok, ilabel)

    # A true result means the parser is finished with the expression.
    if p.addtoken(tok.id.enum_id, tok, ilabel):
      return tok

    #
    # Extra handling of the body of @() and $().  Lex in the ShCommand mode.
    #
    # In every branch below the opening token has already been pushed by the
    # addtoken() call above.

    if tok.id == Id.Left_AtParen:
      # NOTE(review): presumably makes the lexer report the matching ')' as
      # Right_ShArrayLiteral, which is what ends the loop below -- confirm.
      lex.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)

      # Blame the opening token
      line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
      w_parser = parse_ctx.MakeWordParser(lex, line_reader)
      words = []
      while True:
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        if 0:
          log('w = %s', w)

        if isinstance(w, word__Token):
          word_id = word_.CommandId(w)
          if word_id == Id.Right_ShArrayLiteral:
            break
          elif word_id == Id.Op_Newline:  # internal newlines allowed
            continue
          else:
            # Token
            p_die('Unexpected token in array literal: %r', w.token.val, word=w)

        assert isinstance(w, word__Compound)  # for MyPy
        words.append(w)

      words2 = braces.BraceDetectAll(words)
      words3 = word_.TildeDetectAll(words2)

      # The word list is not a token; it is cast so that it can travel through
      # addtoken() under the Expr_CastedDummy id.
      typ = Id.Expr_CastedDummy.enum_id
      opaque = cast(token, words3)  # HACK for expr_to_ast
      done = p.addtoken(typ, opaque, gr.tokens[typ])
      assert not done  # can't end the expression

      # Now push the closing )
      # (w is the token word that ended the loop above.)
      tok = w.token
      ilabel = _Classify(gr, tok)
      done = p.addtoken(tok.id.enum_id, tok, ilabel)
      assert not done  # can't end the expression

      continue

    if tok.id == Id.Left_DollarParen:
      left_token = tok

      lex.PushHint(Id.Op_RParen, Id.Eof_RParen)
      line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
      c_parser = parse_ctx.MakeParserForCommandSub(line_reader, lex,
                                                   Id.Eof_RParen)
      node = c_parser.ParseCommandSub()
      # A little gross: Copied from osh/word_parse.py
      right_token = c_parser.w_parser.cur_token

      # Record the span ids of the opening and closing tokens on the node.
      cs_part = command_sub(left_token, node)
      cs_part.spids.append(left_token.span_id)
      cs_part.spids.append(right_token.span_id)

      typ = Id.Expr_CastedDummy.enum_id
      opaque = cast(token, cs_part)  # HACK for expr_to_ast
      done = p.addtoken(typ, opaque, gr.tokens[typ])
      assert not done  # can't end the expression

      # Now push the closing )
      ilabel = _Classify(gr, right_token)
      done = p.addtoken(right_token.id.enum_id, right_token, ilabel)
      assert not done  # can't end the expression

      continue

    if tok.id == Id.Left_DoubleQuote:
      left_token = tok
      line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
      w_parser = parse_ctx.MakeWordParser(lex, line_reader)

      parts = []  # type: List[word_part_t]
      # The token returned here is not pushed in this branch; it is stored in
      # last_token and goes through the top of the loop on the next iteration.
      last_token = w_parser.ReadDoubleQuoted(left_token, parts)
      expr_dq_part = double_quoted(left_token, parts)

      typ = Id.Expr_CastedDummy.enum_id
      opaque = cast(token, expr_dq_part)  # HACK for expr_to_ast
      done = p.addtoken(typ, opaque, gr.tokens[typ])
      assert not done  # can't end the expression

      continue

    if tok.id == Id.Left_DollarBrace:
      left_token = tok
      line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
      w_parser = parse_ctx.MakeWordParser(lex, line_reader)

      # As with double quotes, the returned token is carried over in
      # last_token and processed on the next iteration.
      part, last_token = w_parser.ReadBracedBracedVarSub(left_token)

      # It's casted word_part__BracedVarSub -> dummy -> expr__BracedVarSub!
      typ = Id.Expr_CastedDummy.enum_id
      opaque = cast(token, part)  # HACK for expr_to_ast
      done = p.addtoken(typ, opaque, gr.tokens[typ])
      assert not done  # can't end the expression

      continue

    # '' and c''
    if tok.id in (Id.Left_SingleQuoteRaw, Id.Left_SingleQuoteC):
      left_token = tok
      line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
      w_parser = parse_ctx.MakeWordParser(lex, line_reader)

      # mode can be SQ or DollarSQ
      # NOTE(review): `mode` is whatever the _MODE_TRANSITIONS step above left
      # in effect, so this relies on that table switching modes for the
      # single-quote openers -- confirm.
      tokens = []  # type: List[token]
      # True when the opening token's text is exactly a single quote.
      no_backslashes = (left_token.val == "'")
      last_token = w_parser.ReadSingleQuoted(mode, left_token, tokens,
                                             no_backslashes)
      sq_part = single_quoted(left_token, tokens)

      typ = Id.Expr_CastedDummy.enum_id
      opaque = cast(token, sq_part)  # HACK for expr_to_ast
      done = p.addtoken(typ, opaque, gr.tokens[typ])
      assert not done  # can't end the expression

      continue

  else:
    # NOTE: the loop above is `while True` and is only left through `return`
    # (or an exception); the single `break` belongs to the inner loop.  This
    # clause therefore never runs.
    # We never broke out -- EOF is too soon (how can this happen???)
    raise parse.ParseError("incomplete input", tok.id.enum_id, tok)
Beispiel #5
0
def _PushOilTokens(parse_ctx, gr, p, lex):
    # type: (ParseContext, Grammar, parse.Parser, Lexer) -> token
    """Push tokens onto pgen2's parser.

    Tokens are read from `lex` in lex_mode_e.Expr and handed to p.addtoken()
    one at a time; the function returns as soon as addtoken() returns a true
    value.  When mylib.PYTHON is true, tokens that open a shell construct --
    @( $( " ${ and the single-quote openers -- are pushed first, and then the
    body of the construct is parsed by a word/command parser created from
    `parse_ctx`.  The resulting node is pushed as an Id.Expr_CastedDummy
    token.

    Args:
      parse_ctx: supplies the arena and creates the word/command sub-parsers.
      gr: the grammar; passed to _Classify() and indexed through gr.tokens.
      p: the pgen2 parser that receives the tokens.
      lex: the lexer; it is shared with the sub-parsers created here.

    Returns the last token so it can be reused/seen by the CommandParser.

    Raises:
      AssertionError: if a token id is 256 or more (unless asserts are
        disabled).
    """
    #log('keywords = %s', gr.keywords)
    #log('tokens = %s', gr.tokens)

    # Token handed back by a sub-parser; when set, it is processed on the next
    # iteration instead of reading from the lexer.
    last_token = None  # type: Optional[token]

    balance = 0  # to ignore newlines

    while True:
        if last_token:  # e.g. left over from WordParser
            tok = last_token
            #log('last_token = %s', last_token)
            last_token = None
        else:
            tok = lex.Read(lex_mode_e.Expr)
            #log('tok = %s', tok)

        # Comments and whitespace.  Newlines aren't ignored.
        if lookup.LookupKind(tok.id) == Kind.Ignored:
            continue

        # For var x = {
        #   a: 1, b: 2
        # }
        if balance > 0 and tok.id == Id.Op_Newline:
            #log('*** SKIPPING NEWLINE')
            continue

        # Tokens missing from the table leave the balance unchanged.
        balance += _OTHER_BALANCE.get(tok.id, 0)
        #log('BALANCE after seeing %s = %d', tok.id, balance)

        #if tok.id == Id.Expr_Name and tok.val in KEYWORDS:
        #  tok.id = KEYWORDS[tok.val]
        #  log('Replaced with %s', tok.id)

        # Only ids below 256 may be pushed.
        # NOTE(review): presumably a limit of the pgen2 token numbering --
        # confirm.
        assert tok.id < 256, Id_str(tok.id)

        ilabel = _Classify(gr, tok)
        #log('tok = %s, ilabel = %d', tok, ilabel)

        # A true result means the parser is finished with the expression.
        if p.addtoken(tok.id, tok, ilabel):
            return tok

        #
        # Mutually recursive calls into the command/word parsers.
        #
        # In every branch below the opening token has already been pushed by
        # the addtoken() call above.

        # NOTE(review): the sub-parser branches only run when mylib.PYTHON is
        # true; presumably it is false in a translated (non-Python) build --
        # confirm.
        if mylib.PYTHON:
            if tok.id == Id.Left_AtParen:
                left_tok = tok
                # NOTE(review): presumably makes the lexer report the matching
                # ')' as Right_ShArrayLiteral, which is what ends the loop
                # below -- confirm.
                lex.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)

                # Blame the opening token
                line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
                w_parser = parse_ctx.MakeWordParser(lex, line_reader)
                words = []
                while True:
                    w = w_parser.ReadWord(lex_mode_e.ShCommand)
                    if 0:
                        log('w = %s', w)

                    if isinstance(w, word__Token):
                        word_id = word_.CommandId(w)
                        if word_id == Id.Right_ShArrayLiteral:
                            break
                        elif word_id == Id.Op_Newline:  # internal newlines allowed
                            continue
                        else:
                            # Token
                            p_die('Unexpected token in array literal: %r',
                                  w.token.val,
                                  word=w)

                    assert isinstance(w, word__Compound)  # for MyPy
                    words.append(w)

                words2 = braces.BraceDetectAll(words)
                words3 = word_.TildeDetectAll(words2)

                typ = Id.Expr_CastedDummy

                # The literal node is not a token; it is cast so that it can
                # travel through addtoken() under the Expr_CastedDummy id.
                lit_part = sh_array_literal(left_tok, words3)
                opaque = cast(token, lit_part)  # HACK for expr_to_ast
                done = p.addtoken(typ, opaque, gr.tokens[typ])
                assert not done  # can't end the expression

                # Now push the closing )
                # (w is the token word that ended the loop above.)
                tok = w.token
                ilabel = _Classify(gr, tok)
                done = p.addtoken(tok.id, tok, ilabel)
                assert not done  # can't end the expression

                continue

            if tok.id == Id.Left_DollarParen:
                left_token = tok

                lex.PushHint(Id.Op_RParen, Id.Eof_RParen)
                line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
                c_parser = parse_ctx.MakeParserForCommandSub(
                    line_reader, lex, Id.Eof_RParen)
                node = c_parser.ParseCommandSub()
                # A little gross: Copied from osh/word_parse.py
                right_token = c_parser.w_parser.cur_token

                # Record the span ids of the opening and closing tokens.
                cs_part = command_sub(left_token, node)
                cs_part.spids.append(left_token.span_id)
                cs_part.spids.append(right_token.span_id)

                typ = Id.Expr_CastedDummy
                opaque = cast(token, cs_part)  # HACK for expr_to_ast
                done = p.addtoken(typ, opaque, gr.tokens[typ])
                assert not done  # can't end the expression

                # Now push the closing )
                ilabel = _Classify(gr, right_token)
                done = p.addtoken(right_token.id, right_token, ilabel)
                assert not done  # can't end the expression

                continue

            if tok.id == Id.Left_DoubleQuote:
                left_token = tok
                line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
                w_parser = parse_ctx.MakeWordParser(lex, line_reader)

                parts = []  # type: List[word_part_t]
                # The token returned here is not pushed in this branch; it is
                # stored in last_token and goes through the top of the loop on
                # the next iteration.
                last_token = w_parser.ReadDoubleQuoted(left_token, parts)
                expr_dq_part = double_quoted(left_token, parts)

                typ = Id.Expr_CastedDummy
                opaque = cast(token, expr_dq_part)  # HACK for expr_to_ast
                done = p.addtoken(typ, opaque, gr.tokens[typ])
                assert not done  # can't end the expression

                continue

            if tok.id == Id.Left_DollarBrace:
                left_token = tok
                line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
                w_parser = parse_ctx.MakeWordParser(lex, line_reader)

                # As with double quotes, the returned token is carried over in
                # last_token and processed on the next iteration.
                part, last_token = w_parser.ReadBracedBracedVarSub(left_token)

                # It's casted word_part__BracedVarSub -> dummy -> expr__BracedVarSub!
                typ = Id.Expr_CastedDummy
                opaque = cast(token, part)  # HACK for expr_to_ast
                done = p.addtoken(typ, opaque, gr.tokens[typ])
                assert not done  # can't end the expression

                continue

            # '' and c''
            if tok.id in (Id.Left_SingleQuoteRaw, Id.Left_SingleQuoteC):
                # Pick the lexer mode for the body from the kind of opening
                # quote.
                if tok.id == Id.Left_SingleQuoteRaw:
                    sq_mode = lex_mode_e.SQ_Raw
                else:
                    sq_mode = lex_mode_e.SQ_C

                left_token = tok
                line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
                w_parser = parse_ctx.MakeWordParser(lex, line_reader)

                tokens = []  # type: List[token]
                # True when the opening token's text is exactly a single quote.
                no_backslashes = (left_token.val == "'")
                last_token = w_parser.ReadSingleQuoted(sq_mode, left_token,
                                                       tokens, no_backslashes)
                sq_part = single_quoted(left_token, tokens)

                typ = Id.Expr_CastedDummy
                opaque = cast(token, sq_part)  # HACK for expr_to_ast
                done = p.addtoken(typ, opaque, gr.tokens[typ])
                assert not done  # can't end the expression
                continue

    else:
        # NOTE: the loop above is `while True` and is only left through
        # `return` (or an exception); the single `break` belongs to the inner
        # loop.  This clause therefore never runs.
        # We never broke out -- EOF is too soon (how can this happen???)
        raise parse.ParseError("incomplete input", tok.id, tok)
Beispiel #6
0
 def CurrentId(self):
   # type: () -> Id_t
   """Return the command Id of self.cur_word.

   Glue used by the WordParser to check for extra tokens.
   """
   current = self.cur_word
   return word_.CommandId(current)
Beispiel #7
0
  def _ReadArrayLiteral(self):
    # type: () -> word_part_t
    """Parse the array literal on the right of an assignment, e.g. a=(1 2 3).

    The words between the parens are read with a fresh word parser that shares
    this parser's lexer.  Newline words inside the parens are skipped.

    Result:
      - no words: a sh_array_literal with an empty word list
      - first word is a key/value pair (word_.DetectAssocPair): a
        word_part.AssocArrayLiteral holding a flat [k1, v1, k2, v2, ...] list;
        every later word must then be a pair as well
      - otherwise: a sh_array_literal of the words after brace and tilde
        detection

    The span id of the opening paren is appended to the node's spids in all
    three cases.

    p_die() is called for a missing '(', for an unexpected token inside the
    literal, and for a non-pair word in an associative literal.

    TODO: See osh/cmd_parse.py:164 for Id.Lit_ArrayLhsOpen, for a[x++]=1

    We want:

    A=(['x']=1 ["x"]=2 [$x$y]=3)

    Maybe allow this as a literal string?  Because I think I've seen it before?
    Or maybe force people to patch to learn the rule.

    A=([x]=4)

    Starts with Lit_Other '[', and then it has Lit_ArrayLhsClose
    Maybe enforce that ALL have keys or NONE of have keys.
    """
    self._Next(lex_mode_e.ShCommand)  # move on to the token after =
    self._Peek()
    open_paren = self.cur_token
    if open_paren.id != Id.Op_LParen:
      p_die('Expected ( after =, got %r', open_paren.val,
            token=open_paren)
    paren_spid = open_paren.span_id

    # MUST use a new word parser (with same lexer).
    sub_parser = self.parse_ctx.MakeWordParser(self.lexer, self.line_reader)
    elements = []
    while True:
      item = sub_parser.ReadWord(lex_mode_e.ShCommand)

      if not isinstance(item, word__Token):
        assert isinstance(item, word__Compound)  # for MyPy
        elements.append(item)
        continue

      item_id = word_.CommandId(item)
      if item_id == Id.Right_ShArrayLiteral:
        break
      # Unlike command parsing, array parsing allows embedded \n.
      if item_id != Id.Op_Newline:
        # Any other token is an error.
        p_die('Unexpected token in array literal: %r', item.token.val,
              word=item)

    if not elements:  # a=() is empty indexed array
      # ignore for invariant List?
      empty_node = sh_array_literal(open_paren, elements)  # type: ignore
      empty_node.spids.append(paren_spid)
      return empty_node

    # If the first one is a key/value pair, then the rest are assumed to be.
    first_pair = word_.DetectAssocPair(elements[0])
    if first_pair:
      flat_pairs = [first_pair[0], first_pair[1]]  # flat representation

      for elem in elements[1:]:
        kv = word_.DetectAssocPair(elem)
        if not kv:
          p_die("Expected associative array pair", word=elem)

        flat_pairs.append(kv[0])  # flat representation
        flat_pairs.append(kv[1])

      # invariant List?
      assoc_node = word_part.AssocArrayLiteral(open_paren, flat_pairs)  # type: ignore
      assoc_node.spids.append(paren_spid)
      return assoc_node

    expanded = word_.TildeDetectAll(braces.BraceDetectAll(elements))
    array_node = sh_array_literal(open_paren, expanded)
    array_node.spids.append(paren_spid)
    return array_node