def ReadWord(self, lex_mode):
  # type: (lex_mode_t) -> word_t
  """Read the next Word.

  Returns:
    Word, or None if there was an error
  """
  # For integration with pgen2
  if self.buffered_word:
    w = self.buffered_word
    self.buffered_word = None
  else:
    # Implementation note: This is a stateful/iterative function that calls
    # the stateless "_ReadWord" function.
    while True:
      if lex_mode == lex_mode_e.Arith:
        # TODO: Can this be unified?
        w, need_more = self._ReadArithWord()
      elif lex_mode in (
          lex_mode_e.ShCommand, lex_mode_e.DBracket, lex_mode_e.BashRegex):
        w, need_more = self._ReadWord(lex_mode)
      else:
        raise AssertionError('Invalid lex state %s' % lex_mode)
      if not need_more:
        break

  self.cursor = w

  # TODO: Do consolidation of newlines in the lexer?
  # Note that there can be an infinite (Id.Ignored_Comment Id.Op_Newline
  # Id.Ignored_Comment Id.Op_Newline) sequence, so we have to keep track of
  # the last non-ignored token.
  self.cursor_was_newline = (word_.CommandId(self.cursor) == Id.Op_Newline)
  return self.cursor
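# The buffered_word field above is a one-word pushback buffer: when a caller
# (e.g. the pgen2-based expression parser) reads one word too many, it hands
# the word back so the next ReadWord() call returns it instead of consuming
# new input. Below is a minimal, self-contained sketch of that protocol;
# FakeWordParser and PushBack are hypothetical stand-ins, not Oil's real API.

class FakeWordParser(object):
  """Illustrates the buffered_word protocol only."""

  def __init__(self, words):
    self.words = list(words)   # pretend these came from the lexer
    self.buffered_word = None  # at most one word of lookahead

  def ReadWord(self):
    if self.buffered_word is not None:  # consume the pushback first
      w = self.buffered_word
      self.buffered_word = None
      return w
    return self.words.pop(0) if self.words else 'EOF'

  def PushBack(self, w):
    # A caller that read one word too many returns it here.
    assert self.buffered_word is None
    self.buffered_word = w


p = FakeWordParser(['echo', 'hi'])
w = p.ReadWord()   # 'echo'
p.PushBack(w)      # decide we didn't want it yet
assert p.ReadWord() == 'echo'  # the same word comes back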
def testReadArith(self):
  CASES = [
      '1 + 2',
      'a + b',
      '$a * $b',
      '${a} * ${b}',
      '$(echo 1) * $(echo 2)',
      '`echo 1` + 2',
      '$((1 + 2)) * $((3 + 4))',
      "'single quoted'",  # Allowed by oil but not bash
      '"${a}" + "${b}"',  # Ditto
      '$# + $$',
      # This doesn't work but does in bash -- should be 15
      #'$(( $(echo 1)$(echo 2) + 3 ))',

      '$(( x[0] < 5 ))',
      '$(( ++i ))',
      '$(( i++ ))',
      '$(( x -= 1))',
      '$(( x |= 1))',
      '$(( x[0] = 1 ))',
      '$(( 1 | 0 ))',
      '$((0x$size))',
  ]
  for expr in CASES:
    print('---')
    print(expr)
    print()

    w_parser = test_lib.InitWordParser(expr)
    w_parser._Next(lex_mode_e.Arith)  # Can we remove this requirement?

    while True:
      w = w_parser.ReadWord(lex_mode_e.Arith)
      assert w is not None
      w.PrettyPrint()
      if word_.CommandId(w) in (Id.Eof_Real, Id.Unknown_Tok):
        break
def testRead(self):
  CASES = [
      'ls "foo"',
      '$(( 1 + 2 ))',
      '$(echo $(( 1 )) )',  # OLD BUG: arith sub within command sub
      'echo ${#array[@]} b',  # Had a bug here
      'echo $(( ${#array[@]} ))',  # Bug here

      # Had a bug: unary minus
      #'${mounted_disk_regex:0:-1}',

      'echo ${@%suffix}',  # had a bug here
      '${@}',
      'echo ${var,,}',
      'echo ${var,,?}',

      # Line continuation tests
      '${\\\nfoo}',  # VSub_1
      '${foo\\\n}',  # VSub_2
      '${foo#\\\nyo}',  # VS_ARG_UNQ
      '"${foo#\\\nyo}"',  # VS_ARG_DQ
  ]
  for expr in CASES:
    print('---')
    print(expr)
    print()

    w_parser = test_lib.InitWordParser(expr)
    while True:
      w = w_parser.ReadWord(lex_mode_e.ShCommand)
      assert w is not None
      w.PrettyPrint()
      if word_.CommandId(w) == Id.Eof_Real:
        break
def _PushOilTokens(parse_ctx, gr, p, lex):
  # type: (ParseContext, Grammar, parse.Parser, Lexer) -> token
  """Push tokens onto pgen2's parser.

  Returns the last token so it can be reused/seen by the CommandParser.
  """
  #log('keywords = %s', gr.keywords)
  #log('tokens = %s', gr.tokens)

  mode = lex_mode_e.Expr
  mode_stack = [mode]
  last_token = None
  balance = 0

  from core.util import log

  while True:
    if last_token:  # e.g. left over from WordParser
      tok = last_token
      #log('last_token = %s', last_token)
      last_token = None
    else:
      tok = lex.Read(mode)
      #log('tok = %s', tok)

    # Comments and whitespace.  Newlines aren't ignored.
    if meta.LookupKind(tok.id) == Kind.Ignored:
      continue

    # For var x = {
    #   a: 1, b: 2
    # }
    if balance > 0 and tok.id == Id.Op_Newline:
      #log('*** SKIPPING NEWLINE')
      continue

    action = _MODE_TRANSITIONS.get((mode, tok.id))
    if action == POP:
      mode_stack.pop()
      mode = mode_stack[-1]
      balance -= 1
      #log('POPPED to %s', mode)
    elif action:  # it's an Id
      new_mode = action
      mode_stack.append(new_mode)
      mode = new_mode
      balance += 1  # e.g. var x = $/ NEWLINE /
      #log('PUSHED to %s', mode)
    else:
      # If we didn't already do something with the balance, look at another
      # table.
      balance += _OTHER_BALANCE.get(tok.id, 0)
      #log('BALANCE after seeing %s = %d', tok.id, balance)

    #if tok.id == Id.Expr_Name and tok.val in KEYWORDS:
    #  tok.id = KEYWORDS[tok.val]
    #  log('Replaced with %s', tok.id)

    if tok.id.enum_id >= 256:
      raise AssertionError(str(tok))

    ilabel = _Classify(gr, tok)
    #log('tok = %s, ilabel = %d', tok, ilabel)

    if p.addtoken(tok.id.enum_id, tok, ilabel):
      return tok

    #
    # Extra handling of the body of @() and $().  Lex in the ShCommand mode.
    #

    if tok.id == Id.Left_AtParen:
      lex.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)

      # Blame the opening token
      line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
      w_parser = parse_ctx.MakeWordParser(lex, line_reader)
      words = []
      while True:
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        if 0:
          log('w = %s', w)

        if isinstance(w, word__Token):
          word_id = word_.CommandId(w)
          if word_id == Id.Right_ShArrayLiteral:
            break
          elif word_id == Id.Op_Newline:  # internal newlines allowed
            continue
          else:
            # Token
            p_die('Unexpected token in array literal: %r', w.token.val,
                  word=w)

        assert isinstance(w, word__Compound)  # for MyPy
        words.append(w)

      words2 = braces.BraceDetectAll(words)
      words3 = word_.TildeDetectAll(words2)

      typ = Id.Expr_CastedDummy.enum_id
      opaque = cast(token, words3)  # HACK for expr_to_ast
      done = p.addtoken(typ, opaque, gr.tokens[typ])
      assert not done  # can't end the expression

      # Now push the closing )
      tok = w.token
      ilabel = _Classify(gr, tok)
      done = p.addtoken(tok.id.enum_id, tok, ilabel)
      assert not done  # can't end the expression

      continue

    if tok.id == Id.Left_DollarParen:
      left_token = tok

      lex.PushHint(Id.Op_RParen, Id.Eof_RParen)
      line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
      c_parser = parse_ctx.MakeParserForCommandSub(line_reader, lex,
                                                   Id.Eof_RParen)
      node = c_parser.ParseCommandSub()
      # A little gross: Copied from osh/word_parse.py
      right_token = c_parser.w_parser.cur_token

      cs_part = command_sub(left_token, node)
      cs_part.spids.append(left_token.span_id)
      cs_part.spids.append(right_token.span_id)

      typ = Id.Expr_CastedDummy.enum_id
      opaque = cast(token, cs_part)  # HACK for expr_to_ast
      done = p.addtoken(typ, opaque, gr.tokens[typ])
      assert not done  # can't end the expression

      # Now push the closing )
      ilabel = _Classify(gr, right_token)
      done = p.addtoken(right_token.id.enum_id, right_token, ilabel)
      assert not done  # can't end the expression

      continue

    if tok.id == Id.Left_DoubleQuote:
      left_token = tok
      line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
      w_parser = parse_ctx.MakeWordParser(lex, line_reader)

      parts = []  # type: List[word_part_t]
      last_token = w_parser.ReadDoubleQuoted(left_token, parts)
      expr_dq_part = double_quoted(left_token, parts)

      typ = Id.Expr_CastedDummy.enum_id
      opaque = cast(token, expr_dq_part)  # HACK for expr_to_ast
      done = p.addtoken(typ, opaque, gr.tokens[typ])
      assert not done  # can't end the expression

      continue

    if tok.id == Id.Left_DollarBrace:
      left_token = tok
      line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
      w_parser = parse_ctx.MakeWordParser(lex, line_reader)

      part, last_token = w_parser.ReadBracedBracedVarSub(left_token)

      # It's casted word_part__BracedVarSub -> dummy -> expr__BracedVarSub!
      typ = Id.Expr_CastedDummy.enum_id
      opaque = cast(token, part)  # HACK for expr_to_ast
      done = p.addtoken(typ, opaque, gr.tokens[typ])
      assert not done  # can't end the expression

      continue

    # '' and c''
    if tok.id in (Id.Left_SingleQuoteRaw, Id.Left_SingleQuoteC):
      left_token = tok
      line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
      w_parser = parse_ctx.MakeWordParser(lex, line_reader)

      # mode can be SQ or DollarSQ
      tokens = []  # type: List[token]
      no_backslashes = (left_token.val == "'")
      last_token = w_parser.ReadSingleQuoted(mode, left_token, tokens,
                                             no_backslashes)
      sq_part = single_quoted(left_token, tokens)

      typ = Id.Expr_CastedDummy.enum_id
      opaque = cast(token, sq_part)  # HACK for expr_to_ast
      done = p.addtoken(typ, opaque, gr.tokens[typ])
      assert not done  # can't end the expression

      continue

  else:
    # We never broke out -- EOF is too soon (how can this happen???)
    raise parse.ParseError("incomplete input", tok.id.enum_id, tok)
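# This older version drives lexing with two small tables: _MODE_TRANSITIONS
# maps a (mode, token id) pair to a push or pop of the lexer-mode stack, and
# _OTHER_BALANCE adjusts a bracket-depth counter so newlines inside brackets
# can be skipped. The toy dispatch below illustrates the idea; the modes,
# token names, and table entries are made up, not Oil's real tables.

POP = 'pop'

_MODE_TRANSITIONS = {
    ('Expr', 'Left_DollarSlash'): 'Regex',  # $/ ... / pushes a regex mode
    ('Regex', 'Arith_Slash'): POP,          # the closing / pops back
}
_OTHER_BALANCE = {'Op_LBrace': 1, 'Op_RBrace': -1}


def feed(tokens):
  mode_stack = ['Expr']
  balance = 0
  out = []
  for tok_id in tokens:
    if balance > 0 and tok_id == 'Op_Newline':
      continue  # newline inside brackets is ignored, as in var x = { ... }
    action = _MODE_TRANSITIONS.get((mode_stack[-1], tok_id))
    if action == POP:
      mode_stack.pop()
      balance -= 1
    elif action:  # it's a mode to push
      mode_stack.append(action)
      balance += 1
    else:
      balance += _OTHER_BALANCE.get(tok_id, 0)
    out.append((tok_id, mode_stack[-1]))
  return out

# The newline between { and } is dropped; the regex body lexes in 'Regex'.
print(feed(['Op_LBrace', 'Op_Newline', 'Op_RBrace',
            'Left_DollarSlash', 'Expr_Name', 'Arith_Slash']))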
def _PushOilTokens(parse_ctx, gr, p, lex):
  # type: (ParseContext, Grammar, parse.Parser, Lexer) -> token
  """Push tokens onto pgen2's parser.

  Returns the last token so it can be reused/seen by the CommandParser.
  """
  #log('keywords = %s', gr.keywords)
  #log('tokens = %s', gr.tokens)

  last_token = None  # type: Optional[token]
  balance = 0  # to ignore newlines

  while True:
    if last_token:  # e.g. left over from WordParser
      tok = last_token
      #log('last_token = %s', last_token)
      last_token = None
    else:
      tok = lex.Read(lex_mode_e.Expr)
      #log('tok = %s', tok)

    # Comments and whitespace.  Newlines aren't ignored.
    if lookup.LookupKind(tok.id) == Kind.Ignored:
      continue

    # For var x = {
    #   a: 1, b: 2
    # }
    if balance > 0 and tok.id == Id.Op_Newline:
      #log('*** SKIPPING NEWLINE')
      continue

    balance += _OTHER_BALANCE.get(tok.id, 0)
    #log('BALANCE after seeing %s = %d', tok.id, balance)

    #if tok.id == Id.Expr_Name and tok.val in KEYWORDS:
    #  tok.id = KEYWORDS[tok.val]
    #  log('Replaced with %s', tok.id)

    assert tok.id < 256, Id_str(tok.id)

    ilabel = _Classify(gr, tok)
    #log('tok = %s, ilabel = %d', tok, ilabel)

    if p.addtoken(tok.id, tok, ilabel):
      return tok

    #
    # Mutually recursive calls into the command/word parsers.
    #

    if mylib.PYTHON:
      if tok.id == Id.Left_AtParen:
        left_tok = tok
        lex.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)

        # Blame the opening token
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        w_parser = parse_ctx.MakeWordParser(lex, line_reader)
        words = []
        while True:
          w = w_parser.ReadWord(lex_mode_e.ShCommand)
          if 0:
            log('w = %s', w)

          if isinstance(w, word__Token):
            word_id = word_.CommandId(w)
            if word_id == Id.Right_ShArrayLiteral:
              break
            elif word_id == Id.Op_Newline:  # internal newlines allowed
              continue
            else:
              # Token
              p_die('Unexpected token in array literal: %r', w.token.val,
                    word=w)

          assert isinstance(w, word__Compound)  # for MyPy
          words.append(w)

        words2 = braces.BraceDetectAll(words)
        words3 = word_.TildeDetectAll(words2)

        typ = Id.Expr_CastedDummy

        lit_part = sh_array_literal(left_tok, words3)
        opaque = cast(token, lit_part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression

        # Now push the closing )
        tok = w.token
        ilabel = _Classify(gr, tok)
        done = p.addtoken(tok.id, tok, ilabel)
        assert not done  # can't end the expression

        continue

      if tok.id == Id.Left_DollarParen:
        left_token = tok

        lex.PushHint(Id.Op_RParen, Id.Eof_RParen)
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        c_parser = parse_ctx.MakeParserForCommandSub(line_reader, lex,
                                                     Id.Eof_RParen)
        node = c_parser.ParseCommandSub()
        # A little gross: Copied from osh/word_parse.py
        right_token = c_parser.w_parser.cur_token

        cs_part = command_sub(left_token, node)
        cs_part.spids.append(left_token.span_id)
        cs_part.spids.append(right_token.span_id)

        typ = Id.Expr_CastedDummy
        opaque = cast(token, cs_part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression

        # Now push the closing )
        ilabel = _Classify(gr, right_token)
        done = p.addtoken(right_token.id, right_token, ilabel)
        assert not done  # can't end the expression

        continue

      if tok.id == Id.Left_DoubleQuote:
        left_token = tok
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        w_parser = parse_ctx.MakeWordParser(lex, line_reader)

        parts = []  # type: List[word_part_t]
        last_token = w_parser.ReadDoubleQuoted(left_token, parts)
        expr_dq_part = double_quoted(left_token, parts)

        typ = Id.Expr_CastedDummy
        opaque = cast(token, expr_dq_part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression

        continue

      if tok.id == Id.Left_DollarBrace:
        left_token = tok
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        w_parser = parse_ctx.MakeWordParser(lex, line_reader)

        part, last_token = w_parser.ReadBracedBracedVarSub(left_token)

        # It's casted word_part__BracedVarSub -> dummy -> expr__BracedVarSub!
        typ = Id.Expr_CastedDummy
        opaque = cast(token, part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression

        continue

      # '' and c''
      if tok.id in (Id.Left_SingleQuoteRaw, Id.Left_SingleQuoteC):
        if tok.id == Id.Left_SingleQuoteRaw:
          sq_mode = lex_mode_e.SQ_Raw
        else:
          sq_mode = lex_mode_e.SQ_C

        left_token = tok
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        w_parser = parse_ctx.MakeWordParser(lex, line_reader)

        tokens = []  # type: List[token]
        no_backslashes = (left_token.val == "'")
        last_token = w_parser.ReadSingleQuoted(sq_mode, left_token, tokens,
                                               no_backslashes)
        sq_part = single_quoted(left_token, tokens)

        typ = Id.Expr_CastedDummy
        opaque = cast(token, sq_part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression
        continue

  else:
    # We never broke out -- EOF is too soon (how can this happen???)
    raise parse.ParseError("incomplete input", tok.id, tok)
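# Both versions rely on the same escape hatch: the word or command parser
# builds a complete AST node, which is then smuggled through pgen2 as a
# single dummy terminal (Id.Expr_CastedDummy), with cast() silencing MyPy.
# The stripped-down model below shows that handoff; FakeP is a stand-in
# that only records what pgen2's addtoken() would receive, not the real
# parse.Parser.

class FakeP(object):
  def __init__(self):
    self.shifted = []

  def addtoken(self, typ, opaque, ilabel):
    # pgen2 shifts (typ, opaque) onto its stack; the grammar only inspects
    # typ, so 'opaque' can be any object, including a finished AST node.
    self.shifted.append((typ, opaque))
    return False  # False means "the expression isn't finished yet"


EXPR_CASTED_DUMMY = 256  # hypothetical terminal number for the dummy


class DoubleQuotedPart(object):  # stand-in for the double_quoted AST node
  def __init__(self, parts):
    self.parts = parts


p = FakeP()
dq = DoubleQuotedPart(['hello ', '$name'])  # built by the word parser
done = p.addtoken(EXPR_CASTED_DUMMY, dq, ilabel=0)
assert not done  # a quoted string can't end the expression by itself

# Later, expr_to_ast sees EXPR_CASTED_DUMMY and pulls the node back out,
# undoing the cast.
typ, node = p.shifted[-1]
assert isinstance(node, DoubleQuotedPart)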
def CurrentId(self):
  # type: () -> Id_t
  """Glue used by the WordParser to check for extra tokens."""
  return word_.CommandId(self.cur_word)
def _ReadArrayLiteral(self):
  # type: () -> word_part_t
  """
  a=(1 2 3)

  TODO: See osh/cmd_parse.py:164 for Id.Lit_ArrayLhsOpen, for a[x++]=1

  We want:

  A=(['x']=1 ["x"]=2 [$x$y]=3)

  Maybe allow this as a literal string?  Because I think I've seen it before?
  Or maybe force people to patch to learn the rule.

  A=([x]=4)

  Starts with Lit_Other '[', and then it has Lit_ArrayLhsClose
  Maybe enforce that ALL have keys or NONE of them have keys.
  """
  self._Next(lex_mode_e.ShCommand)  # advance past (
  self._Peek()
  if self.cur_token.id != Id.Op_LParen:
    p_die('Expected ( after =, got %r', self.cur_token.val,
          token=self.cur_token)
  left_token = self.cur_token
  paren_spid = self.cur_token.span_id

  # MUST use a new word parser (with same lexer).
  w_parser = self.parse_ctx.MakeWordParser(self.lexer, self.line_reader)
  words = []
  while True:
    w = w_parser.ReadWord(lex_mode_e.ShCommand)

    if isinstance(w, word__Token):
      word_id = word_.CommandId(w)
      if word_id == Id.Right_ShArrayLiteral:
        break
      # Unlike command parsing, array parsing allows embedded \n.
      elif word_id == Id.Op_Newline:
        continue
      else:
        # Token
        p_die('Unexpected token in array literal: %r', w.token.val, word=w)

    assert isinstance(w, word__Compound)  # for MyPy
    words.append(w)

  if not words:  # a=() is empty indexed array
    # ignore for invariant List?
    node = sh_array_literal(left_token, words)  # type: ignore
    node.spids.append(left_token.span_id)
    return node

  # If the first one is a key/value pair, then the rest are assumed to be.
  pair = word_.DetectAssocPair(words[0])
  if pair:
    pairs = [pair[0], pair[1]]  # flat representation

    n = len(words)
    for i in xrange(1, n):
      w = words[i]
      pair = word_.DetectAssocPair(w)
      if not pair:
        p_die("Expected associative array pair", word=w)

      pairs.append(pair[0])  # flat representation
      pairs.append(pair[1])

    # invariant List?
    node = word_part.AssocArrayLiteral(left_token, pairs)  # type: ignore
    node.spids.append(paren_spid)
    return node

  words2 = braces.BraceDetectAll(words)
  words3 = word_.TildeDetectAll(words2)
  node = sh_array_literal(left_token, words3)
  node.spids.append(paren_spid)
  return node
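# AssocArrayLiteral stores the detected key/value pairs flat rather than
# nested: keys and values alternate in one list, [k1, v1, k2, v2, ...],
# which keeps the ASDL field a plain List of words. The sketch below shows
# how such a flat list maps back to pairs; pairs_to_items is a hypothetical
# helper, not part of Oil.

def pairs_to_items(pairs):
  assert len(pairs) % 2 == 0, 'flat list must have an even length'
  for i in range(0, len(pairs), 2):
    yield pairs[i], pairs[i + 1]  # (key word, value word)


flat = ['x', '1', 'y', '2']  # stand-in strings instead of word_t nodes
assert list(pairs_to_items(flat)) == [('x', '1'), ('y', '2')]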