def testVarOps(self):
    """Evaluate ${unset} and ${x}, with and without a :- test op attached."""
    ev = InitEvaluator()  # initializes x=xxx and y=yyy

    unset_sub = word_part.BracedVarSub(token(Id.VSub_Name, 'unset'))
    set_sub = word_part.BracedVarSub(token(Id.VSub_Name, 'x'))

    # First evaluate both substitutions with no suffix op.
    for sub in (unset_sub, set_sub):
        results = []
        ev._EvalWordPart(sub, results)
        print(results)

    # Now attach a ${v:-default} style op to both and evaluate again.
    default_part = word_part.Literal(token(Id.Lit_Chars, 'default'))
    default_word = word.Compound([default_part])
    colon_hyphen_op = suffix_op.Unary(Id.VTest_ColonHyphen, default_word)
    unset_sub.suffix_op = colon_hyphen_op
    set_sub.suffix_op = colon_hyphen_op

    for sub in (unset_sub, set_sub):
        results = []
        ev._EvalWordPart(sub, results)
        print(results)
def _ReadLikeDQ(self, left_dq_token, out_parts):
    # type: (Optional[token], List[word_part_t]) -> None
    """Read the interior of a double-quoted string, or a here doc body.

    Args:
      left_dq_token: A token if we are reading a double quoted part, or None
        if we're reading a here doc.
      out_parts: list of word_part to append to
    """
    while True:
        self._Next(lex_mode_e.DQ)
        self._Peek()

        kind = self.token_kind
        if kind == Kind.Lit:
            # Escaped chars get their own part kind so \" etc. survive.
            if self.token_type == Id.Lit_EscapedChar:
                lit = word_part.EscapedLiteral(self.cur_token)  # type: word_part_t
            else:
                lit = word_part.Literal(self.cur_token)
            out_parts.append(lit)

        elif kind == Kind.Left:
            out_parts.append(self._ReadDoubleQuotedLeftParts())

        elif kind == Kind.VSub:
            out_parts.append(simple_var_sub(self.cur_token))
            # NOTE: parsing "$f(x)" would BREAK CODE.  Could add a more for it
            # later.

        elif kind == Kind.Right:
            assert self.token_type == Id.Right_DoubleQuote, self.token_type
            if left_dq_token:
                break  # the matching close quote ends the string
            # In a here doc, the right quote is literal!
            out_parts.append(word_part.Literal(self.cur_token))

        elif kind == Kind.Eof:
            if left_dq_token:
                p_die('Unexpected EOF reading double-quoted string that began here',
                      token=left_dq_token)
            # here docs will have an EOF in their token stream
            break

        else:
            raise AssertionError(self.cur_token)
def _ReadPatSubVarOp(self, lex_mode):
    # type: (lex_mode_t) -> suffix_op__PatSub
    """Parse the ${x/pat/replace} suffix operator, after the first '/'.

    Match     = ('/' | '#' | '%') WORD
    VarSub    = ...
              | VarOf '/' Match '/' WORD

    Returns:
      A suffix_op.PatSub with the pattern, optional replacement word, and
      the replace mode (Undefined_Tok, Lit_Slash, Lit_Pound, or Lit_Percent).
    """
    # Read the pattern, stopping at an (unquoted) '/'.
    pat = self._ReadVarOpArg(lex_mode, eof_type=Id.Lit_Slash, empty_ok=False)
    assert isinstance(pat, word__Compound)  # Because empty_ok=False

    if len(pat.parts) == 1:
        ok, s, quoted = word_.StaticEval(pat)
        if ok and s == '/' and not quoted:  # Looks like ${a////c}, read again
            # The pattern itself is a literal '/'; consume one more token so
            # the slash becomes part of the pattern rather than the delimiter.
            self._Next(lex_mode)
            self._Peek()
            p = word_part.Literal(self.cur_token)
            pat.parts.append(p)

    if len(pat.parts) == 0:
        p_die('Pattern in ${x/pat/replace} must not be empty',
              token=self.cur_token)

    replace_mode = Id.Undefined_Tok
    # Check for / # % modifier on pattern.  These select replace-all,
    # anchor-at-start, and anchor-at-end respectively.
    first_part = pat.parts[0]
    if isinstance(first_part, word_part__Literal):
        lit_id = first_part.token.id
        if lit_id in (Id.Lit_Slash, Id.Lit_Pound, Id.Lit_Percent):
            pat.parts.pop(0)
            replace_mode = lit_id

    # NOTE: If there is a modifier, the pattern can be empty, e.g.
    # ${s/#/foo} and ${a/%/foo}.

    if self.token_type == Id.Right_DollarBrace:
        # e.g. ${v/a} is the same as ${v/a/}  -- empty replacement string
        return suffix_op.PatSub(pat, None, replace_mode)

    if self.token_type == Id.Lit_Slash:
        # A second '/' delimits the replacement word.
        replace = self._ReadVarOpArg(lex_mode)  # do not stop at /

        self._Peek()
        if self.token_type != Id.Right_DollarBrace:
            # NOTE: I think this never happens.
            # We're either in the VS_ARG_UNQ or VS_ARG_DQ lex state, and everything
            # there is Lit_ or Left_, except for }.
            p_die("Expected } after replacement string, got %s", self.cur_token,
                  token=self.cur_token)

        return suffix_op.PatSub(pat, replace, replace_mode)

    # Happens with ${x//} and ${x///foo}, see test/parse-errors.sh
    p_die("Expected } after pat sub, got %r", self.cur_token.val,
          token=self.cur_token)
def _ExpandPart(
    parts,  # type: List[word_part_t]
    first_alt_index,  # type: int
    suffixes,  # type: List[List[word_part_t]]
):
    # type: (...) -> List[List[word_part_t]]
    """Expand a single BracedTuple or BracedRange within a list of parts.

    Mutually recursive with _BraceExpand.

    Args:
      parts: input parts
      first_alt_index: index of the first BracedTuple (or BracedRange)
      suffixes: List of suffixes to append.

    Returns:
      One list of parts per (alternative x suffix) combination, each of the
      form prefix + alternative + suffix.

    Raises:
      AssertionError: if parts[first_alt_index] is neither a BracedTuple nor
        a BracedRange.
    """
    out = []
    # Everything before the brace construct is shared by all expansions.
    prefix = parts[:first_alt_index]

    expand_part = parts[first_alt_index]

    if isinstance(expand_part, word_part__BracedTuple):
        # Call _BraceExpand on each of the inner words too!
        expanded_alts = []  # type: List[List[word_part_t]]
        for w in expand_part.words:
            assert isinstance(w, word__Compound)  # for MyPy
            expanded_alts.extend(_BraceExpand(w.parts))

        for alt_parts in expanded_alts:
            for suffix in suffixes:
                out_parts = []  # type: List[word_part_t]
                out_parts.extend(prefix)
                out_parts.extend(alt_parts)
                out_parts.extend(suffix)
                out.append(out_parts)

    elif isinstance(expand_part, word_part__BracedRange):
        # Not mutually recursive with _BraceExpand
        strs = _RangeStrings(expand_part)
        for s in strs:
            for suffix in suffixes:
                out_parts_ = []  # type: List[word_part_t]
                out_parts_.extend(prefix)
                # Preserve span_id from the original
                t = token(Id.Lit_Chars, s, expand_part.spids[0])
                out_parts_.append(word_part.Literal(t))
                out_parts_.extend(suffix)
                out.append(out_parts_)

    else:
        # Fix: include the unexpected part in the error message instead of
        # raising a bare AssertionError, so failures are diagnosable.
        raise AssertionError(expand_part)

    return out
def testMultiLine(self):
    """ReadWord should yield one newline token per command, skipping blank
    lines and comments in between."""
    w_parser = test_lib.InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")
    print('--MULTI')

    def assert_literal_word(chars):
        # Expect a Compound word made of a single Lit_Chars token.
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        parts = [word_part.Literal(token(Id.Lit_Chars, chars))]
        test_lib.AssertAsdlEqual(self, word.Compound(parts), w)

    def assert_token_word(id_, val):
        # Expect a bare token word (operator newline or EOF).
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        test_lib.AssertAsdlEqual(self, word.Token(token(id_, val)), w)

    # Decomposed from copy-pasted ReadWord/assert triples into two helpers.
    assert_literal_word('ls')
    assert_literal_word('foo')
    assert_token_word(Id.Op_Newline, '\n')

    assert_literal_word('ls')
    assert_literal_word('bar')
    assert_token_word(Id.Op_Newline, '\n')

    assert_token_word(Id.Eof_Real, '')
def ErrorWord(fmt, err):
    # type: (str, _ErrorWithLocation) -> word__Compound
    """Wrap an error message in a Compound word with a single literal part."""
    msg = fmt % err.UserErrorString()
    err_token = token(Id.Lit_Chars, msg, const.NO_INTEGER)
    return word.Compound([word_part.Literal(err_token)])
def Expr(self, pnode):
    # type: (PNode) -> expr_t
    """Transform expressions (as opposed to statements).

    Dispatches on the parse-node type: non-terminals recurse into their
    children (often via self._AssocBinary for left-associative binary
    chains), while terminals are converted directly to expr.Var / expr.Const.
    """
    typ = pnode.typ
    tok = pnode.tok
    children = pnode.children

    if ISNONTERMINAL(typ):
        c = '-' if not children else len(children)
        #log('non-terminal %s %s', nt_name, c)

        if typ == grammar_nt.oil_expr:  # for if/while
            # oil_expr: '(' testlist ')'
            return self.Expr(children[1])

        if typ == grammar_nt.return_expr:  # for if/while
            # return_expr: testlist end_stmt
            return self.Expr(children[0])

        if typ == grammar_nt.lvalue_list:
            return self._AssocBinary(children)

        if typ == grammar_nt.atom:
            return self.atom(children)

        if typ == grammar_nt.eval_input:
            # testlist_input: testlist NEWLINE* ENDMARKER
            return self.Expr(children[0])

        if typ == grammar_nt.testlist:
            # testlist: test (',' test)* [',']
            return self._AssocBinary(children)

        elif typ == grammar_nt.arith_expr:
            # expr: term (('+'|'-') term)*
            return self._AssocBinary(children)

        elif typ == grammar_nt.term:
            # term: factor (('*'|'/'|'div'|'mod') factor)*
            return self._AssocBinary(children)

        elif typ == grammar_nt.expr:
            # expr: xor_expr ('|' xor_expr)*
            return self._AssocBinary(children)

        elif typ == grammar_nt.shift_expr:
            # shift_expr: arith_expr (('<<'|'>>') arith_expr)*
            return self._AssocBinary(children)

        elif typ == grammar_nt.comparison:
            # comparison: expr (comp_op expr)*
            return self._AssocBinary(children)

        elif typ == grammar_nt.factor:
            # factor: ('+'|'-'|'~') factor | power
            # the power would have already been reduced
            assert len(children) == 2, children
            op, e = children
            assert isinstance(op.tok, token)
            return expr.Unary(op.tok, self.Expr(e))

        elif typ == grammar_nt.atom_expr:
            # atom_expr: ['await'] atom trailer*

            # NOTE: This would be shorter in a recursive style.
            # Fold each trailer (call, subscript, attribute) onto the base.
            base = self.Expr(children[0])
            n = len(children)
            for i in xrange(1, n):
                pnode = children[i]
                tok = pnode.tok
                base = self.trailer(base, pnode)
            return base

        elif typ == grammar_nt.power:
            # power: atom_expr ['^' factor]

            # This doesn't repeat, so it doesn't matter if it's left or right
            # associative.
            return self._AssocBinary(children)

        elif typ == grammar_nt.array_literal:
            left_tok = children[0].tok

            # Approximation for now.
            tokens = [
                pnode.tok for pnode in children[1:-1]
                if pnode.tok.id == Id.Lit_Chars
            ]
            items = [expr.Const(t) for t in tokens]  # type: List[expr_t]
            return expr.ArrayLiteral(left_tok, items)

        elif typ == grammar_nt.sh_array_literal:
            left_tok = children[0].tok

            # HACK: When typ is Id.Expr_WordsDummy, the 'tok' field ('opaque')
            # actually has a list of words!
            typ1 = children[1].typ
            assert typ1 == Id.Expr_WordsDummy.enum_id, typ1
            array_words = cast('List[word_t]', children[1].tok)

            return expr.ShellArrayLiteral(left_tok, array_words)

        elif typ == grammar_nt.regex_literal:
            left_tok = children[0].tok

            # Approximation for now.
            tokens = [
                pnode.tok for pnode in children[1:-1]
                if pnode.tok.id == Id.Expr_Name
            ]
            parts = [regex.Var(t) for t in tokens]  # type: List[regex_t]

            return expr.RegexLiteral(left_tok, regex.Concat(parts))

        elif typ == grammar_nt.command_sub:
            left_tok = children[0].tok

            # Approximation for now.
            tokens = [
                pnode.tok for pnode in children[1:-1]
                if pnode.tok.id == Id.Lit_Chars
            ]
            words = [
                word.Compound([word_part.Literal(t)]) for t in tokens
            ]  # type: List[word_t]
            return expr.CommandSub(left_tok, command.Simple(words))

        elif typ == grammar_nt.sh_command_sub:
            left_tok = children[0].tok

            # HACK: When typ is Id.Expr_CommandDummy, the 'tok' field ('opaque')
            # actually has a word_part.CommandSub!
            typ1 = children[1].typ
            assert typ1 == Id.Expr_CommandDummy.enum_id, typ1
            cs_part = cast(word_part__CommandSub, children[1].tok)

            # Awkward: the schemas are different
            expr_part = expr.CommandSub(cs_part.left_token, cs_part.command_list)
            expr_part.spids.extend(cs_part.spids)
            return expr_part

        elif typ == grammar_nt.var_sub:
            left_tok = children[0].tok

            return expr.VarSub(left_tok, self.Expr(children[1]))

        elif typ == grammar_nt.dq_string:
            left_tok = children[0].tok

            tokens = [
                pnode.tok for pnode in children[1:-1]
                if pnode.tok.id == Id.Lit_Chars
            ]
            parts2 = [word_part.Literal(t) for t in tokens]  # type: List[word_part_t]
            return expr.DoubleQuoted(left_tok, parts2)

        else:
            nt_name = self.number2symbol[typ]
            raise AssertionError(
                "PNode type %d (%s) wasn't handled" % (typ, nt_name))

    else:  # Terminals should have a token
        #log('terminal %s', tok)

        if tok.id == Id.Expr_Name:
            return expr.Var(tok)
        elif tok.id == Id.Expr_Digits:
            return expr.Const(tok)
        else:
            raise AssertionError(tok.id)
def _ReadCompoundWord(self, eof_type=Id.Undefined_Tok,
                      lex_mode=lex_mode_e.ShCommand, empty_ok=True):
    # type: (Id_t, lex_mode_t, bool) -> word__Compound
    """Read a compound word by accumulating parts until a delimiter.

    Precondition: Looking at the first token of the first word part
    Postcondition: Looking at the token after, e.g. space or operator

    NOTE: eof_type is necessary because / is a literal, i.e. Lit_Slash, but it
    could be an operator delimiting a compound word.  Can we change lexer modes
    and remove this special case?
    """
    w = word.Compound()
    num_parts = 0
    brace_count = 0  # net count of unquoted { vs. } literals, checked at end
    done = False
    while not done:
        self._Peek()

        allow_done = empty_ok or num_parts != 0
        if allow_done and self.token_type == eof_type:
            done = True  # e.g. for ${foo//pat/replace}

        # Keywords like "for" are treated like literals
        elif self.token_kind in (
            Kind.Lit, Kind.History, Kind.KW, Kind.ControlFlow,
            Kind.BoolUnary, Kind.BoolBinary):
            if self.token_type == Id.Lit_EscapedChar:
                part = word_part.EscapedLiteral(self.cur_token)  # type: word_part_t
            else:
                part = word_part.Literal(self.cur_token)

            if self.token_type == Id.Lit_VarLike and num_parts == 0:  # foo=
                w.parts.append(part)
                # Unfortunately it's awkward to pull the check for a=(1 2) up to
                # _ReadWord.
                t = self.lexer.LookAhead(lex_mode_e.ShCommand)
                if t.id == Id.Op_LParen:
                    self.lexer.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)
                    part2 = self._ReadArrayLiteral()
                    w.parts.append(part2)

                    # Array literal must be the last part of the word.
                    self._Next(lex_mode)
                    self._Peek()
                    # EOF, whitespace, newline, Right_Subshell
                    if self.token_kind not in self.KINDS_THAT_END_WORDS:
                        p_die('Unexpected token after array literal',
                              token=self.cur_token)
                    done = True

            elif (self.parse_opts.at and self.token_type == Id.Lit_Splice and
                  num_parts == 0):

                splice_token = self.cur_token

                t = self.lexer.LookAhead(lex_mode_e.ShCommand)
                if t.id == Id.Op_LParen:  # @arrayfunc(x)
                    arglist = arg_list()
                    self._ParseCallArguments(arglist)
                    part = word_part.FuncCall(splice_token, arglist)
                else:
                    part = word_part.Splice(splice_token)

                w.parts.append(part)

                # @words or @arrayfunc() must be the last part of the word
                self._Next(lex_mode)
                self._Peek()
                # EOF, whitespace, newline, Right_Subshell
                if self.token_kind not in self.KINDS_THAT_END_WORDS:
                    p_die('Unexpected token after array splice',
                          token=self.cur_token)
                done = True

            else:
                # Syntax error for { and }
                if self.token_type == Id.Lit_LBrace:
                    brace_count += 1
                elif self.token_type == Id.Lit_RBrace:
                    brace_count -= 1

                # not a literal with lookahead; append it
                w.parts.append(part)

        elif self.token_kind == Kind.VSub:
            vsub_token = self.cur_token

            part = simple_var_sub(vsub_token)
            if self.token_type == Id.VSub_DollarName:
                # Look ahead for $strfunc(x)
                # $f(x) or --name=$f(x) is allowed
                # but "--name=$f(x)" not allowed?  This would BREAK EXISTING CODE.
                # It would need a parse option.
                t = self.lexer.LookAhead(lex_mode_e.ShCommand)
                if t.id == Id.Op_LParen:
                    arglist = arg_list()
                    self._ParseCallArguments(arglist)
                    part = word_part.FuncCall(vsub_token, arglist)

                    # Unlike @arrayfunc(x), it makes sense to allow $f(1)$f(2)
                    # var a = f(1); var b = f(2); echo $a$b
                    # It's consistent with other uses of $.

            w.parts.append(part)

        elif self.token_kind == Kind.ExtGlob:
            part = self._ReadExtGlob()
            w.parts.append(part)

        elif self.token_kind == Kind.Left:
            part = self._ReadLeftParts()
            w.parts.append(part)

        # NOT done yet, will advance below
        elif self.token_kind == Kind.Right:
            # Still part of the word; will be done on the next iter.
            if self.token_type == Id.Right_DoubleQuote:
                pass
            # Never happens, no PushHint for this case.
            #elif self.token_type == Id.Right_DollarParen:
            #  pass
            elif self.token_type == Id.Right_Subshell:
                # LEXER HACK for (case x in x) ;; esac )
                assert self.next_lex_mode is None  # Rewind before it's used
                if self.lexer.MaybeUnreadOne():
                    self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
                    self._Next(lex_mode)
                done = True
            else:
                done = True

        elif self.token_kind == Kind.Ignored:
            done = True

        else:
            # LEXER HACK for unbalanced case clause.  'case foo in esac' is valid,
            # so to test for ESAC, we can read ) before getting a chance to
            # PushHint(Id.Op_RParen, Id.Right_CasePat).  So here we unread one
            # token and do it again.

            # We get Id.Op_RParen at top level:      case x in x) ;; esac
            # We get Id.Eof_RParen inside ComSub:  $(case x in x) ;; esac )
            if self.token_type in (Id.Op_RParen, Id.Eof_RParen):
                assert self.next_lex_mode is None  # Rewind before it's used
                if self.lexer.MaybeUnreadOne():
                    if self.token_type == Id.Eof_RParen:
                        # Redo translation
                        self.lexer.PushHint(Id.Op_RParen, Id.Eof_RParen)
                    self._Next(lex_mode)

            done = True  # anything we don't recognize means we're done

        if not done:
            self._Next(lex_mode)
            num_parts += 1

    if self.parse_opts.brace and num_parts > 1 and brace_count != 0:
        # accept { and }, but not foo{
        p_die(
            'Word has unbalanced { }. Maybe add a space or quote it like \{',
            word=w)

    return w