def testVarOps(self): ev = InitEvaluator() # initializes x=xxx and y=yyy unset_sub = word_part.BracedVarSub(token(Id.VSub_Name, 'unset')) part_vals = [] ev._EvalWordPart(unset_sub, part_vals) print(part_vals) set_sub = word_part.BracedVarSub(token(Id.VSub_Name, 'x')) part_vals = [] ev._EvalWordPart(set_sub, part_vals) print(part_vals) # Now add some ops part = word_part.LiteralPart(token(Id.Lit_Chars, 'default')) arg_word = osh_word.CompoundWord([part]) test_op = suffix_op.StringUnary(Id.VTest_ColonHyphen, arg_word) unset_sub.suffix_op = test_op set_sub.suffix_op = test_op part_vals = [] ev._EvalWordPart(unset_sub, part_vals) print(part_vals) part_vals = [] ev._EvalWordPart(set_sub, part_vals) print(part_vals)
def EvalPrompt(self, val):
    """Perform the two evaluations that bash does.  Used by $PS1 and ${x@P}.

    Args:
      val: a value_t -- only value_e.Str is evaluated; anything else falls
        back to self.default_prompt.

    Returns:
      The fully evaluated prompt string.
    """
    if val.tag != value_e.Str:
        return self.default_prompt  # no evaluation necessary

    # Parse backslash escapes (cached)
    try:
        tokens = self.tokens_cache[val.s]
    except KeyError:
        tokens = list(match.PS1_LEXER.Tokens(val.s))
        self.tokens_cache[val.s] = tokens

    # Replace values.
    ps1_str = self._ReplaceBackslashCodes(tokens)

    # Parse it like a double-quoted word (cached).
    # NOTE: This is copied from the PS4 logic in Tracer.
    try:
        ps1_word = self.parse_cache[ps1_str]
    except KeyError:
        w_parser = self.parse_ctx.MakeWordParserForPlugin(ps1_str, self.arena)
        try:
            ps1_word = w_parser.ReadForPlugin()
        # FIX: the exception was bound to an unused name `e`; dropped it.
        # NOTE(review): this catch is broader than _EvalPS4's util.ParseError
        # -- consider narrowing it, but kept broad here to preserve behavior.
        except Exception:
            error_str = '<ERROR: cannot parse PS1>'
            t = token(Id.Lit_Chars, error_str, const.NO_INTEGER)
            ps1_word = word.CompoundWord([word_part.LiteralPart(t)])
        self.parse_cache[ps1_str] = ps1_word

    # Evaluate, e.g. "${debian_chroot}\u" -> '\u'
    # TODO: Handle runtime errors like unset variables, etc.
    val2 = self.ex.word_ev.EvalWordToString(ps1_word)
    return val2.s
def _ReadLikeDQ(self, left_dq_token, out_parts):
    # type: (Optional[token], List[word_part_t]) -> None
    """Read the interior of a double-quoted string or a here doc.

    Consumes tokens in lex_mode_e.DQ until the closing double quote (or, for
    here docs, EOF in the token stream), appending the resulting parts.

    Args:
      left_dq_token: A token if we are reading a double quoted part, or None if
        we're reading a here doc.
      out_parts: list of word_part to append to
    """
    done = False
    while not done:
        self._Next(lex_mode_e.DQ)
        self._Peek()

        if self.token_kind == Kind.Lit:
            if self.token_type == Id.Lit_EscapedChar:
                part = word_part.EscapedLiteralPart(
                    self.cur_token)  # type: word_part_t
            else:
                part = word_part.LiteralPart(self.cur_token)
            out_parts.append(part)

        elif self.token_kind == Kind.Left:
            # e.g. $(, ${, ` -- delegate to the left-part reader
            part = self._ReadDoubleQuotedLeftParts()
            out_parts.append(part)

        elif self.token_kind == Kind.VSub:
            # e.g. $x, $1, $?
            part = word_part.SimpleVarSub(self.cur_token)
            out_parts.append(part)

        elif self.token_kind == Kind.Right:
            assert self.token_type == Id.Right_DoubleQuote, self.token_type
            if left_dq_token:
                done = True
            else:
                # In a here doc, the right quote is literal!
                out_parts.append(word_part.LiteralPart(self.cur_token))

        elif self.token_kind == Kind.Eof:
            if left_dq_token:
                # A double-quoted string must be closed before EOF.
                p_die(
                    'Unexpected EOF reading double-quoted string that began here',
                    token=left_dq_token)
            else:  # here docs will have an EOF in their token stream
                done = True

        else:
            # The DQ lexer mode should only produce the kinds handled above.
            raise AssertionError(self.cur_token)
def _ReadPatSubVarOp(self, lex_mode):
    # type: (lex_mode_t) -> suffix_op__PatSub
    """Parse the pattern-substitution suffix op, e.g. the /pat/replace in ${x/pat/replace}.

    Match   = ('/' | '#' | '%') WORD
    VarSub  = ...
            | VarOf '/' Match '/' WORD
    """
    pat = self._ReadVarOpArg(lex_mode, eof_type=Id.Lit_Slash, empty_ok=False)
    assert isinstance(pat, word__CompoundWord)  # Because empty_ok=False

    if len(pat.parts) == 1:
        ok, s, quoted = word.StaticEval(pat)
        if ok and s == '/' and not quoted:  # Looks like ${a////c}, read again
            # The first '/' was the "replace all" modifier; the next token is
            # the real start of the pattern.
            self._Next(lex_mode)
            self._Peek()
            p = word_part.LiteralPart(self.cur_token)
            pat.parts.append(p)

    if len(pat.parts) == 0:
        p_die('Pattern in ${x/pat/replace} must not be empty',
              token=self.cur_token)

    replace_mode = Id.Undefined_Tok
    # Check for / # % modifier on pattern.
    first_part = pat.parts[0]
    if isinstance(first_part, word_part__LiteralPart):
        lit_id = first_part.token.id
        if lit_id in (Id.Lit_Slash, Id.Lit_Pound, Id.Lit_Percent):
            # Consume the modifier token; it's not part of the pattern itself.
            pat.parts.pop(0)
            replace_mode = lit_id

    # NOTE: If there is a modifier, the pattern can be empty, e.g.
    # ${s/#/foo} and ${a/%/foo}.

    if self.token_type == Id.Right_VarSub:
        # e.g. ${v/a} is the same as ${v/a/} -- empty replacement string
        return suffix_op.PatSub(pat, None, replace_mode)

    if self.token_type == Id.Lit_Slash:
        replace = self._ReadVarOpArg(lex_mode)  # do not stop at /

        self._Peek()
        if self.token_type != Id.Right_VarSub:
            # NOTE: I think this never happens.
            # We're either in the VS_ARG_UNQ or VS_ARG_DQ lex state, and
            # everything there is Lit_ or Left_, except for }.
            p_die("Expected } after replacement string, got %s",
                  self.cur_token,
                  token=self.cur_token)

        return suffix_op.PatSub(pat, replace, replace_mode)

    # Happens with ${x//} and ${x///foo}, see test/parse-errors.sh
    p_die("Expected } after pat sub, got %r",
          self.cur_token.val,
          token=self.cur_token)
def _ExpandPart(
        parts,  # type: List[word_part_t]
        first_alt_index,  # type: int
        suffixes,  # type: List[List[word_part_t]]
):
    # type: (...) -> List[List[word_part_t]]
    """Expand a single BracedTuple or BracedRange part into all alternatives.

    Mutually recursive with _BraceExpand.

    Args:
      parts: input parts
      first_alt_index: index of the first BracedTuple
      suffixes: List of suffixes to append.

    Returns:
      A list of part lists: prefix + alternative + suffix for every
      (alternative, suffix) combination.
    """
    out = []

    prefix = parts[:first_alt_index]
    expand_part = parts[first_alt_index]

    if isinstance(expand_part, word_part__BracedTuple):
        # Call _BraceExpand on each of the inner words too!
        expanded_alts = []  # type: List[List[word_part_t]]
        for w in expand_part.words:
            assert isinstance(w, word__CompoundWord)  # for MyPy
            expanded_alts.extend(_BraceExpand(w.parts))

        for alt_parts in expanded_alts:
            for suffix in suffixes:
                out_parts = []  # type: List[word_part_t]
                out_parts.extend(prefix)
                out_parts.extend(alt_parts)
                out_parts.extend(suffix)
                out.append(out_parts)

    elif isinstance(expand_part, word_part__BracedRange):
        # Not mutually recursive with _BraceExpand
        strs = _RangeStrings(expand_part)
        for s in strs:
            for suffix in suffixes:
                out_parts_ = []  # type: List[word_part_t]
                out_parts_.extend(prefix)
                # Preserve span_id from the original
                t = token(Id.Lit_Chars, s, expand_part.spids[0])
                out_parts_.append(word_part.LiteralPart(t))
                out_parts_.extend(suffix)
                out.append(out_parts_)

    else:
        # FIX: include the unexpected part in the error instead of a bare
        # AssertionError, so failures are diagnosable.
        raise AssertionError('Unexpected part %r' % (expand_part,))

    return out
def testMultiLine(self):
    """Words across multiple lines: blank lines and comments are skipped."""
    w_parser = _InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")
    print('--MULTI')

    def _Lit(s):
        # Helper: a CompoundWord holding a single literal chars token.
        return osh_word.CompoundWord(
            [word_part.LiteralPart(token(Id.Lit_Chars, s))])

    # The parser should produce exactly this sequence of words.
    expected_words = [
        _Lit('ls'),
        _Lit('foo'),
        osh_word.TokenWord(token(Id.Op_Newline, '\n')),
        _Lit('ls'),
        _Lit('bar'),
        osh_word.TokenWord(token(Id.Op_Newline, '\n')),
        osh_word.TokenWord(token(Id.Eof_Real, '')),
    ]
    for expected in expected_words:
        w = w_parser.ReadWord(lex_mode_e.Outer)
        test_lib.AssertAsdlEqual(self, expected, w)
def _EvalPS4(self):
    """For set -x.

    Returns:
      (first_char, prefix): the leading character of $PS4 (to be repeated by
      nesting depth) and the evaluated remainder of the prompt string.
    """
    val = self.mem.GetVar('PS4')
    assert val.tag == value_e.Str

    s = val.s
    if s:
        first_char, ps4 = s[0], s[1:]
    else:
        first_char, ps4 = '+', ' '  # default

    # NOTE: This cache is slightly broken because aliases are mutable!  I
    # think that is more or less harmless though.
    try:
        ps4_word = self.parse_cache[ps4]
    except KeyError:
        # We have to parse this at runtime.  PS4 should usually remain
        # constant.
        w_parser = self.parse_ctx.MakeWordParserForPlugin(ps4, self.arena)

        try:
            ps4_word = w_parser.ReadForPlugin()
        # FIX: the exception was bound to an unused name `e`; dropped it.
        except util.ParseError:
            error_str = '<ERROR: cannot parse PS4>'
            t = token(Id.Lit_Chars, error_str, const.NO_INTEGER)
            ps4_word = osh_word.CompoundWord([word_part.LiteralPart(t)])
        self.parse_cache[ps4] = ps4_word

    #print(ps4_word)

    # TODO: Repeat first character according process stack depth.  Where is
    # that stored?  In the executor itself?  It should be stored along with
    # the PID.  Need some kind of ShellProcessState or something.
    #
    # We should come up with a better mechanism.  Something like $PROC_INDENT
    # and $OIL_XTRACE_PREFIX.

    # TODO: Handle runtime errors!  For example, you could PS4='$(( 1 / 0 ))'
    # <ERROR: cannot evaluate PS4>
    prefix = self.word_ev.EvalWordToString(ps4_word)

    return first_char, prefix.s
def _ReadCompoundWord(self,
                      eof_type=Id.Undefined_Tok,
                      lex_mode=lex_mode_e.Outer,
                      empty_ok=True):
    # type: (Id_t, lex_mode_t, bool) -> word__CompoundWord
    """
    Precondition: Looking at the first token of the first word part
    Postcondition: Looking at the token after, e.g. space or operator

    NOTE: eof_type is necessary because / is a literal, i.e. Lit_Slash, but it
    could be an operator delimiting a compound word.  Can we change lexer modes
    and remove this special case?
    """
    # FIX: renamed the local from `word` to `w` -- the old name shadowed the
    # `word` module used elsewhere in this file (e.g. word.StaticEval).
    w = osh_word.CompoundWord()
    num_parts = 0
    done = False
    while not done:
        self._Peek()

        allow_done = empty_ok or num_parts != 0
        if allow_done and self.token_type == eof_type:
            done = True  # e.g. for ${foo//pat/replace}

        # Keywords like "for" are treated like literals
        elif self.token_kind in (Kind.Lit, Kind.History, Kind.KW, Kind.Assign,
                                 Kind.ControlFlow, Kind.BoolUnary,
                                 Kind.BoolBinary):
            if self.token_type == Id.Lit_EscapedChar:
                part = word_part.EscapedLiteralPart(
                    self.cur_token)  # type: word_part_t
            else:
                part = word_part.LiteralPart(self.cur_token)
            w.parts.append(part)

            if self.token_type == Id.Lit_VarLike:  # foo=
                # Peek ahead: foo=( starts an array literal assignment.
                t = self.lexer.LookAhead(lex_mode_e.Outer)
                if t.id == Id.Op_LParen:
                    self.lexer.PushHint(Id.Op_RParen, Id.Right_ArrayLiteral)
                    part2 = self._ReadArrayLiteralPart()
                    w.parts.append(part2)

        elif self.token_kind == Kind.VSub:
            part = word_part.SimpleVarSub(self.cur_token)
            w.parts.append(part)

        elif self.token_kind == Kind.ExtGlob:
            part = self._ReadExtGlobPart()
            w.parts.append(part)

        elif self.token_kind == Kind.Left:
            part = self._ReadLeftParts()
            w.parts.append(part)

        # NOT done yet, will advance below
        elif self.token_kind == Kind.Right:
            # Still part of the word; will be done on the next iter.
            if self.token_type == Id.Right_DoubleQuote:
                pass
            elif self.token_type == Id.Right_CommandSub:
                pass
            elif self.token_type == Id.Right_Subshell:
                # LEXER HACK for (case x in x) ;; esac )
                assert self.next_lex_mode is None  # Rewind before it's used
                if self.lexer.MaybeUnreadOne():
                    self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
                    self._Next(lex_mode)
                done = True
            else:
                done = True

        elif self.token_kind == Kind.Ignored:
            done = True

        else:
            # LEXER HACK for unbalanced case clause.  'case foo in esac' is
            # valid, so to test for ESAC, we can read ) before getting a chance
            # to PushHint(Id.Op_RParen, Id.Right_CasePat).  So here we unread
            # one token and do it again.

            # We get Id.Op_RParen at top level:      case x in x) ;; esac
            # We get Id.Eof_RParen inside ComSub:  $(case x in x) ;; esac )
            if self.token_type in (Id.Op_RParen, Id.Eof_RParen):
                assert self.next_lex_mode is None  # Rewind before it's used
                if self.lexer.MaybeUnreadOne():
                    if self.token_type == Id.Eof_RParen:
                        # Redo translation
                        self.lexer.PushHint(Id.Op_RParen, Id.Eof_RParen)
                    self._Next(lex_mode)

            done = True  # anything we don't recognize means we're done

        if not done:
            self._Next(lex_mode)
            num_parts += 1
    return w
def ErrorWord(fmt, err):
    # type: (str, _ErrorWithLocation) -> word__CompoundWord
    """Build a CompoundWord wrapping a formatted error message.

    Args:
      fmt: a %-style format string with one %s slot.
      err: the error whose UserErrorString() fills the slot.
    """
    msg = fmt % err.UserErrorString()
    err_token = token(Id.Lit_Chars, msg, const.NO_INTEGER)
    return word.CompoundWord([word_part.LiteralPart(err_token)])
def Expr(self, pnode):
    # type: (PNode) -> expr_t
    """Walk the homogeneous parse tree and create a typed AST."""
    typ = pnode.typ
    tok = pnode.tok
    children = pnode.children

    #if typ in self.number2symbol:  # non-terminal
    if ISNONTERMINAL(typ):
        #c = '-' if not children else len(children)
        #log('non-terminal %s %s', nt_name, c)

        if typ == grammar_nt.lvalue_list:
            return self._AssocBinary(children)

        if typ == grammar_nt.atom:
            if children[0].tok.id == Id.Op_LParen:
                # Parenthesized expression: unwrap it.
                return self.Expr(children[1])
            else:
                raise NotImplementedError

        if typ == grammar_nt.eval_input:
            # testlist_input: testlist NEWLINE* ENDMARKER
            return self.Expr(children[0])

        if typ == grammar_nt.testlist:
            # testlist: test (',' test)* [',']
            return self._AssocBinary(children)

        elif typ == grammar_nt.arith_expr:
            # expr: term (('+'|'-') term)*
            return self._AssocBinary(children)

        elif typ == grammar_nt.term:
            # term: factor (('*'|'/'|'div'|'mod') factor)*
            return self._AssocBinary(children)

        elif typ == grammar_nt.expr:
            # expr: xor_expr ('|' xor_expr)*
            return self._AssocBinary(children)

        elif typ == grammar_nt.shift_expr:
            # shift_expr: arith_expr (('<<'|'>>') arith_expr)*
            return self._AssocBinary(children)

        elif typ == grammar_nt.comparison:
            # comparison: expr (comp_op expr)*
            return self._AssocBinary(children)

        elif typ == grammar_nt.factor:
            # factor: ('+'|'-'|'~') factor | power
            # the power would have already been reduced
            assert len(children) == 2, children
            op, e = children
            assert isinstance(op.tok, syntax_asdl.token)
            return expr.Unary(op.tok, self.Expr(e))

        elif typ == grammar_nt.atom_expr:
            # atom_expr: ['await'] atom trailer*
            # NOTE: This would be shorter in a recursive style.
            base = self.Expr(children[0])
            n = len(children)
            # FIX: the loop previously rebound `pnode` (shadowing the
            # parameter) and assigned an unused `tok`; use a fresh name.
            for i in xrange(1, n):
                trailer_node = children[i]
                base = self._Trailer(base, trailer_node)
            return base

        elif typ == grammar_nt.power:
            # power: atom_expr ['^' factor]
            # This doesn't repeat, so it doesn't matter if it's left or right
            # associative.
            return self._AssocBinary(children)

        elif typ == grammar_nt.array_literal:
            left_tok = children[0].tok

            # Approximation for now.
            tokens = [
                pn.tok for pn in children[1:-1]
                if pn.tok.id == Id.Lit_Chars
            ]
            array_words = [
                word.CompoundWord([word_part.LiteralPart(t)]) for t in tokens
            ]  # type: List[word_t]
            return expr.ArrayLiteral(left_tok, array_words)

        elif typ == grammar_nt.regex_literal:
            left_tok = children[0].tok

            # Approximation for now.
            tokens = [
                pn.tok for pn in children[1:-1]
                if pn.tok.id == Id.Expr_Name
            ]
            parts = [regex.Var(t) for t in tokens]  # type: List[regex_t]
            return expr.RegexLiteral(left_tok, regex.Concat(parts))

        elif typ == grammar_nt.command_sub:
            left_tok = children[0].tok

            # Approximation for now.
            tokens = [
                pn.tok for pn in children[1:-1]
                if pn.tok.id == Id.Lit_Chars
            ]
            words = [
                word.CompoundWord([word_part.LiteralPart(t)]) for t in tokens
            ]  # type: List[word_t]
            return expr.CommandSub(left_tok, command.SimpleCommand(words))

        elif typ == grammar_nt.expr_sub:
            left_tok = children[0].tok
            return expr.ExprSub(left_tok, self.Expr(children[1]))

        elif typ == grammar_nt.var_sub:
            left_tok = children[0].tok
            return expr.VarSub(left_tok, self.Expr(children[1]))

        elif typ == grammar_nt.dq_string:
            left_tok = children[0].tok

            tokens = [
                pn.tok for pn in children[1:-1]
                if pn.tok.id == Id.Lit_Chars
            ]
            parts2 = [oil_word_part.Literal(t)
                      for t in tokens]  # type: List[oil_word_part_t]
            return expr.DoubleQuoted(left_tok, parts2)

        else:
            nt_name = self.number2symbol[typ]
            raise AssertionError("PNode type %d (%s) wasn't handled" %
                                 (typ, nt_name))

    else:  # Terminals should have a token
        #log('terminal %s', tok)

        if tok.id == Id.Expr_Name:
            return expr.Var(tok)
        elif tok.id == Id.Expr_Digits:
            return expr.Const(tok)
        else:
            raise AssertionError(tok.id)