def DetectAssocPair(w):
  # type: (compound_word) -> Optional[Tuple[compound_word, compound_word]]
  """Like DetectShAssignment, but for A=(['k']=v ['k2']=v)

  The key and the value are both strings.  So we just pick out word_part.
  Unlike a[k]=v, A=([k]=v) is NOT ambiguous, because the [k] syntax is only
  used for associative array literals, as opposed to indexed array literals.

  Returns (key, value) as two compound_words, or None if the word doesn't
  start with a '[' literal.
  """
  parts = w.parts
  # Must start with the '[' of ['k']=v; otherwise it's not an assoc pair.
  if _LiteralId(parts[0]) != Id.Lit_LBracket:
    return None

  n = len(parts)
  for i in xrange(n):
    id_ = _LiteralId(parts[i])
    if id_ == Id.Lit_ArrayLhsClose:  # ]=
      # e.g. if we have [$x$y]=$a$b
      # Everything between '[' and ']=' is the key; everything after is the
      # value.
      key = compound_word(parts[1:i])  # $x$y
      value = compound_word(parts[i + 1:])  # $a$b

      # Type-annotated intermediate value for mycpp translation
      ret = key, value  # type: Optional[Tuple[compound_word, compound_word]]
      return ret

  # Saw '[' but never ']=' -- not an assoc pair after all.
  return None
def _ReadExtGlob(self):
  # type: () -> word_part__ExtGlob
  """Read an extended glob part like @(foo|bar).

  Precondition: cur_token is the LEFT token (e.g. Id.ExtGlob_At).
  Postcondition: cur_token is the Id.Right_ExtGlob token.

  Grammar:
    Item         = compound_word | EPSILON  # important: @(foo|) is allowed
    LEFT         = '@(' | '*(' | '+(' | '?(' | '!('
    RIGHT        = ')'
    ExtGlob      = LEFT (Item '|')* Item RIGHT  # ITEM may be empty
    Compound includes ExtGlob
  """
  left_token = self.cur_token
  arms = []  # type: List[word_t]
  spids = []  # type: List[int]
  spids.append(left_token.span_id)

  # Inside an extended glob, a bare ')' closes the glob rather than acting
  # as an operator.
  self.lexer.PushHint(Id.Op_RParen, Id.Right_ExtGlob)
  self._Next(lex_mode_e.ExtGlob)  # advance past LEFT

  read_word = False  # did we just read a word?  To handle @(||).

  while True:
    self._Peek()

    if self.token_type == Id.Right_ExtGlob:
      # An empty trailing arm, as in @(foo|), still counts as an arm.
      if not read_word:
        arms.append(compound_word())
      spids.append(self.cur_token.span_id)
      break

    elif self.token_type == Id.Op_Pipe:
      # '|' separates arms; an empty arm becomes an empty compound_word.
      if not read_word:
        arms.append(compound_word())
      read_word = False
      self._Next(lex_mode_e.ExtGlob)

    # lex mode EXTGLOB should only produce these 4 kinds of tokens
    elif self.token_kind in (Kind.Lit, Kind.Left, Kind.VSub, Kind.ExtGlob):
      w = self._ReadCompoundWord(lex_mode_e.ExtGlob)
      arms.append(w)
      read_word = True

    elif self.token_kind == Kind.Eof:
      p_die('Unexpected EOF reading extended glob that began here',
            token=left_token)

    else:
      raise AssertionError(self.cur_token)

  part = word_part.ExtGlob(left_token, arms)
  part.spids.extend(spids)
  return part
def TildeDetect(UP_w):
  # type: (word_t) -> Optional[compound_word]
  """Detect tilde expansion in a word.

  A word that begins with a Lit_TildeLike literal may need that literal
  turned into a TildeSub part -- it depends on whether the second part
  begins with a slash.  Returns a NEW word when the substitution applies,
  and None otherwise.

  NOTE:
  - The regex for Lit_TildeLike could be expanded.  Right now it's
    conservative, like Lit_Chars without the /.
  - This could be written in a mutating style, since only the first part
    changes.  But we CANNOT know this during lexing.
  """
  # BracedTree, Empty, etc. can't be tilde expanded.
  if UP_w.tag_() != word_e.Compound:
    return None

  w = cast(compound_word, UP_w)
  if len(w.parts) == 0:  # ${a-} has no parts
    return None

  first = w.parts[0]
  if LiteralId(first) != Id.Lit_TildeLike:
    return None

  prefix = [word_part.TildeSub(cast(Token, first))]  # type: List[word_part_t]

  if len(w.parts) == 1:  # can't be zero; bare ~ or ~user
    return compound_word(prefix)

  second = w.parts[1]
  second_id = LiteralId(second)

  # Two ways a slash can follow:
  #   Lit_Slash is for ${x-~/foo} -- the ${x//~/} delimiter was handled
  #   earlier.
  #   Lit_Chars starting with '/' is for plain ~/foo.
  followed_by_slash = (
      second_id == Id.Lit_Slash or
      (second_id == Id.Lit_Chars and cast(Token, second).val.startswith('/')))

  if followed_by_slash:
    prefix.extend(w.parts[1:])
    return compound_word(prefix)

  # Something like '~foo:bar', which doesn't have a slash.
  return None
def testVarOps(self):
  """Evaluate ${unset} and ${x}, then again with a :- suffix op."""
  ev = InitEvaluator()  # initializes x=xxx and y=yyy

  unset_sub = braced_var_sub(Tok(Id.VSub_Name, 'unset'))
  set_sub = braced_var_sub(Tok(Id.VSub_Name, 'x'))

  # First evaluate both subs with no suffix op.
  for sub in (unset_sub, set_sub):
    vals = []
    ev._EvalWordPart(sub, vals)
    print(vals)

  # Now attach a ${...:-default} test op to both and re-evaluate.
  default_part = Tok(Id.Lit_Chars, 'default')
  test_op = suffix_op.Unary(Id.VTest_ColonHyphen,
                            compound_word([default_part]))
  unset_sub.suffix_op = test_op
  set_sub.suffix_op = test_op

  for sub in (unset_sub, set_sub):
    vals = []
    ev._EvalWordPart(sub, vals)
    print(vals)
def RunCommandSub(self, node):
  # type: (command_t) -> str
  """Run a command substitution $(...) and return its captured stdout.

  Forks a child process whose stdout is redirected into a pipe, reads the
  pipe to EOF in the parent, waits for the child, applies errexit policy,
  and returns the output with trailing newlines stripped.
  """
  # Hack for weird $(<file) construct
  if node.tag_() == command_e.Simple:
    simple = cast(command__Simple, node)
    # Detect '< file'
    if (len(simple.words) == 0 and
        len(simple.redirects) == 1 and
        simple.redirects[0].op.id == Id.Redir_Less):
      # change it to __cat < file
      # note: cmd_eval.py _Dispatch works around lack of spid
      tok = Token(Id.Lit_Chars, runtime.NO_SPID, '__cat')
      cat_word = compound_word([tok])
      # MUTATE the command.Simple node.  This will only be done the first
      # time in the parent process.
      simple.words.append(cat_word)

  p = self._MakeProcess(node,
                        inherit_errexit=self.exec_opts.inherit_errexit())

  r, w = posix.pipe()
  p.AddStateChange(process.StdoutToPipe(r, w))
  _ = p.Start()
  #log('Command sub started %d', pid)

  chunks = []  # type: List[str]
  # Close the write end in the parent so read() sees EOF when the child
  # exits.
  posix.close(w)  # not going to write
  while True:
    byte_str = posix.read(r, 4096)
    if len(byte_str) == 0:  # EOF: child closed its stdout
      break
    chunks.append(byte_str)
  posix.close(r)

  status = p.Wait(self.waiter)

  # OSH has the concept of aborting in the middle of a WORD.  We're not
  # waiting until the command is over!
  if self.exec_opts.more_errexit():
    if self.exec_opts.errexit() and status != 0:
      raise error.ErrExit('Command sub exited with status %d (%r)',
                          status, NewStr(command_str(node.tag_())))
  else:
    # Set a flag so we check errexit at the same time as bash.  Example:
    #
    # a=$(false)
    # echo foo  # no matter what comes here, the flag is reset
    #
    # Set ONLY until this command node has finished executing.

    # HACK: move this
    self.cmd_ev.check_command_sub_status = True
    self.mem.SetLastStatus(status)

  # Runtime errors test case: $("echo foo > $@")

  # Why rstrip()?
  # https://unix.stackexchange.com/questions/17747/why-does-shell-command-substitution-gobble-up-a-trailing-newline-char
  return ''.join(chunks).rstrip('\n')
def TildeDetect(UP_w):
  # type: (word_t) -> Optional[compound_word]
  """Detect tilde expansion in a word.

  It might begin with Literal that needs to be turned into a TildeSub.
  (It depends on whether the second token begins with slash).

  If so, it returns a new word.  Otherwise returns None.

  NOTE:
  - The regex for Lit_TildeLike could be expanded.  Right now it's
    conservative, like Lit_Chars without the /.
  - It's possible to write this in a mutating style, since only the first
    token is changed.  But note that we CANNOT know this during lexing.
  """
  # NOTE: BracedTree, Empty, etc. can't be tilde expanded
  if UP_w.tag_() != word_e.Compound:
    return None

  w = cast(compound_word, UP_w)
  # Callers never pass an empty Compound word here -- TODO confirm against
  # call sites.
  assert w.parts, w

  UP_part0 = w.parts[0]
  if _LiteralId(UP_part0) != Id.Lit_TildeLike:
    return None
  tok0 = cast(Token, UP_part0)

  if len(w.parts) == 1:  # can't be zero
    # Bare ~ or ~user: the whole word becomes a single TildeSub.
    tilde_part = word_part.TildeSub(tok0)
    return compound_word([tilde_part])

  UP_part1 = w.parts[1]
  # NOTE: We could inspect the raw tokens.
  if _LiteralId(UP_part1) == Id.Lit_Chars:
    tok = cast(Token, UP_part1)
    if tok.val.startswith('/'):
      # ~/foo etc.: replace the first part, keep the rest unchanged.
      tilde_part_ = word_part.TildeSub(tok0)  # type: word_part_t

      parts = [tilde_part_]
      parts.extend(w.parts[1:])
      return compound_word(parts)

  # It could be something like '~foo:bar', which doesn't have a slash.
  return None
def ReadForPlugin(self):
  # type: () -> compound_word
  """Read a word for plugins like $PS1 and $PS4.

  Behaves like reading a here doc line: "\n" is allowed, as well as the
  typical substitutions ${x} $(echo hi) $((1 + 2)).
  """
  result = compound_word()
  self._ReadLikeDQ(None, result.parts)
  return result
def testBraceExpand(self):
  """Exercise _BraceDetect and _BraceExpand on a few representative words.

  The detect/expand/print sequence was duplicated inline three times; it's
  now factored into _checkDetectAndExpand so each case is one line.
  """
  # A word with no braces: _BraceExpand still yields exactly one result.
  w = _assertReadWord(self, 'hi')
  results = braces._BraceExpand(w.parts)
  self.assertEqual(1, len(results))
  self._printExpansions(results)

  # prefix-{alternatives}-suffix
  self._checkDetectAndExpand('B-{a,b}-E', 3, 2)
  # Nested braces inside an alternative.
  self._checkDetectAndExpand('B-{a,={b,c,d}=,e}-E', 3, 5)
  # Two independent brace groups: 2 * 2 = 4 expansions.
  self._checkDetectAndExpand('B-{a,b}-{c,d}-E', 5, 4)

def _printExpansions(self, results):
  """Pretty-print each expansion result, then a blank separator line."""
  for parts in results:
    _PrettyPrint(compound_word(parts))
  print('')

def _checkDetectAndExpand(self, line, num_tree_parts, num_expansions):
  """Detect braces in 'line', check tree/expansion sizes, and print them."""
  w = _assertReadWord(self, line)
  tree = braces._BraceDetect(w)
  self.assertEqual(num_tree_parts, len(tree.parts))
  _PrettyPrint(tree)

  results = braces._BraceExpand(tree.parts)
  self.assertEqual(num_expansions, len(results))
  self._printExpansions(results)
def testHereDoc(self):
  """Blank lines and comments between commands should be collapsed.

  The seven read-and-assert stanzas were duplicated inline; they're now
  factored into _assertNextWord / _assertNextToken helpers.
  """
  w_parser = test_lib.InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")
  print('--MULTI')
  self._assertNextWord(w_parser, 'ls')
  self._assertNextWord(w_parser, 'foo')
  self._assertNextToken(w_parser, Id.Op_Newline, None)
  self._assertNextWord(w_parser, 'ls')
  self._assertNextWord(w_parser, 'bar')
  self._assertNextToken(w_parser, Id.Op_Newline, None)
  self._assertNextToken(w_parser, Id.Eof_Real, '')

def _assertNextWord(self, w_parser, val):
  """Read the next word and assert it's a single Lit_Chars literal."""
  w = w_parser.ReadWord(lex_mode_e.ShCommand)
  expected = compound_word([Tok(Id.Lit_Chars, val)])
  test_lib.AssertAsdlEqual(self, expected, w)

def _assertNextToken(self, w_parser, id_, val):
  """Read the next word and assert it's the given operator/EOF token."""
  w = w_parser.ReadWord(lex_mode_e.ShCommand)
  test_lib.AssertAsdlEqual(self, Tok(id_, val), w)
def BraceExpandWords(words):
  # type: (List[word_t]) -> List[compound_word]
  """Expand every BracedTree in 'words'; plain Compound words pass through."""
  expanded = []  # type: List[compound_word]
  for word_ in words:
    UP_word = word_
    with tagswitch(word_) as case:
      if case(word_e.BracedTree):
        word_ = cast(word__BracedTree, UP_word)
        # One BracedTree can produce many words.
        for parts in _BraceExpand(word_.parts):
          expanded.append(compound_word(parts))
      elif case(word_e.Compound):
        word_ = cast(compound_word, UP_word)
        expanded.append(word_)
      else:
        raise AssertionError(word_.tag_())
  return expanded
def ErrorWord(error_str):
  # type: (str) -> compound_word
  """Wrap an error message in a word, to stand in for a failed result."""
  tok = Token(Id.Lit_Chars, runtime.NO_SPID, error_str)
  return compound_word([tok])
def ErrorWord(fmt, err):
  # type: (str, _ErrorWithLocation) -> compound_word
  """Format an error into a word, to stand in for a failed result."""
  message = fmt % err.UserErrorString()
  literal = Token(Id.Lit_Chars, runtime.NO_SPID, message)
  return compound_word([literal])
def _BraceDetect(w):
  # type: (compound_word) -> Optional[word__BracedTree]
  """Return a new word if the input word looks like a brace expansion.

  e.g. {a,b} or {1..10..2} (TODO)

  Do we want to accept {01..02} ?  zsh does make some attempt to do this too.

  NOTE: This is an iterative algorithm that uses a stack.  The grammar-based
  approach didn't seem natural.

  It's not LL(1) because of 'part*'.  And not LL(k) even?  Maybe it be
  handled with an LR parser?  In any case the imperative algorithm with
  'early return' for a couple cases is fairly simple.

  Grammar:
    # an alternative is a literal, possibly empty, or another brace_expr

    part = <any part except Literal>
    alt = part* | brace_expr

    # a brace_expr is group of at least 2 braced and comma-separated
    # alternatives, with optional prefix and suffix.
    brace_expr = part* '{' alt ',' alt (',' alt)* '}' part*
  """
  # Errors:
  # }a{    - stack depth dips below 0
  # {a,b}{ - Stack depth doesn't end at 0
  # {a}    - no comma, and also not an numeric range

  # cur_parts accumulates parts at the current nesting level; each '{'
  # pushes a frame saving the outer level's parts.
  cur_parts = []  # type: List[word_part_t]
  stack = []  # type: List[_StackFrame]

  found = False  # did we see at least one '{' ... '}' group?

  for i, part in enumerate(w.parts):
    append = True  # by default, 'part' is copied through unchanged
    UP_part = part
    if part.tag_() == word_part_e.Literal:
      part = cast(Token, UP_part)
      id_ = part.id
      if id_ == Id.Lit_LBrace:
        # Save prefix parts.  Start new parts list.
        new_frame = _StackFrame(cur_parts)
        stack.append(new_frame)
        cur_parts = []  # clear
        append = False
        found = True  # assume found, but can early exit with None later

      elif id_ == Id.Lit_Comma:  # Append a new alternative.
        # NOTE: Should we allow this:
        # ,{a,b}
        # or force this:
        # \,{a,b}
        # ?  We're forcing braces right now but not commas.
        if len(stack):
          stack[-1].saw_comma = True
          stack[-1].alt_part.words.append(compound_word(cur_parts))
          cur_parts = []  # clear
          append = False

      elif id_ == Id.Lit_RBrace:
        if len(stack) == 0:  # e.g. echo {a,b}{  -- unbalanced {
          return None  # do not expand ANYTHING because of invalid syntax

        # Detect {1..10} and {1..10..2}

        #log('stack[-1]: %s', stack[-1])
        #log('cur_parts: %s', cur_parts)

        range_part = None  # type: Optional[word_part_t]
        # only allow {1..3}, not {a,1..3}
        if not stack[-1].saw_comma and len(cur_parts) == 1:
          # It must be ONE part.  For example, -1..-100..-2 is initially
          # lexed as a single Lit_Chars token.
          part2 = cur_parts[0]
          if part2.tag_() == word_part_e.Literal:
            tok = cast(Token, part2)
            if tok.id == Id.Lit_Chars:
              range_part = _RangePartDetect(tok)
              if range_part:
                # Pop back to the outer level and splice in the range.
                frame = stack.pop()
                cur_parts = frame.cur_parts
                cur_parts.append(range_part)
                append = False

        # It doesn't look like a range -- process it as the last element
        # in {a,b,c}

        if not range_part:
          if not stack[-1].saw_comma:  # {foo} is not a real alternative
            return None  # early return

          stack[-1].alt_part.words.append(compound_word(cur_parts))

          # Pop back to the outer level; the finished group becomes one
          # part of it.
          frame = stack.pop()
          cur_parts = frame.cur_parts
          cur_parts.append(frame.alt_part)
          append = False

    if append:
      cur_parts.append(part)

  if len(stack) != 0:  # e.g. {a,b}{ -- some '{' was never closed
    return None

  if found:
    return word.BracedTree(cur_parts)
  else:
    return None
def _ReadCompoundWord3(self, lex_mode, eof_type, empty_ok):
  # type: (lex_mode_t, Id_t, bool) -> compound_word
  """Read a compound word, accumulating parts until a word-ending token.

  Precondition: Looking at the first token of the first word part
  Postcondition: Looking at the token after, e.g. space or operator

  NOTE: eof_type is necessary because / is a literal, i.e. Lit_Slash, but
  it could be an operator delimiting a compound word.  Can we change lexer
  modes and remove this special case?
  """
  w = compound_word()
  num_parts = 0
  brace_count = 0  # for the parse_brace balance check at the end
  done = False
  while not done:
    self._Peek()

    # If empty words are not allowed, the first token can't end the word.
    allow_done = empty_ok or num_parts != 0
    if allow_done and self.token_type == eof_type:
      done = True  # e.g. for ${foo//pat/replace}

    # Keywords like "for" are treated like literals
    elif self.token_kind in (
        Kind.Lit, Kind.History, Kind.KW, Kind.ControlFlow,
        Kind.BoolUnary, Kind.BoolBinary):
      if self.token_type == Id.Lit_EscapedChar:
        part = word_part.EscapedLiteral(self.cur_token)  # type: word_part_t
      else:
        part = self.cur_token

      if self.token_type == Id.Lit_VarLike and num_parts == 0:  # foo=
        w.parts.append(part)
        # Unfortunately it's awkward to pull the check for a=(1 2) up to
        # _ReadWord.
        next_id = self.lexer.LookAhead(lex_mode)
        if next_id == Id.Op_LParen:
          self.lexer.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)
          part2 = self._ReadArrayLiteral()
          w.parts.append(part2)

          # Array literal must be the last part of the word.
          self._Next(lex_mode)
          self._Peek()
          # EOF, whitespace, newline, Right_Subshell
          if self.token_kind not in KINDS_THAT_END_WORDS:
            p_die('Unexpected token after array literal',
                  token=self.cur_token)
          done = True

      elif (self.parse_opts.parse_at() and
            self.token_type == Id.Lit_Splice and num_parts == 0):

        splice_token = self.cur_token

        next_id = self.lexer.LookAhead(lex_mode)
        if next_id == Id.Op_LParen:  # @arrayfunc(x)
          arglist = arg_list()
          self._ParseCallArguments(arglist)
          part = word_part.FuncCall(splice_token, arglist)
        else:
          part = word_part.Splice(splice_token)

        w.parts.append(part)

        # @words or @arrayfunc() must be the last part of the word
        self._Next(lex_mode)
        self._Peek()
        # EOF, whitespace, newline, Right_Subshell
        if self.token_kind not in KINDS_THAT_END_WORDS:
          p_die('Unexpected token after array splice',
                token=self.cur_token)
        done = True

      else:
        # Syntax error for { and }
        if self.token_type == Id.Lit_LBrace:
          brace_count += 1
        elif self.token_type == Id.Lit_RBrace:
          brace_count -= 1

        # not a literal with lookahead; append it
        w.parts.append(part)

    elif self.token_kind == Kind.VSub:
      vsub_token = self.cur_token

      part = simple_var_sub(vsub_token)
      if self.token_type == Id.VSub_DollarName:
        # Look ahead for $strfunc(x)
        #   $f(x) or --name=$f(x) is allowed
        #   but "--name=$f(x)" not allowed?  This would BREAK EXISTING CODE.
        #   It would need a parse option.
        next_id = self.lexer.LookAhead(lex_mode)
        if next_id == Id.Op_LParen:
          arglist = arg_list()
          self._ParseCallArguments(arglist)
          part = word_part.FuncCall(vsub_token, arglist)

          # Unlike @arrayfunc(x), it makes sense to allow $f(1)$f(2)
          # var a = f(1); var b = f(2); echo $a$b
          # It's consistent with other uses of $.

      w.parts.append(part)

    elif self.token_kind == Kind.ExtGlob:
      part = self._ReadExtGlob()
      w.parts.append(part)

    elif self.token_kind == Kind.Left:
      part = self._ReadLeftParts()
      w.parts.append(part)

    # NOT done yet, will advance below
    elif self.token_kind == Kind.Right:
      # Still part of the word; will be done on the next iter.
      if self.token_type == Id.Right_DoubleQuote:
        pass

      # Never happens, no PushHint for this case.
      #elif self.token_type == Id.Right_DollarParen:
      #  pass

      elif self.token_type == Id.Right_Subshell:
        # LEXER HACK for (case x in x) ;; esac )
        # Rewind before it's used
        assert self.next_lex_mode == lex_mode_e.Undefined
        if self.lexer.MaybeUnreadOne():
          self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
          self._Next(lex_mode)
        done = True

      else:
        done = True

    elif self.token_kind == Kind.Ignored:
      done = True

    else:
      # LEXER HACK for unbalanced case clause.  'case foo in esac' is
      # valid, so to test for ESAC, we can read ) before getting a chance
      # to PushHint(Id.Op_RParen, Id.Right_CasePat).  So here we unread
      # one token and do it again.

      # We get Id.Op_RParen at top level:      case x in x) ;; esac
      # We get Id.Eof_RParen inside ComSub:  $(case x in x) ;; esac )

      if self.token_type in (Id.Op_RParen, Id.Eof_RParen):
        # Rewind before it's used
        assert self.next_lex_mode == lex_mode_e.Undefined
        if self.lexer.MaybeUnreadOne():
          if self.token_type == Id.Eof_RParen:
            # Redo translation
            self.lexer.PushHint(Id.Op_RParen, Id.Eof_RParen)
          self._Next(lex_mode)

      done = True  # anything we don't recognize means we're done

    if not done:
      self._Next(lex_mode)
      num_parts += 1

  if self.parse_opts.parse_brace() and num_parts > 1 and brace_count != 0:
    # accept { and }, but not foo{
    p_die(
        'Word has unbalanced { }.  Maybe add a space or quote it like \{',
        word=w)

  return w
def RunCommandSub(self, cs_part):
  # type: (command_sub) -> str
  """Run a command substitution $(...) and return its captured stdout.

  Checks the strict_errexit policy first, forks a child whose stdout is
  redirected into a pipe, reads the pipe to EOF, waits for the child,
  applies errexit policy, and returns the output with trailing newlines
  stripped.
  """
  if not self.exec_opts.allow_command_sub():
    # TODO:
    # - Add spid of $(
    # - Better hints.  Use 'run' for 'if myfunc', and 2 lines like local x;
    #   x=$(false) fo assignment builtins.
    # - Maybe we should have an error message ID that links somewhere?
    e_die("Command subs not allowed here because status wouldn't be checked (strict_errexit).")

  node = cs_part.child

  # Hack for weird $(<file) construct
  if node.tag_() == command_e.Simple:
    simple = cast(command__Simple, node)
    # Detect '< file'
    if (len(simple.words) == 0 and
        len(simple.redirects) == 1 and
        simple.redirects[0].op.id == Id.Redir_Less):
      # change it to __cat < file
      # note: cmd_eval.py _Dispatch works around lack of spid
      tok = Token(Id.Lit_Chars, runtime.NO_SPID, '__cat')
      cat_word = compound_word([tok])
      # MUTATE the command.Simple node.  This will only be done the first
      # time in the parent process.
      simple.words.append(cat_word)

  p = self._MakeProcess(node,
                        inherit_errexit=self.exec_opts.inherit_errexit())

  r, w = posix.pipe()
  p.AddStateChange(process.StdoutToPipe(r, w))
  _ = p.Start()
  #log('Command sub started %d', pid)

  chunks = []  # type: List[str]
  # Close the write end in the parent so read() sees EOF when the child
  # exits.
  posix.close(w)  # not going to write
  while True:
    byte_str = posix.read(r, 4096)
    if len(byte_str) == 0:  # EOF: child closed its stdout
      break
    chunks.append(byte_str)
  posix.close(r)

  status = p.Wait(self.waiter)

  # OSH has the concept of aborting in the middle of a WORD.  We're not
  # waiting until the command is over!
  if self.exec_opts.command_sub_errexit():
    if status != 0:
      raise error.ErrExit('Command sub exited with status %d (%s)' %
                          (status, ui.CommandType(node)),
                          span_id=cs_part.left_token.span_id, status=status)
  else:
    # Set a flag so we check errexit at the same time as bash.  Example:
    #
    # a=$(false)
    # echo foo  # no matter what comes here, the flag is reset
    #
    # Set ONLY until this command node has finished executing.

    # HACK: move this
    self.cmd_ev.check_command_sub_status = True
    self.mem.SetLastStatus(status)

  # Runtime errors test case: $("echo foo > $@")

  # Why rstrip()?
  # https://unix.stackexchange.com/questions/17747/why-does-shell-command-substitution-gobble-up-a-trailing-newline-char
  return ''.join(chunks).rstrip('\n')