def ParseCase(self):
    """Parse a whole case statement and return a command.Case node.

    Grammar:
      case_clause : Case WORD newline_ok in newline_ok case_list? Esac ;

    The node's spids are filled with the span IDs of the 'case', 'in' and
    'esac' words, in that order.
    """
    node = command.Case()

    # 'case' keyword: remember where it starts, then move past it.
    kw_case = word.LeftMostSpanForWord(self.cur_word)
    self._Next()

    # The word being matched against the patterns.
    self._Peek()
    node.to_match = self.cur_word
    self._Next()

    # 'in', with optional newlines on both sides.
    self._NewlineOk()
    kw_in = word.LeftMostSpanForWord(self.cur_word)
    self._Eat(Id.KW_In)
    self._NewlineOk()

    # Seeing 'esac' right away means the case list is empty.
    has_arms = self.c_id != Id.KW_Esac
    if has_arms:
        # The arm list is handed to ParseCaseList; its return value is unused.
        # TODO: should it return a list of nodes, and extend?
        self.ParseCaseList(node.arms)
        self._Peek()

    kw_esac = word.LeftMostSpanForWord(self.cur_word)
    self._Eat(Id.KW_Esac)
    self._Next()

    node.spids.extend([kw_case, kw_in, kw_esac])
    return node
def ParseIf(self):
    """Parse an if statement and return a command.If node.

    Grammar:
      if_clause : If command_list Then command_list else_part? Fi ;

    The first arm's spids are (const.NO_INTEGER, then_spid).  The node's
    spids end up as (else_spid, fi_spid), where else_spid is
    const.NO_INTEGER when no elif/else part follows the first arm.
    """
    node = command.If()
    self._Next()  # move past 'if'

    condition = self._ParseCommandList()
    assert condition is not None

    then_span = word.LeftMostSpanForWord(self.cur_word)
    self._Eat(Id.KW_Then)

    action = self._ParseCommandList()
    assert action is not None

    first_arm = syntax_asdl.if_arm(condition.children, action.children)
    # The first arm has no 'elif' span, hence the placeholder.
    first_arm.spids.extend([const.NO_INTEGER, then_span])
    node.arms.append(first_arm)

    if self.c_id not in (Id.KW_Elif, Id.KW_Else):
        # No else part: record a placeholder for the else span.
        node.spids.append(const.NO_INTEGER)
    else:
        # _ParseElifElse appends the else span itself.
        self._ParseElifElse(node)

    fi_span = word.LeftMostSpanForWord(self.cur_word)
    self._Eat(Id.KW_Fi)
    node.spids.append(fi_span)

    return node
def ParseKshFunctionDef(self):
    """Parse a 'function name [()]' style definition; return a FuncDef node.

    Grammar:
      ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body

    The node's spids are (left_spid, after_name_spid): the span of the
    'function' word, and one past the span of the name -- or, when '()' is
    present, one past the span of the current word after the ')' was eaten.
    """
    start_span = word.LeftMostSpanForWord(self.cur_word)
    self._Next()  # move past 'function'

    self._Peek()
    ok, func_name = word.AsFuncName(self.cur_word)
    if not ok:
        p_die('Invalid KSH-style function name', word=self.cur_word)

    after_span = word.LeftMostSpanForWord(self.cur_word) + 1
    self._Next()  # move past the function name

    self._Peek()
    if self.c_id == Id.Op_LParen:
        # Optional '()' after the name.
        self.lexer.PushHint(Id.Op_RParen, Id.Right_FuncDef)
        self._Next()
        self._Eat(Id.Right_FuncDef)
        # With parens present, the end marker moves to after the ')'.
        after_span = word.LeftMostSpanForWord(self.cur_word) + 1

    self._NewlineOk()

    node = command.FuncDef()
    node.name = func_name
    self.ParseFunctionBody(node)

    node.spids.extend([start_span, after_span])
    return node
def _ParseElifElse(self, if_node):
    """Parse the elif/else tail of an if statement into if_node.

    Grammar:
      else_part : (Elif command_list Then command_list)* Else command_list ;

    Each elif becomes an if_arm appended to if_node.arms, with spids
    (elif_spid, then_spid).  If an 'else' is present, its commands are
    stored in if_node.else_action.  The span of 'else' (or
    const.NO_INTEGER when there is none) is always appended to
    if_node.spids.
    """
    self._Peek()
    while self.c_id == Id.KW_Elif:
        elif_span = word.LeftMostSpanForWord(self.cur_word)
        self._Next()  # move past 'elif'

        condition = self._ParseCommandList()
        assert condition is not None

        then_span = word.LeftMostSpanForWord(self.cur_word)
        self._Eat(Id.KW_Then)

        action = self._ParseCommandList()
        assert action is not None

        elif_arm = syntax_asdl.if_arm(condition.children, action.children)
        elif_arm.spids.extend([elif_span, then_span])
        if_node.arms.append(elif_arm)

    else_span = const.NO_INTEGER
    if self.c_id == Id.KW_Else:
        else_span = word.LeftMostSpanForWord(self.cur_word)
        self._Next()  # move past 'else'

        action = self._ParseCommandList()
        assert action is not None
        if_node.else_action = action.children

    if_node.spids.append(else_span)
def _MakeAssignment(parse_ctx, assign_kw, suffix_words):
    """Create a command.Assignment node from a keyword and a list of words.

    NOTE: We don't allow dynamic assignments like:

      local $1

    This can be replaced with eval 'local $1'

    Args:
      parse_ctx: passed through to _MakeAssignPair.
      assign_kw: keyword stored on the resulting Assignment node.
      suffix_words: words of the command.  Index 0 is skipped here
        (presumably the keyword word itself -- confirm against the caller).

    Returns:
      command.Assignment(assign_kw, flags, pairs)

    p_die() is called for a name that is quoted, can't be statically
    evaluated, or isn't a valid variable name.
    """
    # First parse flags, e.g. -r -x -a -A.  None of the flags have arguments.
    flags = []
    n = len(suffix_words)
    first_arg = n  # index of the first non-flag word
    for idx in range(1, n):
        ok, static_val, quoted = word.StaticEval(suffix_words[idx])
        # Stop at the first word that can't be statically evaluated, is
        # quoted, or doesn't start with '-': it and the rest are args.
        if not ok or quoted or not static_val.startswith('-'):
            first_arg = idx
            break
        flags.append(static_val)

    # Now parse bindings or variable names.
    pairs = []
    for w in suffix_words[first_arg:]:
        # declare x[y]=1 is valid
        left_token, close_token, part_offset = word.DetectAssignment(w)
        if left_token:
            pair = _MakeAssignPair(
                parse_ctx, (left_token, close_token, part_offset, w))
        else:
            # In aboriginal in variables/sources: export_if_blank does
            # export "$1".  We should allow that.
            #
            # Parse this differently then?  dynamic-export?  It sets global
            # variables.
            ok, static_val, quoted = word.StaticEval(w)
            if not ok or quoted:
                p_die("Variable names must be unquoted constants", word=w)

            # No value is equivalent to ''
            if not match.IsValidVarName(static_val):
                p_die('Invalid variable name %r', static_val, word=w)

            # The same span marks both the LHS name and the pair as a whole.
            left_spid = word.LeftMostSpanForWord(w)

            lhs = lhs_expr.LhsName(static_val)
            lhs.spids.append(left_spid)

            pair = syntax_asdl.assign_pair(lhs, assign_op_e.Equal, None)
            pair.spids.append(left_spid)
        pairs.append(pair)

    return command.Assignment(assign_kw, flags, pairs)
def ParseCaseItem(self):
    """Parse one arm of a case statement and return a case_arm node.

    Grammar:
      case_item : '('? pattern ('|' pattern)* ')'
                  newline_ok command_term? trailer? ;

    The arm's spids are (left, rparen, dsemi, last).  dsemi is set only
    when the arm ends with ';;'; last is set only when the arm is directly
    followed by 'esac'.  The unset one is const.NO_INTEGER.
    """
    # Hint to the lexer for the ')' that closes the pattern list.
    self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

    arm_start = word.LeftMostSpanForWord(self.cur_word)
    if self.c_id == Id.Op_LParen:  # optional leading '('
        self._Next()

    # One or more patterns separated by '|'.
    patterns = []
    while True:
        self._Peek()
        patterns.append(self.cur_word)
        self._Next()

        self._Peek()
        if self.c_id != Id.Op_Pipe:
            break
        self._Next()  # move past '|'

    rparen_span = word.LeftMostSpanForWord(self.cur_word)
    self._Eat(Id.Right_CasePat)
    self._NewlineOk()

    # The action is empty when ';;' or 'esac' follows immediately.
    if self.c_id in (Id.Op_DSemi, Id.KW_Esac):
        actions = []
    else:
        term = self._ParseCommandTerm()
        assert term is not None
        actions = term.children

    dsemi_span = const.NO_INTEGER
    esac_span = const.NO_INTEGER

    self._Peek()
    if self.c_id == Id.Op_DSemi:
        dsemi_span = word.LeftMostSpanForWord(self.cur_word)
        self._Next()
    elif self.c_id == Id.KW_Esac:
        # 'esac' is recorded but not consumed here.
        esac_span = word.LeftMostSpanForWord(self.cur_word)
    else:
        # Happens on EOF
        p_die('Expected ;; or esac', word=self.cur_word)

    self._NewlineOk()

    arm = syntax_asdl.case_arm(patterns, actions)
    arm.spids.extend([arm_start, rparen_span, dsemi_span, esac_span])
    return arm
def _CheckStatus(self, status, node):
    """Raise util.ErrExitFailure for a failed command when errexit is on.

    Nothing happens unless self.exec_opts.ErrExit() is true and status is
    nonzero.  Otherwise the exception carries the status, and a span ID and
    a short 'reason' phrase chosen by the kind of node that failed.
    """
    if not self.exec_opts.ErrExit() or status == 0:
        return

    # NOTE: Sometimes location info is duplicated, like on UsageError, or a
    # bad redirect.  Also, pipelines can fail twice.
    tag = node.tag
    if tag == command_e.SimpleCommand:
        reason = 'command in '
        span_id = word.LeftMostSpanForWord(node.words[0])
    elif tag == command_e.Assignment:
        reason = 'assignment in '
        span_id = self._SpanIdForAssignment(node)
    elif tag == command_e.Subshell:
        reason = 'subshell invoked from '
        span_id = node.spids[0]
    elif tag == command_e.Pipeline:
        # The whole pipeline can fail separately.
        reason = 'pipeline invoked from '
        span_id = node.spids[0]  # only one spid
    else:
        # NOTE: The fallback of CurrentSpanId() fills this in.
        reason = ''
        span_id = const.NO_INTEGER

    raise util.ErrExitFailure(
        'Exiting with status %d (%sPID %d)', status, reason, posix.getpid(),
        span_id=span_id, status=status)
def _ValToArithOrError(self, val, int_coerce=True, blame_word=None,
                       span_id=const.NO_INTEGER):
    """Convert val with self._ValToArith, degrading to 0 unless strict.

    If no span_id is given but blame_word is, the location is taken from
    the word.  On util.FatalRuntimeError: re-raise when
    self.exec_opts.strict_arith is set; otherwise print a warning (with
    file/line info when self.arena is available) and return 0.
    """
    if span_id == const.NO_INTEGER and blame_word:
        span_id = word.LeftMostSpanForWord(blame_word)
    #log('_ValToArithOrError span=%s blame=%s', span_id, blame_word)

    try:
        return self._ValToArith(val, span_id, int_coerce=int_coerce)
    except util.FatalRuntimeError as e:
        if self.exec_opts.strict_arith:
            raise

        err_span = word.SpanIdFromError(e)
        if self.arena:  # BoolEvaluator for test builtin doesn't have it.
            if err_span == const.NO_INTEGER:
                log('*** Warning has no location info ***')
            else:
                ui.PrintFilenameAndLine(err_span, self.arena)
        warn(e.UserErrorString())
        return 0
def EvalWordSequence2(self, words):
    """Evaluate a list of words into an arg_vector of strings and span IDs.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      words: list of Word instances

    Returns:
      An arg_vector.  Its 'strs' holds the evaluated strings; its 'spids'
      is parallel to 'strs' and holds, for each string, the leftmost span
      ID of the word that produced it.
    """
    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    #    first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    #    "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    #    off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    arg_vec = arg_vector()
    strs = arg_vec.strs

    n = 0  # number of strings produced before the current word
    for w in words:
        part_vals = []
        self._EvalWordToParts(w, False, part_vals)  # not double quoted

        # Each frame appends zero or more args to strs.
        for frame in _MakeWordFrames(part_vals):
            self._EvalWordFrame(frame, strs)

        # Fill in spids parallel to strs: one entry per string that this
        # word contributed.
        n_next = len(strs)
        spid = word.LeftMostSpanForWord(w)
        arg_vec.spids.extend([spid] * (n_next - n))
        n = n_next

    return arg_vec
def ParseDoGroup(self):
    """Parse 'do ... done' and return a command.DoGroup node.

    Used by ForEach, ForExpr, While, Until.  Should this be a Do node?

    Grammar:
      do_group : Do command_list Done ;  /* Apply rule 6 */

    The node's spids are (do_spid, done_spid).
    """
    self._Eat(Id.KW_Do)
    # Read the span only after _Eat, as the original code notes.
    do_span = word.LeftMostSpanForWord(self.cur_word)

    body = self._ParseCommandList()  # could be anything
    assert body is not None

    self._Eat(Id.KW_Done)
    done_span = word.LeftMostSpanForWord(self.cur_word)  # again after _Eat

    group = command.DoGroup(body.children)
    group.spids.extend([do_span, done_span])
    return group
def ParseFunctionDef(self):
    """Parse a POSIX-style 'name() body' definition; return a FuncDef node.

    Grammar:
      function_header : fname '(' ')'
      function_def    : function_header newline_ok function_body ;

    Precondition: Looking at the function name.

    NOTE: There is an ambiguity with:

      function foo ( echo hi ) and
      function foo () ( echo hi )

    Bash only accepts the latter, though it doesn't really follow a grammar.

    The node's spids are (left_spid, after_name_spid).
    """
    start_span = word.LeftMostSpanForWord(self.cur_word)

    ok, func_name = word.AsFuncName(self.cur_word)
    if not ok:
        p_die('Invalid function name', word=self.cur_word)
    self._Next()  # move past the function name

    # The caller's lookahead guarantees a '(' here.
    self._Peek()
    assert self.c_id == Id.Op_LParen, self.cur_word

    self.lexer.PushHint(Id.Op_RParen, Id.Right_FuncDef)
    self._Next()
    self._Eat(Id.Right_FuncDef)
    after_span = word.LeftMostSpanForWord(self.cur_word) + 1

    self._NewlineOk()

    node = command.FuncDef()
    node.name = func_name
    self.ParseFunctionBody(node)

    node.spids.extend([start_span, after_span])
    return node
def _assertSpanForWord(test, code_str):
    """Read a word from code_str and print its leftmost span information.

    Prints the code string, the span ID, and -- when the span ID is not
    const.NO_INTEGER -- the line span looked up in the arena.
    """
    arena, parsed_word = _assertReadWordWithArena(test, code_str)
    span_id = word.LeftMostSpanForWord(parsed_word)

    print(code_str)
    print(span_id)

    if span_id == const.NO_INTEGER:
        return
    print(arena.GetLineSpan(span_id))
def SpanIdFromError(error):
    """Return the best span ID available on an error object.

    Preference order: an explicit span_id, then the token's span, then the
    leftmost span of the part, then the leftmost span of the word.
    Returns const.NO_INTEGER when none of those is present.
    """
    if error.span_id != const.NO_INTEGER:
        result = error.span_id
    elif error.token:
        result = error.token.span_id
    elif error.part:
        result = word.LeftMostSpanForPart(error.part)
    elif error.word:
        result = word.LeftMostSpanForWord(error.word)
    else:
        result = const.NO_INTEGER
    return result
def ParseDParen(self):
    """Parse an arithmetic command '(( ... ))' and return a DParen node.

    The expression and the span of the closing '))' come from
    self.w_parser.ReadDParen().  The node's spids are
    (left_spid, right_spid).
    """
    left_spid = word.LeftMostSpanForWord(self.cur_word)

    self._Next()  # skip ((
    anode, right_spid = self.w_parser.ReadDParen()
    assert anode is not None

    node = command.DParen(anode)
    node.spids.append(left_spid)
    node.spids.append(right_spid)
    return node
def _StringToIntegerOrError(self, s, blame_word=None,
                            span_id=const.NO_INTEGER):
    """Convert s with _StringToInteger, degrading to 0 unless strict.

    Used by both [[ $x -gt 3 ]] and (( $x )).

    If no span_id is given but blame_word is, the location is taken from
    the word.  On util.FatalRuntimeError: re-raise when
    self.exec_opts.strict_arith is set; otherwise report the error as a
    warning through self.errfmt and return 0.
    """
    if span_id == const.NO_INTEGER and blame_word:
        span_id = word.LeftMostSpanForWord(blame_word)

    try:
        return _StringToInteger(s, span_id=span_id)
    except util.FatalRuntimeError as e:
        if self.exec_opts.strict_arith:
            raise
        self.errfmt.PrettyPrintError(e, prefix='warning: ')
        return 0
def _ValToArithOrError(self, val, blame_word=None, span_id=const.NO_INTEGER):
    """Convert val with self._ValToArith, degrading to 0 unless strict.

    If no span_id is given but blame_word is, the location is taken from
    the word.  On util.FatalRuntimeError: re-raise when
    self.exec_opts.strict_arith is set; otherwise report the error as a
    warning through self.errfmt and return 0.
    """
    if span_id == const.NO_INTEGER and blame_word:
        span_id = word.LeftMostSpanForWord(blame_word)
    #log('_ValToArithOrError span=%s blame=%s', span_id, blame_word)

    try:
        i = self._ValToArith(val, span_id)
    except util.FatalRuntimeError as e:
        if self.exec_opts.strict_arith:
            raise
        # The error is passed whole to the formatter, so no span ID is
        # extracted from it here.
        i = 0
        self.errfmt.PrettyPrintError(e, prefix='warning: ')
    return i
def ParseSubshell(self):
    """Parse '( command_list )' and return a command.Subshell node.

    A command list with exactly one child is unwrapped, so the Subshell
    holds that child directly.  The node's spids are
    (left_spid, right_spid).
    """
    open_span = word.LeftMostSpanForWord(self.cur_word)
    self._Next()  # move past '('

    # Ensure that something $( (cd / && pwd) ) works.  If ) is already on the
    # translation stack, we want to delay it.
    self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)

    body = self._ParseCommandList()
    assert body is not None

    # Remove singleton CommandList as an optimization.
    children = body.children
    inner = children[0] if len(children) == 1 else body
    subshell = command.Subshell(inner)

    close_span = word.LeftMostSpanForWord(self.cur_word)
    self._Eat(Id.Right_Subshell)

    subshell.spids.extend([open_span, close_span])
    return subshell
def _ValToArithOrError(self, val, int_coerce=True, blame_word=None,
                       span_id=const.NO_INTEGER):
    """Convert val with self._ValToArith, degrading to 0 unless strict.

    If no span_id is given but blame_word is, the location is taken from
    the word.  On util.FatalRuntimeError: re-raise when
    self.exec_opts.strict_arith is set; otherwise print a warning located
    at the error's own span and return 0.
    """
    if span_id == const.NO_INTEGER and blame_word:
        span_id = word.LeftMostSpanForWord(blame_word)
    #log('_ValToArithOrError span=%s blame=%s', span_id, blame_word)

    try:
        return self._ValToArith(val, span_id, int_coerce=int_coerce)
    except util.FatalRuntimeError as e:
        if self.exec_opts.strict_arith:
            raise
        err_span = word.SpanIdFromError(e)
        ui.PrintWarning(e.UserErrorString(), err_span, self.arena)
        return 0
def _ParseForEachLoop(self):
    """Parse 'name [in words] do ... done' and return a command.ForEach.

    The current word must be the loop variable.  Three forms are accepted
    after the name: 'in WORDS', a bare ';', or 'do' directly.  The last
    two set do_arg_iter (the implicit for loop).  Anything else is a
    parse error.

    The node's spids are (in_spid, semi_spid); both are const.NO_INTEGER
    for the implicit forms.
    """
    loop = command.ForEach()
    loop.do_arg_iter = False

    ok, var_name, quoted = word.StaticEval(self.cur_word)
    if not ok or quoted:
        p_die("Loop variable name should be a constant", word=self.cur_word)
    if not match.IsValidVarName(var_name):
        p_die("Invalid loop variable name", word=self.cur_word)
    loop.iter_name = var_name
    self._Next()  # move past the name

    self._NewlineOk()

    in_span = const.NO_INTEGER
    semi_span = const.NO_INTEGER

    self._Peek()
    next_id = self.c_id
    if next_id == Id.KW_In:
        self._Next()  # move past 'in'

        in_span = word.LeftMostSpanForWord(self.cur_word) + 1
        raw_words, semi_span = self.ParseForWords()
        assert raw_words is not None

        # Brace detection first, then tilde detection on its result.
        loop.iter_words = word.TildeDetectAll(braces.BraceDetectAll(raw_words))

    elif next_id == Id.Op_Semi:
        loop.do_arg_iter = True  # implicit for loop
        self._Next()

    elif next_id == Id.KW_Do:
        loop.do_arg_iter = True  # implicit for loop
        # 'do' is left for ParseDoGroup: do not advance.

    else:  # for foo BAD
        p_die('Unexpected word after for loop variable', word=self.cur_word)

    loop.spids.extend([in_span, semi_span])

    do_group = self.ParseDoGroup()
    assert do_group is not None
    loop.body = do_group

    return loop
def ParseBraceGroup(self):
    """Parse '{ command_list }' and return a command.BraceGroup node.

    Grammar:
      brace_group : LBrace command_list RBrace ;

    Only the span of the opening brace is recorded in the node's spids.
    """
    open_span = word.LeftMostSpanForWord(self.cur_word)
    self._Eat(Id.Lit_LBrace)

    body = self._ParseCommandList()
    assert body is not None

    # The closing brace's span is not needed, so it is simply consumed.
    self._Eat(Id.Lit_RBrace)

    group = command.BraceGroup(body.children)
    group.spids.append(open_span)
    return group
def _StringToIntegerOrError(self, s, blame_word=None,
                            span_id=const.NO_INTEGER):
    """Convert s with _StringToInteger, degrading to 0 unless strict.

    Used by both [[ $x -gt 3 ]] and (( $x )).

    If no span_id is given but blame_word is, the location is taken from
    the word.  On util.FatalRuntimeError: re-raise when
    self.exec_opts.strict_arith is set; otherwise emit the error's user
    string as a warning and return 0.
    """
    if span_id == const.NO_INTEGER and blame_word:
        span_id = word.LeftMostSpanForWord(blame_word)

    try:
        return _StringToInteger(s, span_id=span_id)
    except util.FatalRuntimeError as e:
        if self.exec_opts.strict_arith:
            raise
        # TODO: Need the arena for printing this error?
        #ui.PrettyPrintError(e)
        warn(e.UserErrorString())
        return 0
def DoCommand(self, node, local_symbols, at_top_level=False):
    """Translate one command node, dispatching on node.tag.

    Output is produced by interleaving two things: copying source text up
    to a span with self.cursor.PrintUntil(), dropping source text with
    self.cursor.SkipUntil(), and writing replacement text to self.f.
    Because of that, the order of the cursor calls matters.

    Args:
      node: command node to translate.
      local_symbols: dict of names declared local in the enclosing
        function.  A fresh dict is created for every FuncDef.
      at_top_level: forwarded to the children of a CommandList and to
        DoAssignment; every other recursive call uses the default (False).

    Raises:
      RuntimeError: for a '[' command whose last word is not ']'.
      AssertionError: for a case arm with neither ';;' nor 'esac' span, or
        for a node tag that is not handled.
    """
    if node.tag == command_e.CommandList:
        # TODO: How to distinguish between echo hi; echo bye; and on separate
        # lines
        for child in node.children:
            self.DoCommand(child, local_symbols, at_top_level=at_top_level)

    elif node.tag == command_e.SimpleCommand:
        # How to preserve spaces between words?  Do you want to do it?
        # Well you need to test this:
        #
        # echo foo \
        #   bar

        # TODO: Need to print until the left most part of the phrase?  the
        # phrase is a word, binding, redirect.
        #self.cursor.PrintUntil()

        if node.more_env:
            (left_spid,) = node.more_env[0].spids
            self.cursor.PrintUntil(left_spid)
            self.f.write('env ')

            # We only need to transform the right side, not left side.
            for pair in node.more_env:
                self.DoWordInCommand(pair.val, local_symbols)

        # More translations:
        # - . to source
        # - eval to sh-eval

        if node.words:
            first_word = node.words[0]
            ok, val, quoted = word.StaticEval(first_word)
            word0_spid = word.LeftMostSpanForWord(first_word)
            if ok and not quoted:
                if val == '[':
                    last_word = node.words[-1]
                    # Check if last word is ]
                    ok, val, quoted = word.StaticEval(last_word)
                    if ok and not quoted and val == ']':
                        # Replace [ with 'test'
                        self.cursor.PrintUntil(word0_spid)
                        self.cursor.SkipUntil(word0_spid + 1)
                        self.f.write('test')

                        for w in node.words[1:-1]:
                            self.DoWordInCommand(w, local_symbols)

                        # Now omit ]
                        last_spid = word.LeftMostSpanForWord(last_word)
                        # Get the space before
                        self.cursor.PrintUntil(last_spid - 1)
                        # ] takes one spid
                        self.cursor.SkipUntil(last_spid + 1)
                        return
                    else:
                        raise RuntimeError('Got [ without ]')

                elif val == '.':
                    self.cursor.PrintUntil(word0_spid)
                    self.cursor.SkipUntil(word0_spid + 1)
                    self.f.write('source')
                    return

        for w in node.words:
            self.DoWordInCommand(w, local_symbols)

        # NOTE: This will change to "phrase"?  Word or redirect.
        for r in node.redirects:
            self.DoRedirect(r, local_symbols)

        # TODO: Print the terminator.  Could be \n or ;
        # Need to print env like PYTHONPATH = 'foo' && ls
        # Need to print redirects:
        # < > are the same.  << is here string, and >> is assignment.
        # append is >+

        # TODO: static_eval of simple command
        # - [ -> "test".  Eliminate trailing ].
        # - . -> source, etc.

    elif node.tag == command_e.Assignment:
        self.DoAssignment(node, at_top_level, local_symbols)

    elif node.tag == command_e.Pipeline:
        # Obscure: |& turns into |- or |+ for stderr.
        # TODO:
        # if ! true; then -> if not true {
        # if ! echo | grep; then -> if not { echo | grep } {
        # }
        # not is like do {}, but it negates the return value I guess.
        for child in node.children:
            self.DoCommand(child, local_symbols)

    elif node.tag == command_e.AndOr:
        for child in node.children:
            self.DoCommand(child, local_symbols)

    elif node.tag == command_e.Sentence:
        # 'ls &' to 'fork ls'
        # Keep ; the same.
        self.DoCommand(node.child, local_symbols)

    # This has to be different in the function case.
    elif node.tag == command_e.BraceGroup:
        # { echo hi; } -> do { echo hi }
        # For now it might be OK to keep 'do { echo hi; }
        #left_spid, right_spid = node.spids
        (left_spid,) = node.spids

        self.cursor.PrintUntil(left_spid)
        self.cursor.SkipUntil(left_spid + 1)
        self.f.write('do {')

        for child in node.children:
            self.DoCommand(child, local_symbols)

    elif node.tag == command_e.Subshell:
        # (echo hi) -> shell echo hi
        # (echo hi; echo bye) -> shell {echo hi; echo bye}
        (left_spid, right_spid) = node.spids

        self.cursor.PrintUntil(left_spid)
        self.cursor.SkipUntil(left_spid + 1)
        self.f.write('shell {')

        self.DoCommand(node.command_list, local_symbols)

        #self._DebugSpid(right_spid)
        #self._DebugSpid(right_spid + 1)

        #print('RIGHT SPID', right_spid)
        self.cursor.PrintUntil(right_spid)
        self.cursor.SkipUntil(right_spid + 1)
        self.f.write('}')

    elif node.tag == command_e.DParen:
        # (( a == 0 )) is sh-expr ' a == 0 '
        #
        # NOTE: (( n++ )) is auto-translated to sh-expr 'n++', but could be
        # set n++.
        left_spid, right_spid = node.spids
        self.cursor.PrintUntil(left_spid)
        self.cursor.SkipUntil(left_spid + 1)
        self.f.write("sh-expr '")
        self.cursor.PrintUntil(right_spid - 1)  # before ))
        # after )) -- each one is a token
        self.cursor.SkipUntil(right_spid + 1)
        self.f.write("'")

    elif node.tag == command_e.DBracket:
        # [[ 1 -eq 2 ]] to (1 == 2)
        self.DoBoolExpr(node.expr)

    elif node.tag == command_e.FuncDef:
        # TODO: skip name
        #self.f.write('proc %s' % node.name)

        # New symbol table for every function.
        new_local_symbols = {}

        # Should be the left most span, including 'function'
        self.cursor.PrintUntil(node.spids[0])

        self.f.write('proc ')
        self.f.write(node.name)
        self.cursor.SkipUntil(node.spids[1])

        if node.body.tag == command_e.BraceGroup:
            # Don't add "do" like a standalone brace group.  Just use {}.
            for child in node.body.children:
                self.DoCommand(child, new_local_symbols)
        else:
            # Not translated yet.  The idea is to add {}:
            # proc foo {
            #   shell {echo hi; echo bye}
            # }
            #self.DoCommand(node.body)
            pass

    # NOTE: a second 'elif node.tag == command_e.BraceGroup' branch used to
    # sit here.  It could never run because the BraceGroup test above always
    # wins, so it was removed.

    elif node.tag == command_e.DoGroup:
        do_spid, done_spid = node.spids
        self.cursor.PrintUntil(do_spid)
        self.cursor.SkipUntil(do_spid + 1)
        self.f.write('{')

        for child in node.children:
            self.DoCommand(child, local_symbols)

        self.cursor.PrintUntil(done_spid)
        self.cursor.SkipUntil(done_spid + 1)
        self.f.write('}')

    elif node.tag == command_e.ForEach:
        # Need to preserve spaces between words, because there can be line
        # wrapping.
        # for x in a b c \
        #    d e f; do
        in_spid, semi_spid = node.spids

        if in_spid == const.NO_INTEGER:
            #self.cursor.PrintUntil()  # 'for x' and then space
            self.f.write('for %s in @Argv ' % node.iter_name)
            self.cursor.SkipUntil(node.body.spids[0])
        else:
            # 'for x in' and then space
            self.cursor.PrintUntil(in_spid + 1)
            self.f.write('[')
            for w in node.iter_words:
                self.DoWordInCommand(w, local_symbols)
            self.f.write(']')
            #print("SKIPPING SEMI %d" % semi_spid, file=sys.stderr)

        if semi_spid != const.NO_INTEGER:
            self.cursor.PrintUntil(semi_spid)
            self.cursor.SkipUntil(semi_spid + 1)

        self.DoCommand(node.body, local_symbols)

    elif node.tag == command_e.ForExpr:
        # Change (( )) to ( ), and then _FixDoGroup
        pass

    elif node.tag == command_e.WhileUntil:
        # Skip 'until', and replace it with 'while not'
        if node.keyword.id == Id.KW_Until:
            kw_spid = node.keyword.span_id
            self.cursor.PrintUntil(kw_spid)
            self.f.write('while not')
            self.cursor.SkipUntil(kw_spid + 1)

        cond = node.cond
        # Skip the semi-colon in the condition, which is usually a Sentence
        if len(cond) == 1 and cond[0].tag == command_e.Sentence:
            self.DoCommand(cond[0].child, local_symbols)
            semi_spid = cond[0].terminator.span_id
            self.cursor.SkipUntil(semi_spid + 1)

        self.DoCommand(node.body, local_symbols)

    elif node.tag == command_e.If:
        else_spid, fi_spid = node.spids

        # if foo; then -> if foo {
        # elif foo; then -> } elif foo {
        for arm in node.arms:
            elif_spid, then_spid = arm.spids
            if elif_spid != const.NO_INTEGER:
                self.cursor.PrintUntil(elif_spid)
                self.f.write('} ')

            cond = arm.cond
            if len(cond) == 1 and cond[0].tag == command_e.Sentence:
                sentence = cond[0]
                self.DoCommand(sentence, local_symbols)

                # Remove semi-colon
                semi_spid = sentence.terminator.span_id
                self.cursor.PrintUntil(semi_spid)
                self.cursor.SkipUntil(semi_spid + 1)
            else:
                for child in arm.cond:
                    self.DoCommand(child, local_symbols)

            self.cursor.PrintUntil(then_spid)
            self.cursor.SkipUntil(then_spid + 1)
            self.f.write('{')

            for child in arm.action:
                self.DoCommand(child, local_symbols)

        # else -> } else {
        if node.else_action:
            self.cursor.PrintUntil(else_spid)
            self.f.write('} ')
            self.cursor.PrintUntil(else_spid + 1)
            self.f.write(' {')

            for child in node.else_action:
                self.DoCommand(child, local_symbols)

        # fi -> }
        self.cursor.PrintUntil(fi_spid)
        self.cursor.SkipUntil(fi_spid + 1)
        self.f.write('}')

    elif node.tag == command_e.Case:
        case_spid, in_spid, esac_spid = node.spids
        self.cursor.PrintUntil(case_spid)
        self.cursor.SkipUntil(case_spid + 1)
        self.f.write('match')

        # Reformat "$1" to $1
        self.DoWordInCommand(node.to_match, local_symbols)

        self.cursor.PrintUntil(in_spid)
        self.cursor.SkipUntil(in_spid + 1)
        self.f.write('{')  # matchstr $var {

        # each arm needs the ) and the ;; node to skip over?
        for arm in node.arms:
            left_spid, rparen_spid, dsemi_spid, last_spid = arm.spids
            #print(left_spid, rparen_spid, dsemi_spid)

            self.cursor.PrintUntil(left_spid)
            # Hm maybe keep | because it's semi-deprecated?  You can use
            # reload|force-reload {
            # }
            # e/reload|force-reload/ {
            # }
            # / 'reload' or 'force-reload' / {
            # }
            #
            # Yeah it's the more abbreviated syntax.

            # change | to 'or'  (placeholder: nothing is rewritten yet)
            for pat in arm.pat_list:
                pass

            self.f.write('with ')
            # Remove the )
            self.cursor.PrintUntil(rparen_spid)
            self.cursor.SkipUntil(rparen_spid + 1)

            for child in arm.action:
                self.DoCommand(child, local_symbols)

            if dsemi_spid != const.NO_INTEGER:
                # Remove ;;
                self.cursor.PrintUntil(dsemi_spid)
                self.cursor.SkipUntil(dsemi_spid + 1)
            elif last_spid != const.NO_INTEGER:
                self.cursor.PrintUntil(last_spid)
            else:
                raise AssertionError(
                    "Expected with dsemi_spid or last_spid in case arm")

        self.cursor.PrintUntil(esac_spid)
        self.cursor.SkipUntil(esac_spid + 1)
        self.f.write('}')  # strmatch $var {

    elif node.tag == command_e.NoOp:
        pass

    elif node.tag == command_e.ControlFlow:
        # No change for break / return / continue
        pass

    elif node.tag == command_e.TimeBlock:
        self.DoCommand(node.pipeline, local_symbols)

    else:
        #log('Command not handled: %s', node)
        raise AssertionError(node.__class__.__name__)
def DoAssignment(self, node, at_top_level, local_symbols):
    """Translate an assignment node, keyword first and then each pair.

    The keyword part is rewritten according to node.keyword:
      - Assign_Local    -> 'var' (and the names are registered in
                           local_symbols)
      - Assign_None     -> a 'compat ', 'set ' or 'setglobal ' prefix
      - Assign_Readonly -> 'const'
      - Assign_Declare  -> 'TODO ' placeholder
    Then every pair is rewritten as "name = value", or as an
    "array-assign" form for indexed names, separated by commas.

    Args:
      node: the Assignment node.
      at_top_level: whether the assignment is at the top level.
      local_symbols: dict of names declared local in the current function,
        or None.  It is mutated for 'local' declarations.

    Open questions about local_symbols (from the original notes):
      - Add every 'local' declaration to it
        - problem: what if you have local in an "if" ?
        - we could treat it like nested scope and see what happens?  Do any
          programs have a problem with it?
          case/if/for/while/BraceGroup all define scopes or what?
          You don't want inconsistency of variables that could be defined
          at any point.
          - or maybe you only need it within "if / case" ?  Well I guess
            for/while can break out of the loop and cause problems.  A
            break is an "if".

    Raises:
      RuntimeError: for 'local' at top level.
      AssertionError: for an LHS kind that is not handled.
    """
    # Change RHS to expression language.  Bare words not allowed.
    # foo -> 'foo'

    # TODO: Whether there is a RHS is on a per-variable basis.
    # local foo -> var foo = ''
    # readonly foo -> setconst foo
    # export foo -> export foo

    # TODO:
    # - This depends on self.mode.
    # - And we also need the enclosing FuncDef node to analyze.
    #   - or we need a symbol table for the current function.  Forget about
    #
    # Oil keywords:
    # - global : scope qualifier
    # - var, const : mutability
    # - export : state mutation
    # - setconst -- make a variable mutable.  or maybe freeze var?
    #
    # NOTE: Bash also has "unset".  Does anyone use it?
    # You can use "delete" like Python I guess.  It's not the opposite of
    # set.

    # NOTE:
    # - We CAN tell if a variable has been defined locally.
    # - We CANNOT tell if it's been defined globally, because different files
    #   share the same global namespace, and we can't statically figure out
    #   what files are in the program.
    defined_locally = False  # is it a local variable in this function?
                             # can't tell if global

    if node.keyword == Id.Assign_Local:
        # Assume that 'local' is a declaration.  In osh, it's an error if
        # locals are redefined.  In bash, it's OK to do
        # 'local f=1; local f=2'.
        # Could have a flag if enough people do this.
        if at_top_level:
            raise RuntimeError('local at top level is invalid')

        # NOTE(review): defined_locally is still False at this point, so
        # this check cannot fire yet.
        if defined_locally:
            raise RuntimeError("Can't redefine local")

        keyword_spid = node.spids[0]
        self.cursor.PrintUntil(keyword_spid)
        self.cursor.SkipUntil(keyword_spid + 1)
        self.f.write('var')

        if local_symbols is not None:
            for pair in node.pairs:
                # NOTE: Not handling local a[b]=c
                if pair.lhs.tag == lhs_expr_e.LhsName:
                    #print("REGISTERED %s" % pair.lhs.name)
                    local_symbols[pair.lhs.name] = True

    elif node.keyword == Id.Assign_None:
        self.cursor.PrintUntil(node.spids[0])

        # For now, just detect whether the FIRST assignment on the line has
        # been declared locally.  We might want to split every line into
        # separate statements.
        if local_symbols is not None:
            lhs0 = node.pairs[0].lhs
            if lhs0.tag == lhs_expr_e.LhsName and lhs0.name in local_symbols:
                defined_locally = True
            #print("CHECKING NAME", lhs0.name, defined_locally, local_symbols)

        has_array = any(
            pair.lhs.tag == lhs_expr_e.CompatIndexedName
            for pair in node.pairs)

        # need semantic analysis.
        # Would be nice to assume that it's a local though.
        if has_array:
            self.f.write('compat ')  # 'compat array-assign' syntax
        elif at_top_level:
            self.f.write('setglobal ')
        elif defined_locally:
            self.f.write('set ')
            #self.f.write('[local mutated]')
        else:
            # We're in a function, but it's not defined locally, so we must
            # be mutating a global.
            self.f.write('setglobal ')

    elif node.keyword == Id.Assign_Readonly:
        # Explicit const.  Assume it can't be redefined.
        # Verb.
        #
        # Top level;
        #   readonly FOO=bar  -> const FOO = 'bar'
        #   readonly FOO -> freeze FOO
        # function level:
        #   readonly FOO=bar  -> const global FOO ::= 'bar'
        #   readonly FOO  -> freeze FOO
        keyword_spid = node.spids[0]
        if at_top_level:
            self.cursor.PrintUntil(keyword_spid)
            self.cursor.SkipUntil(keyword_spid + 1)
            self.f.write('const')
        elif defined_locally:
            # NOTE(review): defined_locally is still False in this branch,
            # so this check cannot fire yet.
            # TODO: Actually we might want 'freeze' here.  In bash, you can
            # make a variable readonly after it's defined.
            raise RuntimeError("Constant redefined locally")
        else:
            # Same as global level
            self.cursor.PrintUntil(keyword_spid)
            self.cursor.SkipUntil(keyword_spid + 1)
            self.f.write('const')

    elif node.keyword == Id.Assign_Declare:
        # declare -rx foo spam=eggs
        # export foo
        # setconst foo
        #
        # spam = eggs
        # export spam

        # Have to parse the flags
        self.f.write('TODO ')

    # foo=bar spam=eggs -> foo = 'bar', spam = 'eggs'
    n = len(node.pairs)
    for i, pair in enumerate(node.pairs):
        if pair.lhs.tag == lhs_expr_e.LhsName:
            left_spid = pair.spids[0]
            self.cursor.PrintUntil(left_spid)
            # Assume skipping over one Lit_VarLike token
            self.cursor.SkipUntil(left_spid + 1)

            # Replace name.  I guess it's Lit_Chars.
            self.f.write(pair.lhs.name)
            self.f.write(' = ')

            # TODO: This should be translated from EmptyWord.
            if pair.rhs is None:
                self.f.write("''")  # local i -> var i = ''
            else:
                self.DoWordAsExpr(pair.rhs, local_symbols)

        elif pair.lhs.tag == lhs_expr_e.CompatIndexedName:
            # NOTES:
            # - parse_ctx.one_pass_parse should be on, so the span invariant
            #   is accurate
            # - Then do the following translation:
            #   a[x+1]="foo $bar" ->
            #   compat array-assign a 'x+1' "$foo $bar"
            # This avoids dealing with nested arenas.
            #
            # TODO: This isn't great when there are multiple assignments.
            #   a[x++]=1 b[y++]=2
            #
            # 'compat' could apply to the WHOLE statement, with multiple
            # assignments.
            self.f.write(
                "array-assign %s '%s' " % (pair.lhs.name, pair.lhs.index))

            # TODO: This should be translated from EmptyWord.
            if pair.rhs is None:
                self.f.write("''")  # local i -> var i = ''
            else:
                rhs_spid = word.LeftMostSpanForWord(pair.rhs)
                self.cursor.SkipUntil(rhs_spid)
                self.DoWordAsExpr(pair.rhs, local_symbols)

        else:
            raise AssertionError(pair.lhs.__class__.__name__)

        # Comma between pairs, but not after the last one.
        if i != n - 1:
            self.f.write(',')
def DoRedirect(self, node, local_symbols):
    """Translate one redirect node (plain redirect or here doc).

    Source text up to the operator is copied first.  Then:
      - Redir: an explicit descriptor is written as '!N '.  '>' is
        re-emitted as '>', and '>&' becomes '> !'.  Other operators are
        left to the cursor.  The target word is translated last.
      - HereDoc: the operator becomes '<<' and the delimiter becomes a
        triple-quoted string -- ''' when the delimiter was quoted, and
        three double quotes otherwise.

    Args:
      node: the redirect node.
      local_symbols: passed through to the word translators.

    Raises:
      AssertionError: for a redirect kind that is not handled.

    Target syntax sketches (from the original notes):

      <<< 'here word'
      << 'here word'

      2> out.txt
      !2 > out.txt

      cat 1<< EOF           cat !1 << (triple double quote)
      hello $name    ->     hello $name
      EOF                   (triple double quote)

      cat << 'EOF'          cat << '''
      no expansion   ->     no expansion
      EOF                   '''

      The same applies to the indented <<- form.

    Warn about multiple here docs on a line.  As an obscure feature, allow

      cat << \'ONE' << \"TWO"
      123
      ONE
      234
      TWO

    The _ is an indicator that it's not a string to be piped in.
    """
    #print(node, file=sys.stderr)
    op_spid = node.op.span_id
    op_id = node.op.id
    self.cursor.PrintUntil(op_spid)

    # TODO:
    # - Do < and <& the same way.
    # - How to handle here docs and here docs?
    # - >> becomes >+ or >-, or maybe >>>

    if node.tag == redir_e.Redir:
        if node.fd != const.NO_INTEGER:
            # NOTE: Spacing like !2>err.txt vs !2 > err.txt can be done in
            # the formatter.
            self.f.write('!%d ' % node.fd)

        # The operator is rewritten the same way with or without an
        # explicit descriptor.
        if op_id == Id.Redir_Great:
            self.f.write('>')  # Allow us to replace the operator
            self.cursor.SkipUntil(op_spid + 1)
        elif op_id == Id.Redir_GreatAnd:
            # Replace >& 2 with > !2, and 1>& 2 with !1 > !2
            self.f.write('> !')
            spid = word.LeftMostSpanForWord(node.arg_word)
            self.cursor.SkipUntil(spid)

        self.DoWordInCommand(node.arg_word, local_symbols)

    elif node.tag == redir_e.HereDoc:
        # Only the quoted-ness of the delimiter matters here; its text is
        # not used.
        ok, _, delim_quoted = word.StaticEval(node.here_begin)
        if not ok:
            p_die('Invalid here doc delimiter', word=node.here_begin)

        # Turn everything into <<.  We just change the quotes
        self.f.write('<<')

        if delim_quoted:
            self.f.write(" '''")
        else:
            self.f.write(' """')

        delim_end_spid = word.RightMostSpanForWord(node.here_begin)
        self.cursor.SkipUntil(delim_end_spid + 1)

        # Now print the lines.  TODO: Have a flag to indent these to the
        # level of the owning command, e.g.
        #   cat <<EOF
        #   EOF
        # Or since most here docs are the top level, you could just have a
        # hack for a fixed indent?  TODO: Look at real use cases.
        for part in node.stdin_parts:
            self.DoWordPart(part, local_symbols)

        self.cursor.SkipUntil(node.here_end_span_id + 1)
        if delim_quoted:
            self.f.write("'''\n")
        else:
            self.f.write('"""\n')

    else:
        raise AssertionError(node.__class__.__name__)
def ParseSimpleCommand(self, cur_aliases):
  """Parse a simple command, assignment, or control flow statement.

  Fixed transcription of the POSIX grammar (TODO: port to grammar/Shell.g)

  io_file        : '<'       filename
                 | LESSAND   filename
                   ...

  io_here        : DLESS     here_end
                 | DLESSDASH here_end

  redirect       : IO_NUMBER (io_redirect | io_here)

  prefix_part    : ASSIGNMENT_WORD | redirect
  cmd_part       : WORD | redirect

  assign_kw      : Declare | Export | Local | Readonly

  # Without any words it is parsed as a command, not an assignment
  assign_listing : assign_kw

  # NOTE: any prefixes should be a warning, but they are allowed in shell.
  assignment     : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

  # an external command, a function call, or a builtin -- a "word_command"
  word_command   : prefix_part* cmd_part+

  simple_command : assign_listing
                 | assignment
                 | word_command

  Algorithm:

  1) Scan words and redirects into two separate lists.
  2) Split the words into leading assignment words and the rest.
  3) If nothing is left after the assignment words, it is a global
     assignment.  Otherwise the first remaining word decides: an assignment
     keyword (declare, local, ...), a control flow keyword, or an ordinary
     command word, which is first offered to alias expansion.

  A command can be an assignment word, word, or redirect on its own, or any
  sequence of them:

      >out.txt
      ls foo bar >out.txt <in.txt
      ls >out.txt foo bar
      VAR=val env >out.txt

  Args:
    cur_aliases: Passed through to _MaybeExpandAliases.

  Returns:
    A command node: SimpleCommand, Assignment, ControlFlow, or whatever
    _MaybeExpandAliases produced.
  """
  redirects, words = self._ScanSimpleCommand()

  # Redirect without words, e.g.  >out.txt
  if not words:
    bare = command.SimpleCommand()
    bare.redirects = redirects
    return bare

  preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)

  # Only assignment words, e.g.  ONE=1 a[x]=1 TWO=2
  if not suffix_words:
    if redirects:
      left_token, _, _, _ = preparsed_list[0]
      p_die("Global assignment shouldn't have redirects", token=left_token)

    pairs = [_MakeAssignPair(self.parse_ctx, p) for p in preparsed_list]
    assign_node = command.Assignment(Id.Assign_None, [], pairs)
    # There is no keyword spid to skip past, so record the first word.
    assign_node.spids.append(word.LeftMostSpanForWord(words[0]))
    return assign_node

  kind, kw_token = word.KeywordToken(suffix_words[0])

  if kind == Kind.Assign:
    # StaticEval suffix_words[1] to see if we have an ASSIGNMENT COMMAND like
    # 'typeset -p', which lists variables -- a SimpleCommand rather than an
    # Assignment.
    #
    # Duplicate flags like 'typeset -pf' are not handled.  That appears in
    # bashdb (bash debugger) but can be changed to 'typeset -p -f'.
    lists_vars = False
    if len(suffix_words) > 1:
      ok, flag_str, _ = word.StaticEval(suffix_words[1])
      lists_vars = ok and (kw_token.id, flag_str) in self._ASSIGN_COMMANDS

    if lists_vars:  # declare -f, declare -p, typeset -p, etc.
      return _MakeSimpleCommand(preparsed_list, suffix_words, redirects)

    if redirects:
      # The error location is attached to the keyword.
      p_die("Assignments shouldn't have redirects", token=kw_token)

    if preparsed_list:  # FOO=bar local spam=eggs not allowed
      # Use the location of the first value.  TODO: Use the whole word before
      # splitting.
      left_token, _, _, _ = preparsed_list[0]
      p_die("Assignments shouldn't have environment bindings",
            token=left_token)

    # declare str='', declare -a array=()
    kw_assign = _MakeAssignment(self.parse_ctx, kw_token.id, suffix_words)
    kw_assign.spids.append(kw_token.span_id)
    return kw_assign

  if kind == Kind.ControlFlow:
    if redirects:
      p_die("Control flow shouldn't have redirects", token=kw_token)

    if preparsed_list:  # FOO=bar break not allowed
      # TODO: Change location as above
      left_token, _, _, _ = preparsed_list[0]
      p_die("Control flow shouldn't have environment bindings",
            token=left_token)

    # The keyword takes at most one argument.
    num_suffix = len(suffix_words)
    if num_suffix == 1:
      arg_word = None
    elif num_suffix == 2:
      arg_word = suffix_words[1]
    else:
      p_die('Unexpected argument to %r', kw_token.val, word=suffix_words[2])

    # The token is attached for errors.
    return command.ControlFlow(kw_token, arg_word)

  # If any alias expansions were detected, the expansion was parsed again and
  # we use that result.
  expanded = self._MaybeExpandAliases(suffix_words, cur_aliases)
  if expanded:
    # NOTE: There are other types of nodes with redirects.  Do they matter?
    if expanded.tag == command_e.SimpleCommand:
      expanded.redirects = redirects
      _AppendMoreEnv(preparsed_list, expanded.more_env)
    return expanded

  # TODO check that we don't have env1=x x[1]=y env2=z here.

  # FOO=bar printenv.py FOO
  return _MakeSimpleCommand(preparsed_list, suffix_words, redirects)
def _MaybeExpandAliases(self, words, cur_aliases):
  """Expand leading aliases in 'words' and re-parse the resulting text.

  Two design choices in this implementation of alias:

  - Where expansion happens: at the end of ParseSimpleCommand.
  - Which grammar rule parses the expanded buffer: ParseCommand().

  This doesn't quite match other shells.  Most test cases pass, except for
  ones like:

    alias LBRACE='{'
    LBRACE echo one; echo two; }

    alias MULTILINE='echo 1
    echo 2
    echo 3'
    MULTILINE

  NOTE: dash handles aliases in a totally different way, with a global
  'checkkwd' variable in parser.c that is assigned all over the grammar:

      checkkwd = CHKNL | CHKKWD | CHKALIAS;

  readtoken() checks (checkkwd & CHKALIAS) and then calls lookupalias().
  That is consistent across shells, but less modular and testable.  Bash
  also uses a global 'parser_state & PST_ALEXPNEXT'.

  Args:
    words: The non-assignment words of a simple command.
    cur_aliases: List of (alias name, word index) pairs already expanded.  It
      is appended to here and passed on to the nested parse.

  Returns:
    A command node if any aliases were expanded, or None otherwise.
  """
  # The result is unused; the call is retained so behavior is unchanged.
  word.RightMostSpanForWord(words[-1])

  first_word_str = None  # for error message
  pieces = []
  num_consumed = 0  # how many leading words were replaced by alias text

  for idx, w in enumerate(words):
    ok, word_str, quoted = word.StaticEval(w)
    if quoted or not ok:
      break

    alias_exp = self.aliases.get(word_str)
    if alias_exp is None:
      break

    # Prevent infinite loops.  This is subtle: we want to prevent infinite
    # expansion of alias echo='echo x'.  But we don't want to prevent
    # expansion of the second word in 'echo echo', so the word index is part
    # of the key stored in cur_aliases.
    if (word_str, idx) in cur_aliases:
      break

    if idx == 0:
      first_word_str = word_str  # for error message

    cur_aliases.append((word_str, idx))
    pieces.append(alias_exp)
    num_consumed = idx + 1

    if not alias_exp.endswith(' '):
      # alias e='echo [ ' is the same expansion as
      # alias e='echo ['
      # The trailing space only says whether to keep expanding the next
      # word; it's not part of the expansion.
      pieces.append(' ')
      break  # No more expansions

  if not pieces:  # No expansions; caller does parsing.
    return None

  # We got some expansion.  Now copy the source text of the remaining words.
  # Each NON-REDIRECT word is needed separately!  For example:
  #   $ echo one >out two
  # dash/mksh/zsh go beyond the first redirect!
  for w in words[num_consumed:]:
    first_spid = word.LeftMostSpanForWord(w)
    last_spid = word.RightMostSpanForWord(w)

    # Adapted from tools/osh2oil.py Cursor.PrintUntil
    for span_id in xrange(first_spid, last_spid + 1):
      span = self.arena.GetLineSpan(span_id)
      src_line = self.arena.GetLine(span.line_id)
      pieces.append(src_line[span.col : span.col + span.length])

    pieces.append(' ')  # Put space back between words.

  expansion_lines = ''.join(pieces).splitlines(True)  # Keep newlines

  line_info = []
  # TODO: Add location information
  self.arena.PushSource(
      '<expansion of alias %r at line %d of %s>' %
      (first_word_str, -1, 'TODO'))
  try:
    for line_num, text in enumerate(expansion_lines, 1):
      line_id = self.arena.AddLine(text, line_num)
      line_info.append((line_id, text, 0))
  finally:
    self.arena.PopSource()

  line_reader = reader.VirtualLineReader(line_info, self.arena)
  cp = self.parse_ctx.MakeOshParser(line_reader)

  # Failure to parse the alias expansion is a fatal error, so any exception
  # from the nested parser simply propagates to the caller.
  return cp.ParseCommand(cur_aliases=cur_aliases)
def _Dispatch(self, node, fork_external):
  """Execute one command node, selecting the behavior from node.tag.

  Args:
    node: A command node.
    fork_external: Passed through to RunSimpleCommand for SimpleCommand nodes.

  Returns:
    A (status, check_errexit) pair.  'status' is the integer exit status of
    the node.  'check_errexit' is True for the node types that set it below
    (simple commands, pipelines that aren't negated, subshells, [[ ]], (( )),
    and the last child of an && / || chain that actually ran).

  Raises:
    _ControlFlow: for a ControlFlow node that is valid where it appears.
    NotImplementedError: for '|&' pipelines and for unhandled node types.
  """
  # If we call RunCommandSub in a recursive call to the executor, this will
  # be set true (if strict-errexit is false).  But it only lasts for one
  # command.
  self.check_command_sub_status = False

  check_errexit = False  # for errexit

  if node.tag == command_e.SimpleCommand:
    check_errexit = True

    # Find span_id for a basic implementation of $LINENO, e.g.
    # PS4='+$SOURCE_NAME:$LINENO:'
    # NOTE: osh2oil uses node.more_env, but we don't need that.
    span_id = const.NO_INTEGER
    if node.words:
      first_word = node.words[0]
      span_id = word.LeftMostSpanForWord(first_word)

    self.mem.SetCurrentSpanId(span_id)

    # PROBLEM: We want to log argv in 'xtrace' mode, but we may have already
    # redirected here, which screws up logging.  For example, 'echo hi
    # >/dev/null 2>&1'.  We want to evaluate argv and log it BEFORE applying
    # redirects.

    # Another problem:
    # - tracing can be called concurrently from multiple processes, leading
    #   to overlap.  Maybe have a mode that creates a file per process.
    #   xtrace-proc
    # - line numbers for every command would be very nice.  But then you have
    #   to print the filename too.

    words = braces.BraceExpandWords(node.words)
    argv = self.word_ev.EvalWordSequence(words)

    # This comes before evaluating env, in case there are problems evaluating
    # it.  We could trace the env separately?  Also trace unevaluated code
    # with set -o verbose?
    self.tracer.OnSimpleCommand(argv)

    # NOTE: RunSimpleCommand never returns when fork_external=False!
    if node.more_env:  # I think this guard is necessary?
      self.mem.PushTemp()
      try:
        self._EvalTempEnv(node.more_env)
        status = self.RunSimpleCommand(argv, fork_external, span_id)
      finally:
        self.mem.PopTemp()
    else:
      status = self.RunSimpleCommand(argv, fork_external, span_id)

  elif node.tag == command_e.ExpandedAlias:
    # Expanded aliases need redirects and env bindings from the calling
    # context, as well as redirects in the expansion!

    # TODO: SetCurrentSpanId to OUTSIDE?  Don't bother with stuff inside
    # expansion, since aliases are discouraged.

    if node.more_env:
      self.mem.PushTemp()
      try:
        self._EvalTempEnv(node.more_env)
        status = self._Execute(node.child)
      finally:
        self.mem.PopTemp()
    else:
      status = self._Execute(node.child)

  elif node.tag == command_e.Sentence:
    # Don't check_errexit since this isn't a real node!
    if node.terminator.id == Id.Op_Semi:
      status = self._Execute(node.child)
    else:
      status = self._RunJobInBackground(node.child)

  elif node.tag == command_e.Pipeline:
    check_errexit = True
    if node.stderr_indices:
      raise NotImplementedError('|&')

    if node.negated:
      self._PushErrExit()
      try:
        status2 = self._RunPipeline(node)
      finally:
        self._PopErrExit()

      # errexit is disabled for !.
      check_errexit = False
      status = 1 if status2 == 0 else 0
    else:
      status = self._RunPipeline(node)

  elif node.tag == command_e.Subshell:
    check_errexit = True
    # This makes sure we don't waste a process if we'd launch one anyway.
    p = self._MakeProcess(node.command_list)
    status = p.Run(self.waiter)

  elif node.tag == command_e.DBracket:
    check_errexit = True
    result = self.bool_ev.Eval(node.expr)
    status = 0 if result else 1

  elif node.tag == command_e.DParen:
    check_errexit = True
    i = self.arith_ev.Eval(node.child)
    status = 0 if i != 0 else 1

  elif node.tag == command_e.Assignment:
    # TODO: Also do dynamic assignment here
    flags = word_compile.ParseAssignFlags(node.flags)

    if node.keyword == Id.Assign_Local:
      lookup_mode = scope_e.LocalOnly
    # typeset and declare are synonyms?  I see typeset -a a=() the most.
    elif node.keyword in (Id.Assign_Declare, Id.Assign_Typeset):
      # declare is like local, except it can also be used outside functions?
      if var_flags_e.Global in flags:
        lookup_mode = scope_e.GlobalOnly
      else:
        lookup_mode = scope_e.LocalOnly
    elif node.keyword == Id.Assign_Readonly:
      lookup_mode = scope_e.Dynamic
      flags.append(var_flags_e.ReadOnly)
    elif node.keyword == Id.Assign_None:  # mutate existing local or global
      lookup_mode = scope_e.Dynamic
    else:
      raise AssertionError(node.keyword)

    for pair in node.pairs:
      if pair.op == assign_op_e.PlusEqual:
        assert pair.rhs, pair.rhs  # I don't think a+= is valid?
        val = self.word_ev.EvalRhsWord(pair.rhs)
        old_val, lval = expr_eval.EvalLhsAndLookup(pair.lhs, self.arith_ev,
                                                   self.mem, self.exec_opts)
        sig = (old_val.tag, val.tag)
        if sig == (value_e.Undef, value_e.Str):
          pass  # val is RHS
        elif sig == (value_e.Undef, value_e.StrArray):
          pass  # val is RHS
        elif sig == (value_e.Str, value_e.Str):
          val = value.Str(old_val.s + val.s)
        elif sig == (value_e.Str, value_e.StrArray):
          e_die("Can't append array to string")
        elif sig == (value_e.StrArray, value_e.Str):
          e_die("Can't append string to array")
        elif sig == (value_e.StrArray, value_e.StrArray):
          val = value.StrArray(old_val.strs + val.strs)

      else:  # plain assignment
        spid = pair.spids[0]  # Source location for tracing
        lval = self._EvalLhs(pair.lhs, spid, lookup_mode)

        # RHS can be a string or array.
        if pair.rhs:
          val = self.word_ev.EvalRhsWord(pair.rhs)
          assert isinstance(val, value_t), val
        else:  # e.g. 'readonly x' or 'local x'
          val = None

      # NOTE: In bash and mksh, declare -a myarray makes an empty cell with
      # Undef value, but the 'array' attribute.

      self.mem.SetVar(lval, val, flags, lookup_mode)

      # Assignment always appears to have a spid.
      if node.spids:
        current_spid = node.spids[0]
      else:
        current_spid = const.NO_INTEGER
      self.mem.SetCurrentSpanId(current_spid)
      self.tracer.OnAssignment(lval, pair.op, val, flags, lookup_mode)

    # PATCH to be compatible with existing shells: If the assignment had a
    # command sub like:
    #
    # s=$(echo one; false)
    #
    # then its status will be in mem.last_status, and we can check it here.
    # If there was NOT a command sub in the assignment, then we don't want to
    # check it.
    if node.keyword == Id.Assign_None:  # mutate existing local or global
      # Only do this if there was a command sub?  How?  Look at node?
      # Set a flag in mem?
      if self.check_command_sub_status:
        self._CheckStatus(self.mem.last_status, node)
        # A global assignment shouldn't clear $?.
        status = self.mem.last_status
      else:
        status = 0
    else:
      # To be compatible with existing shells, local assignments DO clear
      # $?.  Even in strict mode, we don't need to bother setting
      # check_errexit = True, because we would have already checked the
      # command sub in RunCommandSub.
      status = 0
      # TODO: maybe we should have a "sane-status" that respects this:
      # false; echo $?; local f=x; echo $?

  elif node.tag == command_e.ControlFlow:
    tok = node.token

    if node.arg_word:  # Evaluate the argument
      val = self.word_ev.EvalWordToString(node.arg_word)
      assert val.tag == value_e.Str
      try:
        arg = int(val.s)  # They all take integers
      except ValueError:
        # e.g. 'return foo'.  Report it as a shell error instead of letting
        # a Python ValueError escape from the executor.
        e_die('%r expected a number, got %r', tok.val, val.s, token=tok)
    else:
      arg = 0  # return 0, exit 0, break 0 levels, etc.

    # NOTE: We don't do anything about a top-level 'return' here.  Unlike in
    # bash, that is OK.  If you can return from a sourced script, it makes
    # sense to return from a main script.
    ok = True
    if (tok.id in (Id.ControlFlow_Break, Id.ControlFlow_Continue) and
        self.loop_level == 0):
      ok = False
      msg = 'Invalid control flow at top level'

    if ok:
      raise _ControlFlow(tok, arg)

    if self.exec_opts.strict_control_flow:
      e_die(msg, token=tok)
    else:
      # Only print warnings, never fatal.
      # Bash oddly only exits 1 for 'return', but no other shell does.
      ui.PrintFilenameAndLine(tok.span_id, self.arena)
      util.warn(msg)
      status = 0

  # The only difference between these two is that CommandList has no
  # redirects.  We already took care of that above.
  elif node.tag in (command_e.CommandList, command_e.BraceGroup):
    status = self._ExecuteList(node.children)
    check_errexit = False

  elif node.tag == command_e.AndOr:
    # NOTE: && and || have EQUAL precedence in command mode.  See case #13
    # in dbracket.test.sh.

    left = node.children[0]

    # Suppress failure for every child except the last one.
    self._PushErrExit()
    try:
      status = self._Execute(left)
    finally:
      self._PopErrExit()

    i = 1
    n = len(node.children)
    while i < n:
      child = node.children[i]
      op_id = node.ops[i-1]

      if op_id == Id.Op_DPipe and status == 0:
        i += 1
        continue  # short circuit

      elif op_id == Id.Op_DAmp and status != 0:
        i += 1
        continue  # short circuit

      if i == n - 1:  # errexit handled differently for last child
        status = self._Execute(child)
        check_errexit = True
      else:
        self._PushErrExit()
        try:
          status = self._Execute(child)
        finally:
          self._PopErrExit()

      i += 1

  elif node.tag == command_e.WhileUntil:
    if node.keyword.id == Id.KW_While:
      _DonePredicate = lambda status: status != 0
    else:
      _DonePredicate = lambda status: status == 0

    status = 0

    self.loop_level += 1
    try:
      while True:
        self._PushErrExit()
        try:
          cond_status = self._ExecuteList(node.cond)
        finally:
          self._PopErrExit()

        if _DonePredicate(cond_status):
          break
        try:
          status = self._Execute(node.body)  # last one wins
        except _ControlFlow as e:
          if e.IsBreak():
            status = 0
            break
          elif e.IsContinue():
            status = 0
            continue
          else:  # return needs to pop up more
            raise
    finally:
      self.loop_level -= 1

  elif node.tag == command_e.ForEach:
    iter_name = node.iter_name
    if node.do_arg_iter:
      iter_list = self.mem.GetArgv()
    else:
      words = braces.BraceExpandWords(node.iter_words)
      iter_list = self.word_ev.EvalWordSequence(words)
      # We need word splitting and so forth
      # NOTE: This expands globs too.  TODO: We should pass in a Globber()
      # object.

    status = 0  # in case we don't loop
    self.loop_level += 1
    try:
      for x in iter_list:
        state.SetLocalString(self.mem, iter_name, x)

        try:
          status = self._Execute(node.body)  # last one wins
        except _ControlFlow as e:
          if e.IsBreak():
            status = 0
            break
          elif e.IsContinue():
            status = 0
          else:  # return needs to pop up more
            raise
    finally:
      self.loop_level -= 1

  elif node.tag == command_e.ForExpr:
    status = 0
    init, cond, body, update = node.init, node.cond, node.body, node.update
    if init:
      self.arith_ev.Eval(init)

    self.loop_level += 1
    try:
      while True:
        if cond:
          b = self.arith_ev.Eval(cond)
          if not b:
            break

        try:
          status = self._Execute(body)
        except _ControlFlow as e:
          if e.IsBreak():
            status = 0
            break
          elif e.IsContinue():
            status = 0
          else:  # return needs to pop up more
            raise

        if update:
          self.arith_ev.Eval(update)

    finally:
      self.loop_level -= 1

  elif node.tag == command_e.DoGroup:
    status = self._ExecuteList(node.children)
    check_errexit = False  # not real statements

  elif node.tag == command_e.FuncDef:
    # NOTE: Would it make sense to evaluate the redirects BEFORE entering?
    # It will save time on function calls.
    self.funcs[node.name] = node
    status = 0

  elif node.tag == command_e.If:
    done = False
    for arm in node.arms:
      self._PushErrExit()
      try:
        status = self._ExecuteList(arm.cond)
      finally:
        self._PopErrExit()

      if status == 0:
        status = self._ExecuteList(arm.action)
        done = True
        break

    # TODO: The compiler should flatten this
    if not done:
      if node.else_action is not None:
        status = self._ExecuteList(node.else_action)
      else:
        # No 'then' or 'else' list ran.  POSIX says the status of 'if' is
        # zero in that case, not the status of the failed condition.
        status = 0

  elif node.tag == command_e.NoOp:
    status = 0  # make it true

  elif node.tag == command_e.Case:
    val = self.word_ev.EvalWordToString(node.to_match)
    to_match = val.s

    status = 0  # If there are no arms, it should be zero?
    done = False

    for arm in node.arms:
      for pat_word in arm.pat_list:
        # NOTE: Is it OK that we're evaluating these as we go?

        # TODO: case "$@") shouldn't succeed?  That's a type error?
        # That requires strict-array?

        pat_val = self.word_ev.EvalWordToString(pat_word, do_fnmatch=True)
        if libc.fnmatch(pat_val.s, to_match):
          status = self._ExecuteList(arm.action)
          done = True  # TODO: Parse ;;& and for fallthrough and such?
          break  # Only execute action ONCE
      if done:
        break

  elif node.tag == command_e.TimeBlock:
    # TODO:
    # - When do we need RUSAGE_CHILDREN?
    # - Respect TIMEFORMAT environment variable.
    # "If this variable is not set, Bash acts as if it had the value"
    # $'\nreal\t%3lR\nuser\t%3lU\nsys\t%3lS'
    # "A trailing newline is added when the format string is displayed."

    start_t = time.time()  # calls gettimeofday() under the hood
    start_u = resource.getrusage(resource.RUSAGE_SELF)
    status = self._Execute(node.pipeline)

    end_t = time.time()
    end_u = resource.getrusage(resource.RUSAGE_SELF)

    real = end_t - start_t
    user = end_u.ru_utime - start_u.ru_utime
    sys_ = end_u.ru_stime - start_u.ru_stime
    libc.print_time(real, user, sys_)

  else:
    raise NotImplementedError(node.__class__.__name__)

  return status, check_errexit
def Eval(self, line):
  """Return 'line' with history references expanded.

  The line is returned unchanged when there is no readline module, when it
  contains no history tokens, or when the history is empty.  Otherwise the
  expanded line is also echoed to stdout, prefixed with '! '.

  Args:
    line: The line of input to expand.

  Returns:
    The expanded line.

  Raises:
    util.HistoryError: if a history reference can't be resolved.
  """
  if not self.readline_mod:
    return line

  tokens = list(HISTORY_LEXER.Tokens(line))
  # Common case: no history expansion.
  if all(id_ == Id.History_Other for (id_, _) in tokens):
    return line

  history_len = self.readline_mod.get_current_history_length()
  if history_len <= 0:  # no commands to expand
    return line

  self.debug_f.log('history length = %d', history_len)

  parts = []
  for id_, val in tokens:
    if id_ == Id.History_Other:
      out = val

    elif id_ == Id.History_Op:
      # TODO: the current line was ALREADY entered in the history, so we have
      # to subtract 1.  We should add it AFTER expansion, but Python's binding
      # might not allow that.  We probably need to fork it.
      prev = self.readline_mod.get_history_item(history_len - 1)
      if prev is None:
        # There is no item before the current line.  Fail the same way the
        # numeric branch below does instead of crashing on None later.
        raise util.HistoryError('%s: no previous command', val)

      ch = val[1]
      if ch == '!':
        out = prev
      else:
        self.parse_ctx.trail.Clear()  # not strictly necessary?
        line_reader = StringLineReader(prev, self.parse_ctx.arena)
        c_parser = self.parse_ctx.MakeOshParser(line_reader)
        try:
          c_parser.ParseLogicalLine()
        except util.ParseError as e:
          # Invalid command in history.  TODO: We should never enter these.
          self.debug_f.log(
              "Couldn't parse historical command %r: %s", prev, e)

        # NOTE: We're using the trail rather than the return value of
        # ParseLogicalLine because it handles cases like
        # $ for i in 1 2 3; do sleep ${i}; done
        # $ echo !$
        # which should expand to 'echo ${i}'
        words = self.parse_ctx.trail.words

        if ch == '^':
          try:
            w = words[1]
          except IndexError:
            raise util.HistoryError("No first word in %r", prev)
          spid1 = word.LeftMostSpanForWord(w)
          spid2 = word.RightMostSpanForWord(w)

        elif ch == '$':
          try:
            w = words[-1]
          except IndexError:
            raise util.HistoryError("No last word in %r", prev)
          spid1 = word.LeftMostSpanForWord(w)
          spid2 = word.RightMostSpanForWord(w)

        elif ch == '*':
          try:
            w1 = words[1]
            w2 = words[-1]
          except IndexError:
            raise util.HistoryError(
                "Couldn't find words in %r", prev)
          spid1 = word.LeftMostSpanForWord(w1)
          spid2 = word.RightMostSpanForWord(w2)

        else:
          raise AssertionError(ch)

        # Slice the selected words out of the previous line by column.
        arena = self.parse_ctx.arena
        span1 = arena.GetLineSpan(spid1)
        span2 = arena.GetLineSpan(spid2)

        begin = span1.col
        end = span2.col + span2.length

        out = prev[begin:end]

    elif id_ == Id.History_Num:
      index = int(val[1:])  # regex ensures this.  Maybe have - on the front.
      if index < 0:
        num = history_len + index
      else:
        num = index

      out = self.readline_mod.get_history_item(num)
      if out is None:  # out of range
        raise util.HistoryError('%s: not found', val)

    elif id_ == Id.History_Search:
      # Search backward
      prefix = None
      substring = None
      if val[1] == '?':
        substring = val[2:]
      else:
        prefix = val[1:]

      out = None
      # History items are one-based, and item 'history_len' is the line being
      # expanded right now (see the TODO above).  So search from the previous
      # item down to item 1 inclusive; otherwise '!?x' could match the
      # current line itself and the oldest entry would never be examined.
      for i in xrange(history_len - 1, 0, -1):
        cmd = self.readline_mod.get_history_item(i)
        if prefix and cmd.startswith(prefix):
          out = cmd
        if substring and substring in cmd:
          out = cmd
        if out is not None:
          break

      if out is None:
        raise util.HistoryError('%r found no results', val)

    else:
      raise AssertionError(id_)

    parts.append(out)

  line = ''.join(parts)
  # show what we expanded to
  sys.stdout.write('! %s' % line)
  return line
def Matches(self, comp):
  """Generate completion candidates for the line being edited.

  The line up to the cursor is parsed, and the parser's trail (tokens, words,
  redirects) is inspected.  Completion of the shell language itself is tried
  first: variable names, ~user, and redirect arguments.  If none of those
  apply, a user-defined completion spec is looked up for the command.

  Args:
    comp: Callback args from readline.  Readline uses set_completer_delims to
      tokenize the string.  comp.Update() is called here to fill in what the
      completion actions need.

  Yields:
    Candidates.  The shell-language branches yield the line up to the cursor
    plus the missing suffix; the last branch yields what _PostProcess
    produces.
  """
  arena = self.parse_ctx.arena  # Used by inner functions

  # Pass the original line "out of band" to the completion callback.
  line_until_tab = comp.line[:comp.end]
  self.comp_ui_state.line_until_tab = line_until_tab

  self.parse_ctx.trail.Clear()
  line_reader = reader.StringLineReader(line_until_tab, self.parse_ctx.arena)
  c_parser = self.parse_ctx.MakeOshParser(line_reader, emit_comp_dummy=True)

  # We want the output from parse_ctx, so we don't use the return value.
  try:
    c_parser.ParseLogicalLine()
  except util.ParseError:
    # e.g. 'ls | ' will not parse.  Now inspect the parser state!
    pass

  debug_f = self.debug_f
  trail = self.parse_ctx.trail
  trail.PrintDebugString(debug_f)

  #
  # First try completing the shell language itself.
  #

  # NOTE: We get Eof_Real in the command state, but not in the middle of a
  # BracedVarSub.  This is due to the difference between the CommandParser
  # and WordParser.
  tokens = trail.tokens
  last = -1
  if tokens[-1].id == Id.Eof_Real:
    last -= 1  # ignore it

  try:
    t1 = tokens[last]
  except IndexError:
    t1 = None
  try:
    t2 = tokens[last-1]
  except IndexError:
    t2 = None

  debug_f.log('line: %r', comp.line)
  debug_f.log('rl_slice from byte %d to %d: %r', comp.begin, comp.end,
              comp.line[comp.begin:comp.end])

  debug_f.log('t1 %s', t1)
  debug_f.log('t2 %s', t2)

  # Each of the 'yield' statements below returns a fully-completed line, to
  # appease the readline library.  The root cause of this dance: If there's
  # one candidate, readline is responsible for redrawing the input line.  OSH
  # only displays candidates and never redraws the input line.

  def _TokenStart(tok):
    span = arena.GetLineSpan(tok.span_id)
    return span.col

  if t2:  # We always have t1?
    # echo $
    if IsDollar(t2) and IsDummy(t1):
      self.comp_ui_state.display_pos = _TokenStart(t2) + 1  # 1 for $
      for name in self.mem.VarNames():
        yield line_until_tab + name  # no need to quote var names
      return

    # echo ${
    if t2.id == Id.Left_DollarBrace and IsDummy(t1):
      self.comp_ui_state.display_pos = _TokenStart(t2) + 2  # 2 for ${
      for name in self.mem.VarNames():
        yield line_until_tab + name  # no need to quote var names
      return

    # echo $P
    if t2.id == Id.VSub_DollarName and IsDummy(t1):
      # Example: ${undef:-$P
      # readline splits at ':' so we have to prepend '-$' to every completed
      # variable name.
      self.comp_ui_state.display_pos = _TokenStart(t2) + 1  # 1 for $
      to_complete = t2.val[1:]
      n = len(to_complete)
      for name in self.mem.VarNames():
        if name.startswith(to_complete):
          yield line_until_tab + name[n:]  # no need to quote var names
      return

    # echo ${P
    if t2.id == Id.VSub_Name and IsDummy(t1):
      self.comp_ui_state.display_pos = _TokenStart(t2)  # no offset
      to_complete = t2.val
      n = len(to_complete)
      for name in self.mem.VarNames():
        if name.startswith(to_complete):
          yield line_until_tab + name[n:]  # no need to quote var names
      return

    # echo $(( VAR
    if t2.id == Id.Lit_ArithVarLike and IsDummy(t1):
      self.comp_ui_state.display_pos = _TokenStart(t2)  # no offset
      to_complete = t2.val
      n = len(to_complete)
      for name in self.mem.VarNames():
        if name.startswith(to_complete):
          yield line_until_tab + name[n:]  # no need to quote var names
      return

  if trail.words:
    # echo ~<TAB>
    # echo ~a<TAB> $(home dirs)
    # This must be done at a word level, and TildeDetectAll() does NOT help
    # here, because they don't have trailing slashes yet!  We can't do it on
    # tokens, because otherwise f~a will complete.  Looking at word_part is
    # EXACTLY what we want.
    parts = trail.words[-1].parts
    if (len(parts) == 2 and
        parts[0].tag == word_part_e.LiteralPart and
        parts[1].tag == word_part_e.LiteralPart and
        parts[0].token.id == Id.Lit_TildeLike and
        parts[1].token.id == Id.Lit_CompDummy):
      t2 = parts[0].token

      # +1 for ~
      self.comp_ui_state.display_pos = _TokenStart(parts[0].token) + 1

      to_complete = t2.val[1:]
      n = len(to_complete)
      for u in pwd.getpwall():  # catch errors?
        name = u.pw_name
        if name.startswith(to_complete):
          yield line_until_tab + ShellQuoteB(name[n:]) + '/'
      return

  # echo hi > f<TAB>   (complete redirect arg)
  if trail.redirects:
    r = trail.redirects[-1]
    # Only complete 'echo >', but not 'echo >&' or 'cat <<'
    if (r.tag == redir_e.Redir and
        REDIR_ARG_TYPES[r.op.id] == redir_arg_type_e.Path):
      if WordEndsWithCompDummy(r.arg_word):
        debug_f.log('Completing redirect arg')

        try:
          val = self.word_ev.EvalWordToString(r.arg_word)
        except util.FatalRuntimeError as e:
          debug_f.log('Error evaluating redirect word: %s', e)
          return
        if val.tag != value_e.Str:
          debug_f.log("Didn't get a string from redir arg")
          return

        span_id = word.LeftMostSpanForWord(r.arg_word)
        span = arena.GetLineSpan(span_id)

        self.comp_ui_state.display_pos = span.col

        comp.Update(to_complete=val.s)  # FileSystemAction uses only this
        n = len(val.s)
        action = FileSystemAction(add_slash=True)
        for name in action.Matches(comp):
          yield line_until_tab + ShellQuoteB(name[n:])
        return

  #
  # We're not completing the shell language.  Delegate to user-defined
  # completion for external tools.
  #

  # Set below, and set on retries.
  base_opts = None
  user_spec = None

  # Used on retries.
  partial_argv = []
  num_partial = -1
  first = None

  if trail.words:
    # Now check if we're completing a word!
    if WordEndsWithCompDummy(trail.words[-1]):
      debug_f.log('Completing words')
      #
      # It didn't look like we need to complete var names, tilde, redirects,
      # etc.  Now try partial_argv, which may involve invoking PLUGINS.

      # needed to complete paths with ~
      words2 = word.TildeDetectAll(trail.words)

      for w in words2:
        try:
          # TODO:
          # - Should we call EvalWordSequence?  But turn globbing off?  It
          #   can do splitting and such.
          # - We could have a variant to eval TildeSubPart to ~ ?
          val = self.word_ev.EvalWordToString(w)
        except util.FatalRuntimeError:
          # Why would it fail?
          continue
        if val.tag == value_e.Str:
          partial_argv.append(val.s)

      debug_f.log('partial_argv: %s', partial_argv)
      num_partial = len(partial_argv)

      first = partial_argv[0]
      alias_first = None
      debug_f.log('alias_words: %s', trail.alias_words)

      if trail.alias_words:
        w = trail.alias_words[0]
        try:
          alias_val = self.word_ev.EvalWordToString(w)
        except util.FatalRuntimeError:
          # Leave alias_first as None.  Reading a value here would pick up
          # the result for an unrelated word evaluated in the loop above.
          pass
        else:
          if alias_val.tag == value_e.Str:
            alias_first = alias_val.s
        debug_f.log('alias_first: %s', alias_first)

      if num_partial == 0:  # should never happen because of Lit_CompDummy
        raise AssertionError
      elif num_partial == 1:
        base_opts, user_spec = self.comp_lookup.GetFirstSpec()

        # Display/replace since the beginning of the first word.  Note: this
        # is non-zero in the case of
        # echo $(gr   and
        # echo `gr
        span_id = word.LeftMostSpanForWord(trail.words[0])
        span = arena.GetLineSpan(span_id)
        self.comp_ui_state.display_pos = span.col
        self.debug_f.log('** DISPLAY_POS = %d',
                         self.comp_ui_state.display_pos)

      else:
        base_opts, user_spec = self.comp_lookup.GetSpecForName(first)
        if not user_spec and alias_first:
          base_opts, user_spec = self.comp_lookup.GetSpecForName(alias_first)
          if user_spec:
            # Pass the aliased command to the user-defined function, and use
            # it for retries.
            first = alias_first
        if not user_spec:
          base_opts, user_spec = self.comp_lookup.GetFallback()

        # Display since the beginning
        span_id = word.LeftMostSpanForWord(trail.words[-1])
        span = arena.GetLineSpan(span_id)
        self.comp_ui_state.display_pos = span.col
        self.debug_f.log('words[-1]: %r', trail.words[-1])

        self.debug_f.log('display_pos %d', self.comp_ui_state.display_pos)

      # Update the API for user-defined functions.
      index = len(partial_argv) - 1  # COMP_CWORD is -1 when it's empty
      prev = '' if index == 0 else partial_argv[index-1]
      comp.Update(first=first, to_complete=partial_argv[-1],
                  prev=prev, index=index, partial_argv=partial_argv)

  # This happens in the case of [[ and ((, or a syntax error like 'echo < >'.
  if not user_spec:
    debug_f.log("Didn't find anything to complete")
    return

  # Reset it back to what was registered.  User-defined functions can mutate
  # it.
  dynamic_opts = {}
  self.compopt_state.dynamic_opts = dynamic_opts
  self.compopt_state.currently_completing = True
  try:
    done = False
    while not done:
      try:
        for candidate in self._PostProcess(
            base_opts, dynamic_opts, user_spec, comp):
          yield candidate
      except _RetryCompletion:
        debug_f.log('Got 124, trying again ...')

        # Get another user_spec.  The ShellFuncAction may have 'sourced' code
        # and run 'complete' to mutate comp_lookup, and we want to get that
        # new entry.
        if num_partial == 0:
          raise AssertionError
        elif num_partial == 1:
          base_opts, user_spec = self.comp_lookup.GetFirstSpec()
        else:
          # (already processed alias_first)
          base_opts, user_spec = self.comp_lookup.GetSpecForName(first)
          if not user_spec:
            base_opts, user_spec = self.comp_lookup.GetFallback()
      else:
        done = True  # exhausted candidates without getting a retry
  finally:
    self.compopt_state.currently_completing = False