def _InitVarsFromEnv(self, environ): # This is the way dash and bash work -- at startup, they turn everything in # 'environ' variable into shell variables. Bash has an export_env # variable. Dash has a loop through environ in init.c for n, v in environ.iteritems(): self.SetVar(ast.LhsName(n), runtime.Str(v), (var_flags_e.Exported, ), scope_e.GlobalOnly) # If it's not in the environment, initialize it. This makes it easier to # update later in ExecOpts. # TODO: IFS, PWD, etc. should follow this pattern. Maybe need a SysCall # interface? self.syscall.getcwd() etc. v = self.GetVar('SHELLOPTS') if v.tag == value_e.Undef: SetGlobalString(self, 'SHELLOPTS', '') # Now make it readonly self.SetVar(ast.LhsName('SHELLOPTS'), None, (var_flags_e.ReadOnly, ), scope_e.GlobalOnly) v = self.GetVar('HOME') if v.tag == value_e.Undef: home_dir = util.GetHomeDir() or '~' # No expansion if not found? SetGlobalString(self, 'HOME', home_dir)
def SetStringDynamic(mem, name, s): """Set a string by looking up the stack. Used for getopts. """ assert isinstance(s, str) mem.SetVar(ast.LhsName(name), runtime.Str(s), (), scope_e.Dynamic)
def testShellFuncExecution(self): ex = cmd_exec_test.InitExecutor() func_node = ast.FuncDef() c1 = ast.CompoundWord() t1 = ast.token(Id.Lit_Chars, 'f1') c1.parts.append(ast.LiteralPart(t1)) c2 = ast.CompoundWord() t2 = ast.token(Id.Lit_Chars, 'f2') c2.parts.append(ast.LiteralPart(t2)) a = ast.ArrayLiteralPart() a.words = [c1, c2] w = ast.CompoundWord() w.parts.append(a) # Set global COMPREPLY=(f1 f2) pair = ast.assign_pair(ast.LhsName('COMPREPLY'), assign_op_e.Equal, w) pair.spids.append(0) # dummy pairs = [pair] body_node = ast.Assignment(Id.Assign_None, [], pairs) func_node.name = 'myfunc' func_node.body = body_node a = completion.ShellFuncAction(ex, func_node) matches = list(a.Matches([], 0, 'f')) self.assertEqual(['f1 ', 'f2 '], matches)
def SetLocalString(mem, name, s): """Set a local string. Used for: 1) for loop iteration variables 2) temporary environments like FOO=bar BAR=$FOO cmd, 3) read builtin """ assert isinstance(s, str) mem.SetVar(ast.LhsName(name), runtime.Str(s), (), scope_e.LocalOnly)
def ToLValue(node): """Determine if a node is a valid L-value by whitelisting tags. Args: node: ExprNode (could be VarExprNode or BinaryExprNode) """ # foo = bar, foo[1] = bar if node.tag == arith_expr_e.ArithVarRef: return ast.LhsName(node.name) if node.tag == arith_expr_e.ArithBinary: # For example, a[0][0] = 1 is NOT valid. if (node.op_id == Id.Arith_LBracket and node.left.tag == arith_expr_e.ArithVarRef): return ast.LhsIndexedName(node.left.name, node.right) return None
def ToLValue(node): """Determine if a node is a valid L-value by whitelisting tags. Args: node: ExprNode (could be VarExprNode or BinaryExprNode) """ # foo = bar, foo[1] = bar if node.tag == arith_expr_e.ArithVarRef: # For consistency with osh/cmd_parse.py, append a span_id. # TODO: (( a[ x ] = 1 )) and a[x]=1 should use different LST nodes. lhs_expr = ast.LhsName(node.token.val) lhs_expr.spids.append(node.token.span_id) return lhs_expr if node.tag == arith_expr_e.ArithBinary: # For example, a[0][0] = 1 is NOT valid. if (node.op_id == Id.Arith_LBracket and node.left.tag == arith_expr_e.ArithVarRef): return ast.LhsIndexedName(node.left.token.val, node.right) return None
def _Dispatch(self, node, fork_external): # If we call RunCommandSub in a recursive call to the executor, this will # be set true (if strict-errexit is false). But it only lasts for one # command. self.check_command_sub_status = False #argv0 = None # for error message check_errexit = False # for errexit if node.tag == command_e.SimpleCommand: check_errexit = True # Find span_id for a basic implementation of $LINENO, e.g. # PS4='+$SOURCE_NAME:$LINENO:' # NOTE: osh2oil uses node.more_env, but we don't need that. span_id = const.NO_INTEGER if node.words: first_word = node.words[0] span_id = word.LeftMostSpanForWord(first_word) self.mem.SetCurrentSpanId(span_id) # PROBLEM: We want to log argv in 'xtrace' mode, but we may have already # redirected here, which screws up logging. For example, 'echo hi # >/dev/null 2>&1'. We want to evaluate argv and log it BEFORE applying # redirects. # Another problem: # - tracing can be called concurrently from multiple processes, leading # to overlap. Maybe have a mode that creates a file per process. # xtrace-proc # - line numbers for every command would be very nice. But then you have # to print the filename too. words = braces.BraceExpandWords(node.words) argv = self.word_ev.EvalWordSequence(words) # This comes before evaluating env, in case there are problems evaluating # it. We could trace the env separately? Also trace unevaluated code # with set-o verbose? self.tracer.OnSimpleCommand(argv) if node.more_env: self.mem.PushTemp() try: for env_pair in node.more_env: val = self.word_ev.EvalWordToString(env_pair.val) # Set each var so the next one can reference it. Example: # FOO=1 BAR=$FOO ls / self.mem.SetVar(ast.LhsName(env_pair.name), val, (var_flags_e.Exported,), scope_e.TempEnv) # NOTE: This might never return! In the case of fork_external=False. status = self._RunSimpleCommand(argv, fork_external, span_id) finally: if node.more_env: self.mem.PopTemp() elif node.tag == command_e.Sentence: # Don't check_errexit since this isn't a real node! if node.terminator.id == Id.Op_Semi: status = self._Execute(node.child) else: status = self._RunJobInBackground(node.child) elif node.tag == command_e.Pipeline: check_errexit = True if node.stderr_indices: raise NotImplementedError('|&') if node.negated: self._PushErrExit() try: status2 = self._RunPipeline(node) finally: self._PopErrExit() # errexit is disabled for !. check_errexit = False status = 1 if status2 == 0 else 0 else: status = self._RunPipeline(node) elif node.tag == command_e.Subshell: check_errexit = True # This makes sure we don't waste a process if we'd launch one anyway. p = self._MakeProcess(node.child) status = p.Run(self.waiter) elif node.tag == command_e.DBracket: check_errexit = True result = self.bool_ev.Eval(node.expr) status = 0 if result else 1 elif node.tag == command_e.DParen: check_errexit = True i = self.arith_ev.Eval(node.child) status = 0 if i != 0 else 1 elif node.tag == command_e.Assignment: flags = word_compile.ParseAssignFlags(node.flags) if node.keyword == Id.Assign_Local: lookup_mode = scope_e.LocalOnly # typeset and declare are synonyms? I see typeset -a a=() the most. elif node.keyword in (Id.Assign_Declare, Id.Assign_Typeset): # declare is like local, except it can also be used outside functions? if var_flags_e.Global in flags: lookup_mode = scope_e.GlobalOnly else: lookup_mode = scope_e.LocalOnly elif node.keyword == Id.Assign_Readonly: lookup_mode = scope_e.Dynamic flags.append(var_flags_e.ReadOnly) elif node.keyword == Id.Assign_None: # mutate existing local or global lookup_mode = scope_e.Dynamic else: raise AssertionError(node.keyword) for pair in node.pairs: if pair.op == assign_op_e.PlusEqual: assert pair.rhs, pair.rhs # I don't think a+= is valid? val = self.word_ev.EvalRhsWord(pair.rhs) old_val, lval = expr_eval.EvalLhsAndLookup(pair.lhs, self.arith_ev, self.mem, self.exec_opts) sig = (old_val.tag, val.tag) if sig == (value_e.Undef, value_e.Str): pass # val is RHS elif sig == (value_e.Undef, value_e.StrArray): pass # val is RHS elif sig == (value_e.Str, value_e.Str): val = runtime.Str(old_val.s + val.s) elif sig == (value_e.Str, value_e.StrArray): e_die("Can't append array to string") elif sig == (value_e.StrArray, value_e.Str): e_die("Can't append string to array") elif sig == (value_e.StrArray, value_e.StrArray): val = runtime.StrArray(old_val.strs + val.strs) else: # plain assignment spid = pair.spids[0] # Source location for tracing lval = self._EvalLhs(pair.lhs, spid, lookup_mode) # RHS can be a string or array. if pair.rhs: val = self.word_ev.EvalRhsWord(pair.rhs) assert isinstance(val, runtime.value), val else: # e.g. 'readonly x' or 'local x' val = None # NOTE: In bash and mksh, declare -a myarray makes an empty cell with # Undef value, but the 'array' attribute. #log('setting %s to %s with flags %s', lval, val, flags) self.mem.SetVar(lval, val, flags, lookup_mode, strict_array=self.exec_opts.strict_array) # Assignment always appears to have a spid. if node.spids: current_spid = node.spids[0] else: current_spid = const.NO_INTEGER self.mem.SetCurrentSpanId(current_spid) self.tracer.OnAssignment(lval, pair.op, val, flags, lookup_mode) # PATCH to be compatible with existing shells: If the assignment had a # command sub like: # # s=$(echo one; false) # # then its status will be in mem.last_status, and we can check it here. # If there was NOT a command sub in the assignment, then we don't want to # check it. if node.keyword == Id.Assign_None: # mutate existing local or global # Only do this if there was a command sub? How? Look at node? # Set a flag in mem? self.mem.last_status or if self.check_command_sub_status: self._CheckStatus(self.mem.last_status, node) # A global assignment shouldn't clear $?. status = self.mem.last_status else: status = 0 else: # To be compatible with existing shells, local assignments DO clear # $?. Even in strict mode, we don't need to bother setting # check_errexit = True, because we would have already checked the # command sub in RunCommandSub. status = 0 # TODO: maybe we should have a "sane-status" that respects this: # false; echo $?; local f=x; echo $? elif node.tag == command_e.ControlFlow: if node.arg_word: # Evaluate the argument val = self.word_ev.EvalWordToString(node.arg_word) assert val.tag == value_e.Str arg = int(val.s) # They all take integers else: arg = 0 # return 0, exit 0, break 0 levels, etc. # NOTE: We don't do anything about a top-level 'return' here. Unlike in # bash, that is OK. If you can return from a sourced script, it makes # sense to return from a main script. ok = True tok = node.token if (tok.id in (Id.ControlFlow_Break, Id.ControlFlow_Continue) and self.loop_level == 0): ok = False msg = 'Invalid control flow at top level' if ok: raise _ControlFlow(tok, arg) if self.exec_opts.strict_control_flow: e_die(msg, token=tok) else: # Only print warnings, never fatal. # Bash oddly only exits 1 for 'return', but no other shell does. ui.PrintFilenameAndLine(tok.span_id, self.arena) util.warn(msg) status = 0 # The only difference between these two is that CommandList has no # redirects. We already took care of that above. elif node.tag in (command_e.CommandList, command_e.BraceGroup): status = self._ExecuteList(node.children) check_errexit = False elif node.tag == command_e.AndOr: # NOTE: && and || have EQUAL precedence in command mode. See case #13 # in dbracket.test.sh. left = node.children[0] # Suppress failure for every child except the last one. self._PushErrExit() try: status = self._Execute(left) finally: self._PopErrExit() i = 1 n = len(node.children) while i < n: #log('i %d status %d', i, status) child = node.children[i] op_id = node.ops[i-1] #log('child %s op_id %s', child, op_id) if op_id == Id.Op_DPipe and status == 0: i += 1 continue # short circuit elif op_id == Id.Op_DAmp and status != 0: i += 1 continue # short circuit if i == n - 1: # errexit handled differently for last child status = self._Execute(child) check_errexit = True else: self._PushErrExit() try: status = self._Execute(child) finally: self._PopErrExit() i += 1 elif node.tag == command_e.WhileUntil: if node.keyword.id == Id.KW_While: _DonePredicate = lambda status: status != 0 else: _DonePredicate = lambda status: status == 0 status = 0 self.loop_level += 1 try: while True: self._PushErrExit() try: cond_status = self._ExecuteList(node.cond) finally: self._PopErrExit() done = cond_status != 0 if _DonePredicate(cond_status): break try: status = self._Execute(node.body) # last one wins except _ControlFlow as e: if e.IsBreak(): status = 0 break elif e.IsContinue(): status = 0 continue else: # return needs to pop up more raise finally: self.loop_level -= 1 elif node.tag == command_e.ForEach: iter_name = node.iter_name if node.do_arg_iter: iter_list = self.mem.GetArgv() else: words = braces.BraceExpandWords(node.iter_words) iter_list = self.word_ev.EvalWordSequence(words) # We need word splitting and so forth # NOTE: This expands globs too. TODO: We should pass in a Globber() # object. status = 0 # in case we don't loop self.loop_level += 1 try: for x in iter_list: #log('> ForEach setting %r', x) state.SetLocalString(self.mem, iter_name, x) #log('<') try: status = self._Execute(node.body) # last one wins except _ControlFlow as e: if e.IsBreak(): status = 0 break elif e.IsContinue(): status = 0 else: # return needs to pop up more raise finally: self.loop_level -= 1 elif node.tag == command_e.ForExpr: status = 0 init, cond, body, update = node.init, node.cond, node.body, node.update if init: self.arith_ev.Eval(init) self.loop_level += 1 try: while True: if cond: b = self.arith_ev.Eval(cond) if not b: break try: status = self._Execute(body) except _ControlFlow as e: if e.IsBreak(): status = 0 break elif e.IsContinue(): status = 0 else: # return needs to pop up more raise if update: self.arith_ev.Eval(update) finally: self.loop_level -= 1 elif node.tag == command_e.DoGroup: status = self._ExecuteList(node.children) check_errexit = False # not real statements elif node.tag == command_e.FuncDef: # NOTE: Would it make sense to evaluate the redirects BEFORE entering? # It will save time on function calls. self.funcs[node.name] = node status = 0 elif node.tag == command_e.If: done = False for arm in node.arms: self._PushErrExit() try: status = self._ExecuteList(arm.cond) finally: self._PopErrExit() if status == 0: status = self._ExecuteList(arm.action) done = True break # TODO: The compiler should flatten this if not done and node.else_action is not None: status = self._ExecuteList(node.else_action) elif node.tag == command_e.NoOp: status = 0 # make it true elif node.tag == command_e.Case: val = self.word_ev.EvalWordToString(node.to_match) to_match = val.s status = 0 # If there are no arms, it should be zero? done = False for arm in node.arms: for pat_word in arm.pat_list: # NOTE: Is it OK that we're evaluating these as we go? # TODO: case "$@") shouldn't succeed? That's a type error? # That requires strict-array? pat_val = self.word_ev.EvalWordToString(pat_word, do_fnmatch=True) #log('Matching word %r against pattern %r', to_match, pat_val.s) if libc.fnmatch(pat_val.s, to_match): status = self._ExecuteList(arm.action) done = True # TODO: Parse ;;& and for fallthrough and such? break # Only execute action ONCE if done: break elif node.tag == command_e.TimeBlock: # TODO: # - When do we need RUSAGE_CHILDREN? # - Respect TIMEFORMAT environment variable. # "If this variable is not set, Bash acts as if it had the value" # $'\nreal\t%3lR\nuser\t%3lU\nsys\t%3lS' # "A trailing newline is added when the format string is displayed." start_t = time.time() # calls gettimeofday() under the hood start_u = resource.getrusage(resource.RUSAGE_SELF) status = self._Execute(node.pipeline) end_t = time.time() end_u = resource.getrusage(resource.RUSAGE_SELF) real = end_t - start_t user = end_u.ru_utime - start_u.ru_utime sys_ = end_u.ru_stime - start_u.ru_stime libc.print_time(real, user, sys_) else: raise NotImplementedError(node.__class__.__name__) return status, check_errexit
def SetGlobalArray(mem, name, a): """Helper for completion.""" assert isinstance(a, list) mem.SetVar(ast.LhsName(name), runtime.StrArray(a), (), scope_e.GlobalOnly)
def SetGlobalString(mem, name, s): """Helper for completion, $PWD, etc.""" assert isinstance(s, str) val = runtime.Str(s) mem.SetVar(ast.LhsName(name), val, (), scope_e.GlobalOnly)
def ParseSimpleCommand(self): """ Fixed transcription of the POSIX grammar (TODO: port to grammar/Shell.g) io_file : '<' filename | LESSAND filename ... io_here : DLESS here_end | DLESSDASH here_end redirect : IO_NUMBER (io_redirect | io_here) prefix_part : ASSIGNMENT_WORD | redirect cmd_part : WORD | redirect assign_kw : Declare | Export | Local | Readonly # Without any words it is parsed as a command, not an assigment assign_listing : assign_kw # Now we have something to do (might be changing assignment flags too) # NOTE: any prefixes should be a warning, but they are allowed in shell. assignment : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+ # an external command, a function call, or a builtin -- a "word_command" word_command : prefix_part* cmd_part+ simple_command : assign_listing | assignment | proc_command Simple imperative algorithm: 1) Read a list of words and redirects. Append them to separate lists. 2) Look for the first non-assignment word. If it's declare, etc., then keep parsing words AND assign words. Otherwise, just parse words. 3) If there are no non-assignment words, then it's a global assignment. { redirects, global assignments } OR { redirects, prefix_bindings, words } OR { redirects, ERROR_prefix_bindings, keyword, assignments, words } THEN CHECK that prefix bindings don't have any array literal parts! global assignment and keyword assignments can have the of course. well actually EXPORT shouldn't have them either -- WARNING 3 cases we want to warn: prefix_bindings for assignment, and array literal in prefix bindings, or export A command can be an assignment word, word, or redirect on its own. ls >out.txt >out.txt FOO=bar # this touches the file, and hten Or any sequence: ls foo bar <in.txt ls foo bar >out.txt <in.txt ls >out.txt foo bar Or add one or more environment bindings: VAR=val env >out.txt VAR=val env here_end vs filename is a matter of whether we test that it's quoted. e.g. <<EOF vs <<'EOF'. """ result = self._ScanSimpleCommand() if not result: return None redirects, words = result if not words: # e.g. >out.txt # redirect without words node = ast.SimpleCommand() node.redirects = redirects return node prefix_bindings, suffix_words = self._SplitSimpleCommandPrefix(words) if not suffix_words: # ONE=1 TWO=2 (with no other words) if redirects: binding1 = prefix_bindings[0] _, _, _, spid = binding1 self.AddErrorContext('Got redirects in global assignment', span_id=spid) return None pairs = [] for lhs, op, rhs, spid in prefix_bindings: p = ast.assign_pair(ast.LhsName(lhs), op, rhs) p.spids.append(spid) pairs.append(p) node = ast.Assignment(Id.Assign_None, [], pairs) left_spid = word.LeftMostSpanForWord(words[0]) node.spids.append(left_spid) # no keyword spid to skip past return node kind, kw_token = word.KeywordToken(suffix_words[0]) if kind == Kind.Assign: # Here we StaticEval suffix_words[1] to see if it's a command like # 'typeset -p'. Then it becomes a SimpleCommand node instead of an # Assignment. Note we're not handling duplicate flags like 'typeset # -pf'. I see this in bashdb (bash debugger) but it can just be changed # to 'typeset -p -f'. is_command = False if len(suffix_words) > 1: ok, val, _ = word.StaticEval(suffix_words[1]) if ok and (kw_token.id, val) in self._ASSIGN_COMMANDS: is_command = True if is_command: # declare -f, declare -p, typeset -p, etc. node = self._MakeSimpleCommand(prefix_bindings, suffix_words, redirects) return node else: # declare str='', declare -a array=() if redirects: # Attach the error location to the keyword. It would be more precise # to attach it to the self.AddErrorContext('Got redirects in assignment', token=kw_token) return None if prefix_bindings: # FOO=bar local spam=eggs not allowed # Use the location of the first value. TODO: Use the whole word # before splitting. _, _, v0, _ = prefix_bindings[0] self.AddErrorContext( 'Invalid prefix bindings in assignment: %s', prefix_bindings, word=v0) return None node = self._MakeAssignment(kw_token.id, suffix_words) if not node: return None node.spids.append(kw_token.span_id) return node elif kind == Kind.ControlFlow: if redirects: self.AddErrorContext('Got redirects in control flow: %s', redirects) return None if prefix_bindings: # FOO=bar local spam=eggs not allowed # Use the location of the first value. TODO: Use the whole word before # splitting. _, _, v0, _ = prefix_bindings[0] self.AddErrorContext( 'Invalid prefix bindings in control flow: %s', prefix_bindings, word=v0) return None # Attach the token for errors. (Assignment may not need it.) if len(suffix_words) == 1: arg_word = None elif len(suffix_words) == 2: arg_word = suffix_words[1] else: # Underline the extra word. self.AddErrorContext( 'Unexpected argument to %r', kw_token.val, word=suffix_words[2]) return None return ast.ControlFlow(kw_token, arg_word) else: node = self._MakeSimpleCommand(prefix_bindings, suffix_words, redirects) return node
def _MakeAssignment(self, assign_kw, suffix_words): # First parse flags, e.g. -r -x -a -A. None of the flags have arguments. flags = [] n = len(suffix_words) i = 1 while i < n: w = suffix_words[i] ok, static_val, quoted = word.StaticEval(w) if not ok or quoted: break # can't statically evaluate if static_val.startswith('-'): flags.append(static_val) else: break # not a flag, rest are args i += 1 # Now parse bindings or variable names assignments = [] while i < n: w = suffix_words[i] left_spid = word.LeftMostSpanForWord(w) kov = word.LooksLikeAssignment(w) if kov: k, op, v = kov t = word.TildeDetect(v) if t: # t is an unevaluated word with TildeSubPart a = (k, op, t, left_spid) else: a = (k, op, v, left_spid) # v is unevaluated word else: # In aboriginal in variables/sources: export_if_blank does export "$1". # We should allow that. # Parse this differently then? # dynamic-export? # It sets global variables. ok, static_val, quoted = word.StaticEval(w) if not ok or quoted: self.AddErrorContext( 'Variable names must be constant strings, got %s', w, word=w) return None # No value is equivalent to '' if not match.IsValidVarName(static_val): self.AddErrorContext('Invalid variable name %r', static_val, word=w) return None a = (static_val, assign_op_e.Equal, None, left_spid) assignments.append(a) i += 1 # TODO: Also make with LhsIndexedName pairs = [] for lhs, op, rhs, spid in assignments: p = ast.assign_pair(ast.LhsName(lhs), op, rhs) p.spids.append(spid) pairs.append(p) node = ast.Assignment(assign_kw, flags, pairs) return node