def Read(argv, splitter, mem): arg, i = READ_SPEC.Parse(argv) names = argv[i:] if arg.n is not None: # read a certain number of bytes try: name = names[0] except IndexError: name = 'REPLY' # default variable name s = posix.read(sys.stdin.fileno(), arg.n) #log('read -n: %s = %s', name, s) state.SetLocalString(mem, name, s) # NOTE: Even if we don't get n bytes back, there is no error? return 0 if not names: names.append('REPLY') # leftover words assigned to the last name max_results = len(names) # We have to read more than one line if there is a line continuation (and # it's not -r). parts = [] join_next = False while True: line = ReadLineFromStdin() #log('LINE %r', line) if not line: # EOF status = 1 break if line.endswith('\n'): # strip trailing newline line = line[:-1] status = 0 else: # odd bash behavior: fail even if we can set variables. status = 1 spans = splitter.SplitForRead(line, not arg.r) done, join_next = _AppendParts(line, spans, max_results, join_next, parts) #log('PARTS %s continued %s', parts, continued) if done: break for i in xrange(max_results): try: s = parts[i] except IndexError: s = '' # if there are too many variables #log('read: %s = %s', names[i], s) state.SetLocalString(mem, names[i], s) return status
def InitEvaluator(): word_ev = test_lib.InitWordEvaluator() state.SetLocalString(word_ev.mem, 'x', '- -- ---') state.SetLocalString(word_ev.mem, 'y', 'y yy') state.SetLocalString(word_ev.mem, 'empty', '') state.SetLocalString(word_ev.mem, 'binding', 'spam=eggs') state.SetLocalString(word_ev.mem, 'binding_with_spaces', 'x=green eggs and ham') word_ev.mem.SetArgv(['x', 'foo', 'spam=eggs']) return word_ev
def Read(argv, mem): # TODO: # - Use IFS instead of Python's split(). arg, i = READ_SPEC.Parse(argv) if not arg.r: util.warn('*** read without -r not implemented ***') names = argv[i:] if arg.n is not None: try: name = names[0] except IndexError: name = 'REPLY' # default variable name s = os.read(sys.stdin.fileno(), arg.n) #log('read -n: %s = %s', name, s) state.SetLocalString(mem, name, s) # NOTE: Even if we don't get n bytes back, there is no error? return 0 line = sys.stdin.readline() if not line: # EOF return 1 if line.endswith('\n'): # strip trailing newline line = line[:-1] status = 0 else: # odd bash behavior: fail even if we can set variables. status = 1 # leftover words assigned to the last name n = len(names) strs = line.split(None, n - 1) # TODO: Use REPLY variable here too? for i in xrange(n): try: s = strs[i] except IndexError: s = '' # if there are too many variables #log('read: %s = %s', names[i], s) state.SetLocalString(mem, names[i], s) return status
def Read(argv, mem): # TODO: # - parse flags. # - Use IFS instead of Python's split(). names = argv line = sys.stdin.readline() if not line: # EOF return 1 if line.endswith('\n'): # strip trailing newline line = line[:-1] status = 0 else: # odd bash behavior: fail even if we can set variables. status = 1 # leftover words assigned to the last name n = len(names) strs = line.split(None, n - 1) for i in xrange(n): try: s = strs[i] except IndexError: s = '' # if there are too many variables state.SetLocalString(mem, names[i], s) return status
def _Dispatch(self, node, fork_external): # If we call RunCommandSub in a recursive call to the executor, this will # be set true (if strict-errexit is false). But it only lasts for one # command. self.check_command_sub_status = False #argv0 = None # for error message check_errexit = False # for errexit if node.tag == command_e.SimpleCommand: check_errexit = True # Find span_id for a basic implementation of $LINENO, e.g. # PS4='+$SOURCE_NAME:$LINENO:' # NOTE: osh2oil uses node.more_env, but we don't need that. span_id = const.NO_INTEGER if node.words: first_word = node.words[0] span_id = word.LeftMostSpanForWord(first_word) self.mem.SetCurrentSpanId(span_id) # PROBLEM: We want to log argv in 'xtrace' mode, but we may have already # redirected here, which screws up logging. For example, 'echo hi # >/dev/null 2>&1'. We want to evaluate argv and log it BEFORE applying # redirects. # Another problem: # - tracing can be called concurrently from multiple processes, leading # to overlap. Maybe have a mode that creates a file per process. # xtrace-proc # - line numbers for every command would be very nice. But then you have # to print the filename too. words = braces.BraceExpandWords(node.words) argv = self.word_ev.EvalWordSequence(words) # This comes before evaluating env, in case there are problems evaluating # it. We could trace the env separately? Also trace unevaluated code # with set-o verbose? self.tracer.OnSimpleCommand(argv) if node.more_env: self.mem.PushTemp() try: for env_pair in node.more_env: val = self.word_ev.EvalWordToString(env_pair.val) # Set each var so the next one can reference it. Example: # FOO=1 BAR=$FOO ls / self.mem.SetVar(ast.LhsName(env_pair.name), val, (var_flags_e.Exported,), scope_e.TempEnv) # NOTE: This might never return! In the case of fork_external=False. status = self._RunSimpleCommand(argv, fork_external, span_id) finally: if node.more_env: self.mem.PopTemp() elif node.tag == command_e.Sentence: # Don't check_errexit since this isn't a real node! if node.terminator.id == Id.Op_Semi: status = self._Execute(node.child) else: status = self._RunJobInBackground(node.child) elif node.tag == command_e.Pipeline: check_errexit = True if node.stderr_indices: raise NotImplementedError('|&') if node.negated: self._PushErrExit() try: status2 = self._RunPipeline(node) finally: self._PopErrExit() # errexit is disabled for !. check_errexit = False status = 1 if status2 == 0 else 0 else: status = self._RunPipeline(node) elif node.tag == command_e.Subshell: check_errexit = True # This makes sure we don't waste a process if we'd launch one anyway. p = self._MakeProcess(node.child) status = p.Run(self.waiter) elif node.tag == command_e.DBracket: check_errexit = True result = self.bool_ev.Eval(node.expr) status = 0 if result else 1 elif node.tag == command_e.DParen: check_errexit = True i = self.arith_ev.Eval(node.child) status = 0 if i != 0 else 1 elif node.tag == command_e.Assignment: flags = word_compile.ParseAssignFlags(node.flags) if node.keyword == Id.Assign_Local: lookup_mode = scope_e.LocalOnly # typeset and declare are synonyms? I see typeset -a a=() the most. elif node.keyword in (Id.Assign_Declare, Id.Assign_Typeset): # declare is like local, except it can also be used outside functions? if var_flags_e.Global in flags: lookup_mode = scope_e.GlobalOnly else: lookup_mode = scope_e.LocalOnly elif node.keyword == Id.Assign_Readonly: lookup_mode = scope_e.Dynamic flags.append(var_flags_e.ReadOnly) elif node.keyword == Id.Assign_None: # mutate existing local or global lookup_mode = scope_e.Dynamic else: raise AssertionError(node.keyword) for pair in node.pairs: if pair.op == assign_op_e.PlusEqual: assert pair.rhs, pair.rhs # I don't think a+= is valid? val = self.word_ev.EvalRhsWord(pair.rhs) old_val, lval = expr_eval.EvalLhsAndLookup(pair.lhs, self.arith_ev, self.mem, self.exec_opts) sig = (old_val.tag, val.tag) if sig == (value_e.Undef, value_e.Str): pass # val is RHS elif sig == (value_e.Undef, value_e.StrArray): pass # val is RHS elif sig == (value_e.Str, value_e.Str): val = runtime.Str(old_val.s + val.s) elif sig == (value_e.Str, value_e.StrArray): e_die("Can't append array to string") elif sig == (value_e.StrArray, value_e.Str): e_die("Can't append string to array") elif sig == (value_e.StrArray, value_e.StrArray): val = runtime.StrArray(old_val.strs + val.strs) else: # plain assignment spid = pair.spids[0] # Source location for tracing lval = self._EvalLhs(pair.lhs, spid, lookup_mode) # RHS can be a string or array. if pair.rhs: val = self.word_ev.EvalRhsWord(pair.rhs) assert isinstance(val, runtime.value), val else: # e.g. 'readonly x' or 'local x' val = None # NOTE: In bash and mksh, declare -a myarray makes an empty cell with # Undef value, but the 'array' attribute. #log('setting %s to %s with flags %s', lval, val, flags) self.mem.SetVar(lval, val, flags, lookup_mode, strict_array=self.exec_opts.strict_array) # Assignment always appears to have a spid. if node.spids: current_spid = node.spids[0] else: current_spid = const.NO_INTEGER self.mem.SetCurrentSpanId(current_spid) self.tracer.OnAssignment(lval, pair.op, val, flags, lookup_mode) # PATCH to be compatible with existing shells: If the assignment had a # command sub like: # # s=$(echo one; false) # # then its status will be in mem.last_status, and we can check it here. # If there was NOT a command sub in the assignment, then we don't want to # check it. if node.keyword == Id.Assign_None: # mutate existing local or global # Only do this if there was a command sub? How? Look at node? # Set a flag in mem? self.mem.last_status or if self.check_command_sub_status: self._CheckStatus(self.mem.last_status, node) # A global assignment shouldn't clear $?. status = self.mem.last_status else: status = 0 else: # To be compatible with existing shells, local assignments DO clear # $?. Even in strict mode, we don't need to bother setting # check_errexit = True, because we would have already checked the # command sub in RunCommandSub. status = 0 # TODO: maybe we should have a "sane-status" that respects this: # false; echo $?; local f=x; echo $? elif node.tag == command_e.ControlFlow: if node.arg_word: # Evaluate the argument val = self.word_ev.EvalWordToString(node.arg_word) assert val.tag == value_e.Str arg = int(val.s) # They all take integers else: arg = 0 # return 0, exit 0, break 0 levels, etc. # NOTE: We don't do anything about a top-level 'return' here. Unlike in # bash, that is OK. If you can return from a sourced script, it makes # sense to return from a main script. ok = True tok = node.token if (tok.id in (Id.ControlFlow_Break, Id.ControlFlow_Continue) and self.loop_level == 0): ok = False msg = 'Invalid control flow at top level' if ok: raise _ControlFlow(tok, arg) if self.exec_opts.strict_control_flow: e_die(msg, token=tok) else: # Only print warnings, never fatal. # Bash oddly only exits 1 for 'return', but no other shell does. ui.PrintFilenameAndLine(tok.span_id, self.arena) util.warn(msg) status = 0 # The only difference between these two is that CommandList has no # redirects. We already took care of that above. elif node.tag in (command_e.CommandList, command_e.BraceGroup): status = self._ExecuteList(node.children) check_errexit = False elif node.tag == command_e.AndOr: # NOTE: && and || have EQUAL precedence in command mode. See case #13 # in dbracket.test.sh. left = node.children[0] # Suppress failure for every child except the last one. self._PushErrExit() try: status = self._Execute(left) finally: self._PopErrExit() i = 1 n = len(node.children) while i < n: #log('i %d status %d', i, status) child = node.children[i] op_id = node.ops[i-1] #log('child %s op_id %s', child, op_id) if op_id == Id.Op_DPipe and status == 0: i += 1 continue # short circuit elif op_id == Id.Op_DAmp and status != 0: i += 1 continue # short circuit if i == n - 1: # errexit handled differently for last child status = self._Execute(child) check_errexit = True else: self._PushErrExit() try: status = self._Execute(child) finally: self._PopErrExit() i += 1 elif node.tag == command_e.WhileUntil: if node.keyword.id == Id.KW_While: _DonePredicate = lambda status: status != 0 else: _DonePredicate = lambda status: status == 0 status = 0 self.loop_level += 1 try: while True: self._PushErrExit() try: cond_status = self._ExecuteList(node.cond) finally: self._PopErrExit() done = cond_status != 0 if _DonePredicate(cond_status): break try: status = self._Execute(node.body) # last one wins except _ControlFlow as e: if e.IsBreak(): status = 0 break elif e.IsContinue(): status = 0 continue else: # return needs to pop up more raise finally: self.loop_level -= 1 elif node.tag == command_e.ForEach: iter_name = node.iter_name if node.do_arg_iter: iter_list = self.mem.GetArgv() else: words = braces.BraceExpandWords(node.iter_words) iter_list = self.word_ev.EvalWordSequence(words) # We need word splitting and so forth # NOTE: This expands globs too. TODO: We should pass in a Globber() # object. status = 0 # in case we don't loop self.loop_level += 1 try: for x in iter_list: #log('> ForEach setting %r', x) state.SetLocalString(self.mem, iter_name, x) #log('<') try: status = self._Execute(node.body) # last one wins except _ControlFlow as e: if e.IsBreak(): status = 0 break elif e.IsContinue(): status = 0 else: # return needs to pop up more raise finally: self.loop_level -= 1 elif node.tag == command_e.ForExpr: status = 0 init, cond, body, update = node.init, node.cond, node.body, node.update if init: self.arith_ev.Eval(init) self.loop_level += 1 try: while True: if cond: b = self.arith_ev.Eval(cond) if not b: break try: status = self._Execute(body) except _ControlFlow as e: if e.IsBreak(): status = 0 break elif e.IsContinue(): status = 0 else: # return needs to pop up more raise if update: self.arith_ev.Eval(update) finally: self.loop_level -= 1 elif node.tag == command_e.DoGroup: status = self._ExecuteList(node.children) check_errexit = False # not real statements elif node.tag == command_e.FuncDef: # NOTE: Would it make sense to evaluate the redirects BEFORE entering? # It will save time on function calls. self.funcs[node.name] = node status = 0 elif node.tag == command_e.If: done = False for arm in node.arms: self._PushErrExit() try: status = self._ExecuteList(arm.cond) finally: self._PopErrExit() if status == 0: status = self._ExecuteList(arm.action) done = True break # TODO: The compiler should flatten this if not done and node.else_action is not None: status = self._ExecuteList(node.else_action) elif node.tag == command_e.NoOp: status = 0 # make it true elif node.tag == command_e.Case: val = self.word_ev.EvalWordToString(node.to_match) to_match = val.s status = 0 # If there are no arms, it should be zero? done = False for arm in node.arms: for pat_word in arm.pat_list: # NOTE: Is it OK that we're evaluating these as we go? # TODO: case "$@") shouldn't succeed? That's a type error? # That requires strict-array? pat_val = self.word_ev.EvalWordToString(pat_word, do_fnmatch=True) #log('Matching word %r against pattern %r', to_match, pat_val.s) if libc.fnmatch(pat_val.s, to_match): status = self._ExecuteList(arm.action) done = True # TODO: Parse ;;& and for fallthrough and such? break # Only execute action ONCE if done: break elif node.tag == command_e.TimeBlock: # TODO: # - When do we need RUSAGE_CHILDREN? # - Respect TIMEFORMAT environment variable. # "If this variable is not set, Bash acts as if it had the value" # $'\nreal\t%3lR\nuser\t%3lU\nsys\t%3lS' # "A trailing newline is added when the format string is displayed." start_t = time.time() # calls gettimeofday() under the hood start_u = resource.getrusage(resource.RUSAGE_SELF) status = self._Execute(node.pipeline) end_t = time.time() end_u = resource.getrusage(resource.RUSAGE_SELF) real = end_t - start_t user = end_u.ru_utime - start_u.ru_utime sys_ = end_u.ru_stime - start_u.ru_stime libc.print_time(real, user, sys_) else: raise NotImplementedError(node.__class__.__name__) return status, check_errexit
def InitEvaluator(): word_ev = test_lib.InitWordEvaluator() state.SetLocalString(word_ev.mem, 'x', 'xxx') state.SetLocalString(word_ev.mem, 'y', 'yyy') return word_ev
def Run(self, cmd_val): # type: (cmd_value__Argv) -> int arg, i = READ_SPEC.ParseCmdVal(cmd_val) names = cmd_val.argv[i:] if arg.n is not None: # read a certain number of bytes stdin = sys.stdin.fileno() try: name = names[0] except IndexError: name = 'REPLY' # default variable name s = "" if sys.stdin.isatty(): # set stdin to read in unbuffered mode orig_attrs = termios.tcgetattr(stdin) attrs = termios.tcgetattr(stdin) # disable canonical (buffered) mode # see `man termios` for an extended discussion attrs[3] &= ~termios.ICANON try: termios.tcsetattr(stdin, termios.TCSANOW, attrs) # posix.read always returns a single character in unbuffered mode while arg.n > 0: s += posix.read(stdin, 1) arg.n -= 1 finally: termios.tcsetattr(stdin, termios.TCSANOW, orig_attrs) else: s_len = 0 while arg.n > 0: buf = posix.read(stdin, arg.n) # EOF if buf == '': break arg.n -= len(buf) s += buf state.SetLocalString(self.mem, name, s) # NOTE: Even if we don't get n bytes back, there is no error? return 0 if not names: names.append('REPLY') # leftover words assigned to the last name if arg.a: max_results = 0 # no max else: max_results = len(names) # We have to read more than one line if there is a line continuation (and # it's not -r). parts = [] join_next = False while True: line = ReadLineFromStdin() #log('LINE %r', line) if not line: # EOF status = 1 break if line.endswith('\n'): # strip trailing newline line = line[:-1] status = 0 else: # odd bash behavior: fail even if we can set variables. status = 1 spans = self.splitter.SplitForRead(line, not arg.r) done, join_next = _AppendParts(line, spans, max_results, join_next, parts) #log('PARTS %s continued %s', parts, continued) if done: break if arg.a: state.SetArrayDynamic(self.mem, arg.a, parts) else: for i in xrange(max_results): try: s = parts[i] except IndexError: s = '' # if there are too many variables #log('read: %s = %s', names[i], s) state.SetStringDynamic(self.mem, names[i], s) return status
def _EvalBracedVarSub(self, part, part_vals, quoted): """ Args: part_vals: output param to append to. """ # We have four types of operator that interact. # # 1. Bracket: value -> (value, bool maybe_decay_array) # # 2. Then these four cases are mutually exclusive: # # a. Prefix length: value -> value # b. Test: value -> part_value[] # c. Other Suffix: value -> value # d. no operator: you have a value # # That is, we don't have both prefix and suffix operators. # # 3. Process maybe_decay_array here before returning. maybe_decay_array = False # for $*, ${a[*]}, etc. var_name = None # For ${foo=default} # 1. Evaluate from (var_name, var_num, token Id) -> value if part.token.id == Id.VSub_Name: var_name = part.token.val val = self.mem.GetVar(var_name) #log('EVAL NAME %s -> %s', var_name, val) elif part.token.id == Id.VSub_Number: var_num = int(part.token.val) val = self._EvalVarNum(var_num) else: # $* decays val, maybe_decay_array = self._EvalSpecialVar( part.token.id, quoted) # 2. Bracket: value -> (value v, bool maybe_decay_array) # maybe_decay_array is for joining ${a[*]} and unquoted ${a[@]} AFTER # suffix ops are applied. If we take the length with a prefix op, the # distinction is ignored. if part.bracket_op: if part.bracket_op.tag == bracket_op_e.WholeArray: op_id = part.bracket_op.op_id if op_id == Id.Lit_At: if not quoted: maybe_decay_array = True # ${a[@]} decays but "${a[@]}" doesn't if val.tag == value_e.Undef: val = self._EmptyStrArrayOrError(part.token) elif val.tag == value_e.Str: e_die("Can't index string with @: %r", val, part=part) elif val.tag == value_e.StrArray: # TODO: Is this a no-op? Just leave 'val' alone. val = runtime.StrArray(val.strs) elif op_id == Id.Arith_Star: maybe_decay_array = True # both ${a[*]} and "${a[*]}" decay if val.tag == value_e.Undef: val = self._EmptyStrArrayOrError(part.token) elif val.tag == value_e.Str: e_die("Can't index string with *: %r", val, part=part) elif val.tag == value_e.StrArray: # TODO: Is this a no-op? Just leave 'val' alone. # ${a[*]} or "${a[*]}" : maybe_decay_array is always true val = runtime.StrArray(val.strs) else: raise AssertionError(op_id) # unknown elif part.bracket_op.tag == bracket_op_e.ArrayIndex: anode = part.bracket_op.expr if val.tag == value_e.Undef: pass # it will be checked later elif val.tag == value_e.Str: # Bash treats any string as an array, so we can't add our own # behavior here without making valid OSH invalid bash. e_die("Can't index string %r with integer", part.token.val, token=part.token) elif val.tag == value_e.StrArray: index = self.arith_ev.Eval(anode) try: # could be None because representation is sparse s = val.strs[index] except IndexError: s = None if s is None: val = runtime.Undef() else: val = runtime.Str(s) elif val.tag == value_e.AssocArray: key = self.arith_ev.Eval(anode, int_coerce=False) try: val = runtime.Str(val.d[key]) except KeyError: val = runtime.Undef() else: raise AssertionError(val.__class__.__name__) else: raise AssertionError(part.bracket_op.tag) if part.prefix_op: val = self._EmptyStrOrError(val) # maybe error val = self._ApplyPrefixOp(val, part.prefix_op) # NOTE: When applying the length operator, we can't have a test or # suffix afterward. And we don't want to decay the array elif part.suffix_op: op = part.suffix_op if op.tag == suffix_op_e.StringNullary: if op.op_id == Id.VOp0_P: # TODO: Use dependency injection #val = self.prompt._EvalPS1(val) prompt = ui.PROMPT.EvalPrompt(val) val = runtime.Str(prompt) else: raise NotImplementedError(op.op_id) elif op.tag == suffix_op_e.StringUnary: if LookupKind(part.suffix_op.op_id) == Kind.VTest: # TODO: Change style to: # if self._ApplyTestOp(...) # return # It should return whether anything was done. If not, we continue to # the end, where we might throw an error. assign_part_vals, effect = self._ApplyTestOp( val, part.suffix_op, quoted, part_vals) # NOTE: Splicing part_values is necessary because of code like # ${undef:-'a b' c 'd # e'}. Each part_value can have a different # do_glob/do_elide setting. if effect == effect_e.SpliceParts: return # EARLY RETURN, part_vals mutated elif effect == effect_e.SpliceAndAssign: if var_name is None: # TODO: error context e_die("Can't assign to special variable") else: # NOTE: This decays arrays too! 'set -o strict_array' could # avoid it. rhs_str = _DecayPartValuesToString( assign_part_vals, self.splitter.GetJoinChar()) state.SetLocalString(self.mem, var_name, rhs_str) return # EARLY RETURN, part_vals mutated elif effect == effect_e.Error: raise NotImplementedError else: # The old one #val = self._EmptyStringPartOrError(part_val, quoted) pass # do nothing, may still be undefined else: val = self._EmptyStrOrError(val) # maybe error # Other suffix: value -> value val = self._ApplyUnarySuffixOp(val, part.suffix_op) elif op.tag == suffix_op_e.PatSub: # PatSub, vectorized val = self._EmptyStrOrError(val) # ${undef//x/y} pat_val = self.EvalWordToString(op.pat, do_fnmatch=True) assert pat_val.tag == value_e.Str, pat_val if op.replace: replace_val = self.EvalWordToString(op.replace, do_fnmatch=True) assert replace_val.tag == value_e.Str, replace_val replace_str = replace_val.s else: replace_str = '' # Either GlobReplacer or ConstStringReplacer replacer = libstr.MakeReplacer(pat_val.s, replace_str, op.spids[0]) if val.tag == value_e.Str: s = replacer.Replace(val.s, op) val = runtime.Str(s) elif val.tag == value_e.StrArray: strs = [] for s in val.strs: if s is not None: strs.append(replacer.Replace(s, op)) val = runtime.StrArray(strs) else: raise AssertionError(val.__class__.__name__) elif op.tag == suffix_op_e.Slice: val = self._EmptyStrOrError(val) # ${undef:3:1} if op.begin: begin = self.arith_ev.Eval(op.begin) else: begin = 0 if op.length: length = self.arith_ev.Eval(op.length) else: length = None if val.tag == value_e.Str: # Slice UTF-8 characters in a string. s = val.s try: if begin < 0: # It could be negative if we compute unicode length, but that's # confusing. # TODO: Instead of attributing it to the word part, it would be # better if we attributed it to arith_expr begin. raise util.InvalidSlice( "The start index of a string slice can't be negative: %d", begin, part=part) byte_begin = libstr.AdvanceUtf8Chars(s, begin, 0) if length is None: byte_end = len(s) else: if length < 0: # TODO: Instead of attributing it to the word part, it would be # better if we attributed it to arith_expr begin. raise util.InvalidSlice( "The length of a string slice can't be negative: %d", length, part=part) byte_end = libstr.AdvanceUtf8Chars( s, length, byte_begin) except (util.InvalidSlice, util.InvalidUtf8) as e: if self.exec_opts.strict_word_eval: raise else: # TODO: # - We don't see the error location here, but we see it when set # -o strict-word-eval. # - Doesn't make the command exit with 1. It just sets the word # to empty string. util.warn(e.UserErrorString()) substr = '' # error condition else: substr = s[byte_begin:byte_end] val = runtime.Str(substr) elif val.tag == value_e.StrArray: # Slice array entries. # NOTE: unset elements don't count towards the length. strs = [] for s in val.strs[begin:]: if s is not None: strs.append(s) if len( strs ) == length: # never true for unspecified length break val = runtime.StrArray(strs) else: raise AssertionError( val.__class__.__name__) # Not possible # After applying suffixes, process maybe_decay_array here. if maybe_decay_array and val.tag == value_e.StrArray: val = self._DecayArray(val) # For the case where there are no prefix or suffix ops. val = self._EmptyStrOrError(val) # For example, ${a} evaluates to value_t.Str(), but we want a # part_value.StringPartValue. part_val = _ValueToPartValue(val, quoted) part_vals.append(part_val)
def _EvalBracedVarSub(self, part, part_vals, quoted): """ Args: part_vals: output param to append to. """ # We have four types of operator that interact. # # 1. Bracket: value -> (value, bool decay_array) # # 2. Then these four cases are mutually exclusive: # # a. Prefix length: value -> value # b. Test: value -> part_value[] # c. Other Suffix: value -> value # d. no operator: you have a value # # That is, we don't have both prefix and suffix operators. # # 3. Process decay_array here before returning. decay_array = False # for $*, ${a[*]}, etc. var_name = None # For ${foo=default} # 1. Evaluate from (var_name, var_num, token Id) -> value if part.token.id == Id.VSub_Name: var_name = part.token.val val = self.mem.GetVar(var_name) #log('EVAL NAME %s -> %s', var_name, val) elif part.token.id == Id.VSub_Number: var_num = int(part.token.val) val = self._EvalVarNum(var_num) else: # $* decays val, decay_array = self._EvalSpecialVar(part.token.id, quoted) # 2. Bracket: value -> (value v, bool decay_array) # decay_array is for joining ${a[*]} and unquoted ${a[@]} AFTER suffix ops # are applied. If we take the length with a prefix op, the distinction is # ignored. if part.bracket_op: if part.bracket_op.tag == bracket_op_e.WholeArray: op_id = part.bracket_op.op_id if op_id == Id.Lit_At: if not quoted: decay_array = True # ${a[@]} decays but "${a[@]}" doesn't if val.tag == value_e.Undef: val = self._EmptyStrArrayOrError(part.token) elif val.tag == value_e.Str: e_die("Can't index string with @: %r", val, part=part) elif val.tag == value_e.StrArray: val = runtime.StrArray(val.strs) elif op_id == Id.Arith_Star: decay_array = True # both ${a[*]} and "${a[*]}" decay if val.tag == value_e.Undef: val = self._EmptyStrArrayOrError(part.token) elif val.tag == value_e.Str: e_die("Can't index string with *: %r", val, part=part) elif val.tag == value_e.StrArray: # Always decay_array with ${a[*]} or "${a[*]}" val = runtime.StrArray(val.strs) else: raise AssertionError(op_id) # unknown elif part.bracket_op.tag == bracket_op_e.ArrayIndex: anode = part.bracket_op.expr index = self.arith_ev.Eval(anode) if val.tag == value_e.Undef: pass # it will be checked later elif val.tag == value_e.Str: # TODO: Implement this as an extension. Requires unicode support. # Bash treats it as an array. e_die("Can't index string %r with integer", part.token.val) elif val.tag == value_e.StrArray: try: s = val.strs[index] except IndexError: val = runtime.Undef() else: val = runtime.Str(s) else: raise AssertionError(part.bracket_op.tag) if part.prefix_op: val = self._EmptyStrOrError(val) # maybe error val = self._ApplyPrefixOp(val, part.prefix_op) # At least for length, we can't have a test or suffix afterward. elif part.suffix_op: op = part.suffix_op if op.tag == suffix_op_e.StringUnary: if LookupKind(part.suffix_op.op_id) == Kind.VTest: # TODO: Change style to: # if self._ApplyTestOp(...) # return # It should return whether anything was done. If not, we continue to # the end, where we might throw an error. assign_part_vals, effect = self._ApplyTestOp(val, part.suffix_op, quoted, part_vals) # NOTE: Splicing part_values is necessary because of code like # ${undef:-'a b' c 'd # e'}. Each part_value can have a different # do_glob/do_elide setting. if effect == Effect.SpliceParts: return # EARLY RETURN, part_vals mutated elif effect == Effect.SpliceAndAssign: if var_name is None: # TODO: error context e_die("Can't assign to special variable") else: # NOTE: This decays arrays too! 'set -o strict_array' could # avoid it. rhs_str = _DecayPartValuesToString(assign_part_vals, self.splitter.GetJoinChar()) state.SetLocalString(self.mem, var_name, rhs_str) return # EARLY RETURN, part_vals mutated elif effect == Effect.Error: raise NotImplementedError else: # The old one #val = self._EmptyStringPartOrError(part_val, quoted) pass # do nothing, may still be undefined else: val = self._EmptyStrOrError(val) # maybe error # Other suffix: value -> value val = self._ApplyUnarySuffixOp(val, part.suffix_op) elif op.tag == suffix_op_e.PatSub: # PatSub, vectorized val = self._EmptyStrOrError(val) pat_val = self.EvalWordToString(op.pat, do_fnmatch=True) assert pat_val.tag == value_e.Str, pat_val if op.replace: replace_val = self.EvalWordToString(op.replace, do_fnmatch=True) assert replace_val.tag == value_e.Str, replace_val replace_str = replace_val.s else: replace_str = '' pat = pat_val.s if val.tag == value_e.Str: s = libstr.PatSub(val.s, op, pat, replace_str) val = runtime.Str(s) elif val.tag == value_e.StrArray: strs = [] for s in val.strs: strs.append(libstr.PatSub(s, op, pat, replace_str)) val = runtime.StrArray(strs) else: raise AssertionError(val.__class__.__name__) elif op.tag == suffix_op_e.Slice: # NOTE: The beginning can be negative, but Python handles this. Might # want to make it explicit. # TODO: Check out of bounds errors? begin > end? if op.begin: begin = self.arith_ev.Eval(op.begin) else: begin = 0 if op.length: length = self.arith_ev.Eval(op.length) end = begin + length else: end = None # Python supports None as the end if val.tag == value_e.Str: # Slice characters in a string. # TODO: Need to support unicode? Write spec # tests. val = runtime.Str(val.s[begin : end]) elif val.tag == value_e.StrArray: # Slice array entries. val = runtime.StrArray(val.strs[begin : end]) else: raise AssertionError(val.__class__.__name__) # After applying suffixes, process decay_array here. if decay_array: val = self._DecayArray(val) # No prefix or suffix ops val = self._EmptyStrOrError(val) # For example, ${a} evaluates to value_t.Str(), but we want a # part_value.StringPartValue. part_val = _ValueToPartValue(val, quoted) part_vals.append(part_val)
def _Dispatch(self, node, fork_external): argv0 = None # for error message check_errexit = True # for errexit if node.tag == command_e.SimpleCommand: # PROBLEM: We want to log argv in 'xtrace' mode, but we may have already # redirected here, which screws up loggnig. For example, 'echo hi # >/dev/null 2>&1'. We want to evaluate argv and log it BEFORE applying # redirects. # Another problem: # - tracing can be called concurrently from multiple processes, leading # to overlap. Maybe have a mode that creates a file per process. # xtrace-proc # - line numbers for every command would be very nice. But then you have # to print the filename too. words = braces.BraceExpandWords(node.words) argv = self.ev.EvalWordSequence(words) if argv: argv0 = argv[0] environ = self.mem.GetExported() self._EvalEnv(node.more_env, environ) if self.exec_opts.xtrace: log('+ %s', argv) #print('+ %s' % argv, file=sys.stderr) #print('+ %s' % argv, file=self.XFILE) #os.write(2, '+ %s\n' % argv) status = self._RunSimpleCommand(argv, environ, fork_external) if self.exec_opts.xtrace: #log('+ %s -> %d', argv, status) pass elif node.tag == command_e.Sentence: if node.terminator.id == Id.Op_Semi: # Don't check_errexit since this isn't a real node! check_errexit = False status = self._Execute(node.child) else: status = self._RunJobInBackground(node.child) elif node.tag == command_e.Pipeline: if node.stderr_indices: raise NotImplementedError('|&') if node.negated: self._PushErrExit() try: status2 = self._RunPipeline(node) finally: self._PopErrExit() # errexit is disabled for !. check_errexit = False status = 1 if status2 == 0 else 0 else: status = self._RunPipeline(node) elif node.tag == command_e.Subshell: # This makes sure we don't waste a process if we'd launch one anyway. p = self._MakeProcess(node.child) status = p.Run(self.waiter) elif node.tag == command_e.DBracket: result = self.bool_ev.Eval(node.expr) status = 0 if result else 1 elif node.tag == command_e.DParen: i = self.arith_ev.Eval(node.child) status = 0 if i != 0 else 1 elif node.tag == command_e.Assignment: pairs = [] if node.keyword == Id.Assign_Local: lookup_mode = scope.LocalOnly flags = () elif node.keyword == Id.Assign_Declare: # declare is like local, except it can also be used outside functions? lookup_mode = scope.LocalOnly # TODO: Respect flags. -r and -x matter, but -a and -A might be # implicit in the RHS? flags = () elif node.keyword == Id.Assign_Readonly: lookup_mode = scope.Dynamic flags = (var_flags.ReadOnly,) elif node.keyword == Id.Assign_None: # mutate existing local or global lookup_mode = scope.Dynamic flags = () else: # TODO: typeset, declare, etc. Those are dynamic though. raise NotImplementedError(node.keyword) for pair in node.pairs: if pair.rhs: # RHS can be a string or array. val = self.ev.EvalWordToAny(pair.rhs) assert isinstance(val, runtime.value), val else: # 'local x' is equivalent to local x="" val = runtime.Str('') if pair.op == assign_op.PlusEqual: old_val, lval = expr_eval.EvalLhs(pair.lhs, self.arith_ev, self.mem, self.exec_opts) sig = (old_val.tag, val.tag) if sig == (value_e.Str, value_e.Str): val = runtime.Str(old_val.s + val.s) elif sig == (value_e.Str, value_e.StrArray): e_die("Can't append array to string") elif sig == (value_e.StrArray, value_e.Str): e_die("Can't append string to array") elif sig == (value_e.StrArray, value_e.StrArray): val = runtime.StrArray(old_val.strs + val.strs) else: lval = self._EvalLhs(pair.lhs) #log('ASSIGNING %s -> %s', lval, val) self.mem.SetVar(lval, val, flags, lookup_mode) # TODO: This should be eval of RHS, unlike bash! status = 0 elif node.tag == command_e.ControlFlow: if node.arg_word: # Evaluate the argument val = self.ev.EvalWordToString(node.arg_word) assert val.tag == value_e.Str arg = int(val.s) # They all take integers else: arg = 0 # return 0, break 0 levels, etc. # NOTE: always raises so we don't set status. raise _ControlFlow(node.token, arg) # The only difference between these two is that CommandList has no # redirects. We already took care of that above. elif node.tag in (command_e.CommandList, command_e.BraceGroup): status = self._ExecuteList(node.children) elif node.tag == command_e.AndOr: #print(node.children) left, right = node.children # This is everything except the last one. self._PushErrExit() try: status = self._Execute(left) finally: self._PopErrExit() if node.op_id == Id.Op_DPipe: if status != 0: status = self._Execute(right) elif node.op_id == Id.Op_DAmp: if status == 0: status = self._Execute(right) else: raise AssertionError elif node.tag in (command_e.While, command_e.Until): # TODO: Compile this out? if node.tag == command_e.While: _DonePredicate = lambda status: status != 0 else: _DonePredicate = lambda status: status == 0 status = 0 while True: self._PushErrExit() try: cond_status = self._ExecuteList(node.cond) finally: self._PopErrExit() done = cond_status != 0 if _DonePredicate(cond_status): break try: status = self._Execute(node.body) # last one wins except _ControlFlow as e: if e.IsBreak(): status = 0 break elif e.IsContinue(): status = 0 continue else: # return needs to pop up more raise elif node.tag == command_e.ForEach: iter_name = node.iter_name if node.do_arg_iter: iter_list = self.mem.GetArgv() else: words = braces.BraceExpandWords(node.iter_words) iter_list = self.ev.EvalWordSequence(words) # We need word splitting and so forth # NOTE: This expands globs too. TODO: We should pass in a Globber() # object. status = 0 # in case we don't loop for x in iter_list: #log('> ForEach setting %r', x) state.SetLocalString(self.mem, iter_name, x) #log('<') try: status = self._Execute(node.body) # last one wins except _ControlFlow as e: if e.IsBreak(): status = 0 break elif e.IsContinue(): status = 0 continue else: # return needs to pop up more raise elif node.tag == command_e.ForExpr: raise NotImplementedError(node.tag) elif node.tag == command_e.DoGroup: status = self._ExecuteList(node.children) elif node.tag == command_e.FuncDef: # NOTE: Would it make sense to evaluate the redirects BEFORE entering? # It will save time on function calls. self.funcs[node.name] = node status = 0 elif node.tag == command_e.If: done = False for arm in node.arms: self._PushErrExit() try: status = self._ExecuteList(arm.cond) finally: self._PopErrExit() if status == 0: status = self._ExecuteList(arm.action) done = True break # TODO: The compiler should flatten this if not done and node.else_action is not None: status = self._ExecuteList(node.else_action) elif node.tag == command_e.NoOp: status = 0 # make it true elif node.tag == command_e.Case: val = self.ev.EvalWordToString(node.to_match) to_match = val.s status = 0 # If there are no arms, it should be zero? done = False for arm in node.arms: for pat_word in arm.pat_list: # NOTE: Is it OK that we're evaluating these as we go? pat_val = self.ev.EvalWordToString(pat_word, do_fnmatch=True) #log('Matching word %r against pattern %r', to_match, pat_val.s) if libc.fnmatch(pat_val.s, to_match): status = self._ExecuteList(arm.action) done = True # TODO: Parse ;;& and for fallthrough and such? if done: break elif node.tag == command_e.TimeBlock: # TODO: # - When do we need RUSAGE_CHILDREN? # - Respect TIMEFORMAT environment variable. # "If this variable is not set, Bash acts as if it had the value" # $'\nreal\t%3lR\nuser\t%3lU\nsys\t%3lS' # "A trailing newline is added when the format string is displayed." start_t = time.time() # calls gettimeofday() under the hood start_u = resource.getrusage(resource.RUSAGE_SELF) status = self._Execute(node.pipeline) end_t = time.time() end_u = resource.getrusage(resource.RUSAGE_SELF) real = end_t - start_t user = end_u.ru_utime - start_u.ru_utime sys_ = end_u.ru_stime - start_u.ru_stime print('real\t%.3f' % real, file=sys.stderr) print('user\t%.3f' % user, file=sys.stderr) print('sys\t%.3f' % sys_, file=sys.stderr) else: raise AssertionError(node.tag) return status, check_errexit
def _EvalBracedVarSub(self, part, quoted): """ Returns: part_value[] """ # We have four types of operator that interact. # # 1. Bracket: value -> (value, bool decay_array) # # 2. Then these four cases are mutually exclusive: # # a. Prefix length: value -> value # b. Test: value -> part_value[] # c. Other Suffix: value -> value # d. no operator: you have a value # # That is, we don't have both prefix and suffix operators. # # 3. Process decay_array here before returning. decay_array = False # for $*, ${a[*]}, etc. var_name = None # For ${foo=default} # 1. Evaluate from (var_name, var_num, token Id) -> value if part.token.id == Id.VSub_Name: var_name = part.token.val val = self.mem.GetVar(var_name) #log('EVAL NAME %s -> %s', var_name, val) elif part.token.id == Id.VSub_Number: var_num = int(part.token.val) val = self._EvalVarNum(var_num) else: # $* decays val, decay_array = self._EvalSpecialVar(part.token.id, quoted) # 2. Bracket: value -> (value v, bool decay_array) # decay_array is for joining ${a[*]} and unquoted ${a[@]} AFTER suffix ops # are applied. If we take the length with a prefix op, the distinction is # ignored. if part.bracket_op: if part.bracket_op.tag == bracket_op_e.WholeArray: op_id = part.bracket_op.op_id if op_id == Id.Lit_At: if not quoted: decay_array = True # ${a[@]} decays but "${a[@]}" doesn't if val.tag == value_e.Undef: val = self._EmptyStrArrayOrError(part.token) elif val.tag == value_e.Str: raise RuntimeError("Can't index string with @") elif val.tag == value_e.StrArray: val = runtime.StrArray(val.strs) elif op_id == Id.Arith_Star: decay_array = True # both ${a[*]} and "${a[*]}" decay if val.tag == value_e.Undef: val = self._EmptyStrArrayOrError(part.token) elif val.tag == value_e.Str: raise RuntimeError("Can't index string with *") elif val.tag == value_e.StrArray: # Always decay_array with ${a[*]} or "${a[*]}" val = runtime.StrArray(val.strs) else: raise AssertionError(op_id) # unknown elif part.bracket_op.tag == bracket_op_e.ArrayIndex: anode = part.bracket_op.expr index = self.arith_ev.Eval(anode) if val.tag == value_e.Undef: pass # it will be checked later elif val.tag == value_e.Str: # TODO: Implement this as an extension. Requires unicode support. # Bash treats it as an array. e_die("Can't index string %r with integer", part.token.val) elif val.tag == value_e.StrArray: try: s = val.strs[index] except IndexError: val = runtime.Undef() else: val = runtime.Str(s) else: raise AssertionError(part.bracket_op.tag) if part.prefix_op: val = self._EmptyStrOrError(val) # maybe error val = self._ApplyPrefixOp(val, part.prefix_op) # At least for length, we can't have a test or suffix afterward. elif part.suffix_op: out_part_vals = [] op = part.suffix_op if op.tag == suffix_op_e.StringUnary: if LookupKind(part.suffix_op.op_id) == Kind.VTest: # VTest: value -> part_value[] new_part_vals, effect = self._ApplyTestOp( val, part.suffix_op, quoted) # NOTE: Splicing part_values is necessary because of code like # ${undef:-'a b' c 'd # e'}. Each part_value can have a different # do_glob/do_elide setting. if effect == Effect.SpliceParts: return new_part_vals # EARLY RETURN elif effect == Effect.SpliceAndAssign: if var_name is None: # TODO: error context e_die("Can't assign to special variable") else: # NOTE: This decays arrays too! 'set -o strict_array' could # avoid it. rhs_str = _DecayPartValuesToString( new_part_vals, _GetJoinChar(self.mem)) state.SetLocalString(self.mem, var_name, rhs_str) return new_part_vals elif effect == Effect.Error: raise NotImplementedError else: # The old one #val = self._EmptyStringPartOrError(part_val, quoted) #out_part_vals.append(part_val) pass # do nothing, may still be undefined else: val = self._EmptyStrOrError(val) # maybe error # Other suffix: value -> value val = self._ApplyUnarySuffixOp(val, part.suffix_op) elif op.tag == suffix_op_e.PatSub: # PatSub, vectorized pat_val = self.word_ev.EvalWordToString(op.pat, do_fnmatch=True) assert pat_val.tag == value_e.Str, pat_val if op.replace: replace_val = self.word_ev.EvalWordToString( op.replace, do_fnmatch=True) assert replace_val.tag == value_e.Str, replace_val replace_str = replace_val.s else: replace_str = '' pat = pat_val.s if val.tag == value_e.Str: s = self._PatSub(val.s, op, pat, replace_str) val = runtime.Str(s) elif val.tag == value_e.StrArray: strs = [] for s in val.strs: strs.append(self._PatSub(s, op, pat, replace_str)) val = runtime.StrArray(strs) else: raise AssertionError(val.tag) elif op.tag == suffix_op_e.Slice: # Either string slicing or array slicing. However string slicing has # a unicode problem? # Or maybe have a different operator for byte slice and char slice. raise NotImplementedError(op) # After applying suffixes, process decay_array here. if decay_array: val = self._DecayArray(val) # No prefix or suffix ops val = self._EmptyStrOrError(val) return [_ValueToPartValue(val, quoted)]