def _EvalSpecialVar(self, op_id, quoted):
  """Evaluate a special variable such as $@, $* or $-.

  Args:
    op_id: Id of the special variable being substituted.
    quoted: whether the substitution appears in a double quoted context.

  Returns:
    (val, bool maybe_decay_array).  The boolean tells the caller whether an
    array value should later be decayed (joined) into a string.

  TODO: Should that boolean be part of the value?
  """
  # $- is always a plain string.
  if op_id == Id.VSub_Hyphen:
    return value.Str(self.exec_opts.GetDollarHyphen()), False

  # Everything other than $@ and $* is looked up in memory and never decays.
  if op_id not in (Id.VSub_At, Id.VSub_Star):
    return self.mem.GetSpecialVar(op_id), False

  args_val = value.StrArray(self.mem.GetArgv())

  # $* and "$*" always decay.
  if op_id == Id.VSub_Star:
    return args_val, True

  # $@ needs to know whether it is double quoted:
  # - "$@" stays an ARRAY.
  # - unquoted $@ decays to a STRING, which is then subject to splitting.
  return args_val, not quoted
def _ApplyUnarySuffixOp(self, val, op):
  """Apply a unary suffix operator with a word argument to a value.

  Args:
    val: a defined value (must not be Undef); a Str, otherwise it is treated
      as a StrArray.
    op: the suffix operator node; its arg_word is evaluated to a string with
      do_fnmatch=True.

  Returns:
    A new value.Str or value.StrArray.

  Raises:
    AssertionError: if the operator's kind is not Kind.VOp1.
  """
  assert val.tag != value_e.Undef

  kind = LookupKind(op.op_id)
  if kind != Kind.VOp1:
    raise AssertionError(kind)

  pattern = self.EvalWordToString(op.arg_word, do_fnmatch=True)
  assert pattern.tag == value_e.Str

  if val.tag == value_e.Str:
    return value.Str(string_ops.DoUnarySuffixOp(val.s, op, pattern.s))

  # ${a[@]#prefix} is VECTORIZED on arrays.  Oil should have this too.
  # Unset (None) entries of the array are dropped from the result.
  transformed = [
      string_ops.DoUnarySuffixOp(item, op, pattern.s)
      for item in val.strs
      if item is not None
  ]
  return value.StrArray(transformed)
def SetArrayDynamic(mem, name, a):
  """Bind `name` to an array of strings, using dynamic scope lookup.

  Used for _init_completion.

  Args:
    mem: object providing SetVar().
    name: variable name.
    a: list of array entries.
  """
  assert isinstance(a, list)
  target = lhs_expr.LhsName(name)
  array_val = value.StrArray(a)
  no_flags = ()
  mem.SetVar(target, array_val, no_flags, scope_e.Dynamic)
def __call__(self, cmd_val):
  """Mark each name/value pair in cmd_val as read-only.

  Flags are parsed with READONLY_SPEC.  A pair without a value gets an empty
  array for -a, an empty associative array for -A, and otherwise keeps None.

  Returns:
    0 on success, or 1 as soon as _CheckType() rejects a pair.
  """
  reader = args.Reader(cmd_val.argv, spids=cmd_val.arg_spids)
  reader.Next()
  arg, _ = READONLY_SPEC.Parse(reader)

  for pair in cmd_val.pairs:
    rval = pair.rval
    if rval is None:
      if arg.a:
        rval = value.StrArray([])
      elif arg.A:
        rval = value.AssocArray({})

    type_ok = _CheckType(rval, arg, self.errfmt, pair.spid)
    if not type_ok:
      return 1

    # NOTE:
    # - when rval is None, only flags are changed
    # - dynamic scope because flags on locals can be changed, etc.
    self.mem.SetVar(pair.lval, rval, (var_flags_e.ReadOnly, ),
                    scope_e.Dynamic)

  return 0
def _BindNewArrayWithEntry(self, namespace, lval, val, new_flags):
  """Create a new indexed array in 'namespace' holding a single entry.

  The array has lval.index leading None placeholders followed by val.s, and
  is stored under lval.name.
  """
  # Slots before the assigned index are unset (None).
  padded = [None] * lval.index + [val.s]
  array_val = value.StrArray(padded)

  # arrays can't be exported; can't have AssocArray flag
  is_readonly = var_flags_e.ReadOnly in new_flags
  new_cell = runtime_asdl.cell(array_val, False, is_readonly, False)
  namespace[lval.name] = new_cell
def EvalRhsWord(self, word):
  """syntax.word -> value

  Used for the RHS of an assignment.  There is no splitting.

  Returns:
    value.StrArray for an array literal like a=(1 2), otherwise the string
    value of the word (an empty string for an EmptyWord).
  """
  if word.tag == word_e.EmptyWord:
    return value.Str('')

  # Special case for a=(1 2).  ArrayLiteralPart won't appear in words that
  # don't look like assignments.
  parts = word.parts
  is_array_literal = (
      len(parts) == 1 and parts[0].tag == word_part_e.ArrayLiteralPart)

  # If the RHS doesn't look like a=( ... ), then it must be a string.
  if not is_array_literal:
    return self.EvalWordToString(word)

  expanded = braces.BraceExpandWords(parts[0].words)
  return value.StrArray(self._EvalWordSequence(expanded))
def _EvalIndirectArrayExpansion(self, name, index):
  """Expands ${!ref} when $ref has the form `name[index]`.

  Args:
    name, index: arbitrary strings

  Returns:
    value, or None if invalid.  An element that does not exist -- an index
    out of range, an unset slot of a sparse array, or a missing key -- yields
    value.Undef().

  Raises:
    NotImplementedError: for `name[@]` / `name[*]` on an associative array.
  """
  if not match.IsValidVarName(name):
    return None

  val = self.mem.GetVar(name)

  if val.tag == value_e.StrArray:
    if index in ('@', '*'):
      # TODO: maybe_decay_array
      return value.StrArray(val.strs)

    try:
      index_num = int(index)
    except ValueError:
      return None

    try:
      # could be None because representation is sparse
      s = val.strs[index_num]
    except IndexError:
      return value.Undef()

    # An unset slot must not be wrapped as value.Str(None); treat it as
    # undefined, like an out-of-range index.
    if s is None:
      return value.Undef()
    return value.Str(s)

  elif val.tag == value_e.AssocArray:
    if index in ('@', '*'):
      raise NotImplementedError
    try:
      return value.Str(val.d[index])
    except KeyError:
      return value.Undef()

  elif val.tag == value_e.Undef:
    return value.Undef()

  elif val.tag == value_e.Str:
    # A plain string can't be indexed; the reference is invalid.
    return None

  else:
    raise AssertionError
def _EvalBracedVarSub(self, part, part_vals, quoted):
  """Evaluate a braced variable substitution and append it to part_vals.

  Args:
    part: the substitution node.  This function reads part.token,
      part.bracket_op, part.prefix_op and part.suffix_op.
    part_vals: output param to append to.
    quoted: whether the substitution is in a double quoted context.

  Returns:
    None.  The result is appended to part_vals.  For test operators whose
    effect is SpliceParts or SpliceAndAssign the function returns early and
    appends nothing itself.
  """
  # We have four types of operator that interact.
  #
  # 1. Bracket: value -> (value, bool maybe_decay_array)
  #
  # 2. Then these four cases are mutually exclusive:
  #
  #   a. Prefix length: value -> value
  #   b. Test: value -> part_value[]
  #   c. Other Suffix: value -> value
  #   d. no operator: you have a value
  #
  # That is, we don't have both prefix and suffix operators.
  #
  # 3. Process maybe_decay_array here before returning.

  maybe_decay_array = False  # for $*, ${a[*]}, etc.

  var_name = None  # For ${foo=default}

  # 1. Evaluate from (var_name, var_num, token Id) -> value
  if part.token.id == Id.VSub_Name:
    var_name = part.token.val
    val = self.mem.GetVar(var_name)
    #log('EVAL NAME %s -> %s', var_name, val)

  elif part.token.id == Id.VSub_Number:
    var_num = int(part.token.val)
    val = self._EvalVarNum(var_num)
  else:
    # $* decays
    val, maybe_decay_array = self._EvalSpecialVar(part.token.id, quoted)

  # 2. Bracket: value -> (value v, bool maybe_decay_array)
  # maybe_decay_array is for joining ${a[*]} and unquoted ${a[@]} AFTER
  # suffix ops are applied.  If we take the length with a prefix op, the
  # distinction is ignored.
  if part.bracket_op:
    if part.bracket_op.tag == bracket_op_e.WholeArray:
      op_id = part.bracket_op.op_id

      if op_id == Id.Lit_At:
        if not quoted:
          maybe_decay_array = True  # ${a[@]} decays but "${a[@]}" doesn't
        if val.tag == value_e.Undef:
          val = self._EmptyStrArrayOrError(part.token)
        elif val.tag == value_e.Str:
          e_die("Can't index string with @: %r", val, part=part)
        elif val.tag == value_e.StrArray:
          # TODO: Is this a no-op?  Just leave 'val' alone.
          val = value.StrArray(val.strs)
        # NOTE(review): any other tag (e.g. AssocArray) falls through with
        # 'val' unchanged.

      elif op_id == Id.Arith_Star:
        maybe_decay_array = True  # both ${a[*]} and "${a[*]}" decay
        if val.tag == value_e.Undef:
          val = self._EmptyStrArrayOrError(part.token)
        elif val.tag == value_e.Str:
          e_die("Can't index string with *: %r", val, part=part)
        elif val.tag == value_e.StrArray:
          # TODO: Is this a no-op?  Just leave 'val' alone.
          # ${a[*]} or "${a[*]}" :  maybe_decay_array is always true
          val = value.StrArray(val.strs)

      else:
        raise AssertionError(op_id)  # unknown

    elif part.bracket_op.tag == bracket_op_e.ArrayIndex:
      anode = part.bracket_op.expr

      if val.tag == value_e.Undef:
        pass  # it will be checked later

      elif val.tag == value_e.Str:
        # Bash treats any string as an array, so we can't add our own
        # behavior here without making valid OSH invalid bash.
        e_die("Can't index string %r with integer", part.token.val,
              token=part.token)

      elif val.tag == value_e.StrArray:
        index = self.arith_ev.Eval(anode)
        try:
          # could be None because representation is sparse
          s = val.strs[index]
        except IndexError:
          s = None

        # Out-of-range and unset slots both become Undef.
        if s is None:
          val = value.Undef()
        else:
          val = value.Str(s)

      elif val.tag == value_e.AssocArray:
        # The key is evaluated with int_coerce=False.
        key = self.arith_ev.Eval(anode, int_coerce=False)
        try:
          val = value.Str(val.d[key])
        except KeyError:
          val = value.Undef()

      else:
        raise AssertionError(val.__class__.__name__)

    else:
      raise AssertionError(part.bracket_op.tag)

  if part.prefix_op:
    val = self._EmptyStrOrError(val)  # maybe error
    val = self._ApplyPrefixOp(val, part.prefix_op, token=part.token)
    # NOTE: When applying the length operator, we can't have a test or
    # suffix afterward.  And we don't want to decay the array

  elif part.suffix_op:
    op = part.suffix_op
    if op.tag == suffix_op_e.StringNullary:
      if op.op_id == Id.VOp0_P:
        prompt = self.prompt_ev.EvalPrompt(val)
        val = value.Str(prompt)
      elif op.op_id == Id.VOp0_Q:
        # NOTE(review): val.s is read without checking val.tag, so this
        # presumably expects a Str here -- confirm for Undef / array values.
        val = value.Str(string_ops.ShellQuote(val.s))
      else:
        raise NotImplementedError(op.op_id)

    elif op.tag == suffix_op_e.StringUnary:
      if LookupKind(part.suffix_op.op_id) == Kind.VTest:
        # TODO: Change style to:
        # if self._ApplyTestOp(...)
        #   return
        # It should return whether anything was done.  If not, we continue to
        # the end, where we might throw an error.

        assign_part_vals, effect = self._ApplyTestOp(val, part.suffix_op,
                                                     quoted, part_vals)

        # NOTE: Splicing part_values is necessary because of code like
        # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
        # do_glob/do_elide setting.
        if effect == effect_e.SpliceParts:
          return  # EARLY RETURN, part_vals mutated

        elif effect == effect_e.SpliceAndAssign:
          if var_name is None:
            # TODO: error context
            e_die("Can't assign to special variable")
          else:
            # NOTE: This decays arrays too!  'set -o strict_array' could
            # avoid it.
            rhs_str = _DecayPartValuesToString(assign_part_vals,
                                               self.splitter.GetJoinChar())
            state.SetLocalString(self.mem, var_name, rhs_str)
          return  # EARLY RETURN, part_vals mutated

        elif effect == effect_e.Error:
          raise NotImplementedError

        else:
          # The old one
          #val = self._EmptyStringPartOrError(part_val, quoted)
          pass  # do nothing, may still be undefined

      else:
        val = self._EmptyStrOrError(val)  # maybe error
        # Other suffix: value -> value
        val = self._ApplyUnarySuffixOp(val, part.suffix_op)

    elif op.tag == suffix_op_e.PatSub:  # PatSub, vectorized
      val = self._EmptyStrOrError(val)  # ${undef//x/y}

      # globs are supported in the pattern
      pat_val = self.EvalWordToString(op.pat, do_fnmatch=True)
      assert pat_val.tag == value_e.Str, pat_val

      if op.replace:
        replace_val = self.EvalWordToString(op.replace)
        assert replace_val.tag == value_e.Str, replace_val
        replace_str = replace_val.s
      else:
        replace_str = ''

      regex, warnings = glob_.GlobToERE(pat_val.s)
      if warnings:
        # TODO:
        # - Add 'set -o strict-glob' mode and expose warnings.
        #   "Glob is not in CANONICAL FORM".
        # - Propagate location info back to the 'op.pat' word.
        pass
      replacer = string_ops.GlobReplacer(regex, replace_str, op.spids[0])

      if val.tag == value_e.Str:
        s = replacer.Replace(val.s, op)
        val = value.Str(s)

      elif val.tag == value_e.StrArray:
        # Vectorized over the array; unset (None) entries are dropped.
        strs = []
        for s in val.strs:
          if s is not None:
            strs.append(replacer.Replace(s, op))
        val = value.StrArray(strs)

      else:
        raise AssertionError(val.__class__.__name__)

    elif op.tag == suffix_op_e.Slice:
      val = self._EmptyStrOrError(val)  # ${undef:3:1}

      # A missing begin means 0; a missing length means "to the end".
      if op.begin:
        begin = self.arith_ev.Eval(op.begin)
      else:
        begin = 0

      if op.length:
        length = self.arith_ev.Eval(op.length)
      else:
        length = None

      if val.tag == value_e.Str:  # Slice UTF-8 characters in a string.
        s = val.s

        try:
          if begin < 0:
            # It could be negative if we compute unicode length, but that's
            # confusing.

            # TODO: Instead of attributing it to the word part, it would be
            # better if we attributed it to arith_expr begin.
            raise util.InvalidSlice(
                "The start index of a string slice can't be negative: %d",
                begin, part=part)

          byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

          if length is None:
            byte_end = len(s)
          else:
            if length < 0:
              # TODO: Instead of attributing it to the word part, it would be
              # better if we attributed it to arith_expr begin.
              raise util.InvalidSlice(
                  "The length of a string slice can't be negative: %d",
                  length, part=part)

            byte_end = string_ops.AdvanceUtf8Chars(s, length, byte_begin)

        except (util.InvalidSlice, util.InvalidUtf8) as e:
          if self.exec_opts.strict_word_eval:
            raise
          else:
            # TODO:
            # - We don't see the error location here, but we see it when set
            #   -o strict-word-eval.
            # - Doesn't make the command exit with 1.  It just sets the word
            #   to empty string.
            util.warn(e.UserErrorString())
            substr = ''  # error condition
        else:
          substr = s[byte_begin : byte_end]
        val = value.Str(substr)

      elif val.tag == value_e.StrArray:  # Slice array entries.
        # NOTE: unset elements don't count towards the length.
        strs = []
        for s in val.strs[begin:]:
          if s is not None:
            strs.append(s)
            if len(strs) == length:  # never true for unspecified length
              break
        val = value.StrArray(strs)

      else:
        raise AssertionError(val.__class__.__name__)  # Not possible

  # After applying suffixes, process maybe_decay_array here.
  if maybe_decay_array and val.tag == value_e.StrArray:
    val = self._DecayArray(val)

  # For the case where there are no prefix or suffix ops.
  val = self._EmptyStrOrError(val)

  # For example, ${a} evaluates to value_t.Str(), but we want a
  # part_value.StringPartValue.
  part_val = _ValueToPartValue(val, quoted)
  part_vals.append(part_val)
def _EmptyStrArrayOrError(self, token):
  """Return an empty array for an undefined array, or die under nounset.

  Args:
    token: token naming the array; used for the error message and location.
  """
  assert token is not None
  if not self.exec_opts.nounset:
    return value.StrArray([])
  e_die('Undefined array %r', token.val, token=token)
def _ApplyPrefixOp(self, val, op_id, token):
  """Apply a prefix operator: length ${#x} or indirect expansion ${!x}.

  Args:
    val: a defined value (must not be Undef).
    op_id: Id.VSub_Pound for length, Id.VSub_Bang for indirect expansion.
    token: token used for error location.

  Returns:
    value

  Raises:
    AssertionError: on an unknown op_id or an unexpected value type.
  """
  assert val.tag != value_e.Undef

  if op_id == Id.VSub_Pound:  # LENGTH
    if val.tag == value_e.Str:
      # NOTE: Whether bash counts bytes or chars is affected by LANG
      # environment variables.
      # Should we respect that, or another way to select?  set -o
      # count-bytes?

      # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
      try:
        length = string_ops.CountUtf8Chars(val.s)
      except util.InvalidUtf8 as e:
        # TODO: Add location info from 'part'?  Only the caller has it.
        if self.exec_opts.strict_word_eval:
          raise
        else:
          # NOTE: Doesn't make the command exit with 1; it just returns a
          # length of -1.
          util.warn(e.UserErrorString())
          return value.Str('-1')

    elif val.tag == value_e.StrArray:
      # There can be empty placeholder values in the array.
      length = sum(1 for s in val.strs if s is not None)

    elif val.tag == value_e.AssocArray:
      # Number of keys.  Previously this case fell through and left 'length'
      # unbound.
      length = len(val.d)

    else:
      raise AssertionError(val.__class__.__name__)

    return value.Str(str(length))

  elif op_id == Id.VSub_Bang:  # ${!foo}, "indirect expansion"
    # NOTES:
    # - Could translate to eval('$' + name) or eval("\$$name")
    # - ${!array[@]} means something completely different.  TODO: implement
    #   that.
    # - It might make sense to suggest implementing this with associative
    #   arrays?

    if val.tag == value_e.Str:
      # plain variable name, like 'foo'
      if match.IsValidVarName(val.s):
        return self.mem.GetVar(val.s)

      # positional argument, like '1'
      try:
        return self.mem.GetArgNum(int(val.s))
      except ValueError:
        pass

      if val.s in ('@', '*'):
        # TODO maybe_decay_array
        return value.StrArray(self.mem.GetArgv())

      # otherwise an array reference, like 'arr[0]' or 'arr[xyz]' or 'arr[@]'
      i = val.s.find('[')
      if i >= 0 and val.s[-1] == ']':
        name, index = val.s[:i], val.s[i+1:-1]
        result = self._EvalIndirectArrayExpansion(name, index)
        if result is not None:
          return result

      # Note that bash doesn't consider this fatal.  It makes the
      # command exit with '1', but we don't have that ability yet?
      e_die('Bad indirect expansion: %r', val.s, token=token)

    elif val.tag == value_e.StrArray:
      # The indices of the set (non-None) entries, as strings.
      indices = [str(i) for i, s in enumerate(val.strs) if s is not None]
      return value.StrArray(indices)
    else:
      raise AssertionError

  else:
    raise AssertionError(op_id)
def __call__(self, cmd_val):
  """Run a declare/typeset/local style builtin.

  With -f/-F it looks up and prints functions, with -p it looks up and prints
  variables; otherwise it sets each name/value pair in cmd_val with the
  requested flags.

  Returns:
    Exit status: 0 on success, 1 if a name wasn't found or a value failed
    _CheckType().

  Raises:
    args.UsageError: for -f or -p without names (-F without names lists all
      functions instead).
  """
  reader = args.Reader(cmd_val.argv, spids=cmd_val.arg_spids)
  reader.Next()
  arg, _ = NEW_VAR_SPEC.Parse(reader)

  status = 0

  # NOTE: in bash, -f shows the function body, while -F shows the name.  In
  # osh, they're identical and behave like -F.
  if arg.f or arg.F:  # Lookup and print functions.
    wanted = [p.lval.name for p in cmd_val.pairs]
    if not wanted:
      if not arg.F:
        raise args.UsageError('declare/typeset -f without args')
      for defined in sorted(self.funcs):
        print('declare -f %s' % (defined))
      return status

    for func in wanted:
      if func not in self.funcs:
        status = 1
        continue
      print(func)
      # TODO: Could print LST, or render LST.  Bash does this.  'trap' too.
      #print(funcs[name])
    return status

  if arg.p:  # Lookup and print variables.
    wanted = [p.lval.name for p in cmd_val.pairs]
    if not wanted:
      raise args.UsageError('declare/typeset -p without args')

    for var in wanted:
      if self.mem.GetVar(var).tag == value_e.Undef:
        status = 1
      else:
        # TODO: Print flags.
        print(var)
    return status

  #
  # Set variables
  #

  #raise args.UsageError("doesn't understand %s" % cmd_val.argv[1:])
  # 'local' is always local; declare/typeset is local unless -g is given.
  if cmd_val.builtin_id == builtin_e.LOCAL or not arg.g:
    lookup_mode = scope_e.LocalOnly
  else:
    lookup_mode = scope_e.GlobalOnly

  # '-' sets a flag and '+' clears it.
  flag_args = (
      (arg.x, var_flags_e.Exported),
      (arg.r, var_flags_e.ReadOnly),
  )
  flags_to_set = [flag for sign, flag in flag_args if sign == '-']
  flags_to_clear = [flag for sign, flag in flag_args if sign == '+']

  for pair in cmd_val.pairs:
    rval = pair.rval
    if rval is None:
      if arg.a:
        rval = value.StrArray([])
      elif arg.A:
        rval = value.AssocArray({})

    type_ok = _CheckType(rval, arg, self.errfmt, pair.spid)
    if not type_ok:
      return 1

    self.mem.SetVar(pair.lval, rval, flags_to_set, lookup_mode,
                    flags_to_clear=flags_to_clear)

  return status
def testSetVarClearFlag(self):
  """Exercise SetVar() and ClearFlag() on the object made by _InitMem().

  Covers local and dynamic scope lookup, the exported and readonly flags,
  flag-only updates (value None), indexed array assignment, and the failures
  expected for exporting an array or writing to a readonly variable.

  The steps share state and build on each other, so their order matters.
  """
  mem = _InitMem()
  print(mem)

  mem.PushCall('my-func', 0, ['ONE'])
  self.assertEqual(2, len(mem.var_stack))  # internal details

  # local x=y
  mem.SetVar(
      lvalue.LhsName('x'), value.Str('y'), (), scope_e.LocalOnly)
  self.assertEqual('y', mem.var_stack[-1]['x'].val.s)

  # New frame
  mem.PushCall('my-func', 0, ['TWO'])
  self.assertEqual(3, len(mem.var_stack))  # internal details

  # x=y -- test out dynamic scope
  # The existing 'x' in the caller's frame is updated; no new 'x' appears in
  # the current frame.
  mem.SetVar(
      lvalue.LhsName('x'), value.Str('YYY'), (), scope_e.Dynamic)
  self.assertEqual('YYY', mem.var_stack[-2]['x'].val.s)
  self.assertEqual(None, mem.var_stack[-1].get('x'))

  # myglobal=g
  # Not found in any frame, so it lands in the bottom frame.
  mem.SetVar(
      lvalue.LhsName('myglobal'), value.Str('g'), (), scope_e.Dynamic)
  self.assertEqual('g', mem.var_stack[0]['myglobal'].val.s)
  self.assertEqual(False, mem.var_stack[0]['myglobal'].exported)

  # 'export PYTHONPATH=/'
  mem.SetVar(
      lvalue.LhsName('PYTHONPATH'), value.Str('/'),
      (var_flags_e.Exported,), scope_e.Dynamic)
  self.assertEqual('/', mem.var_stack[0]['PYTHONPATH'].val.s)
  self.assertEqual(True, mem.var_stack[0]['PYTHONPATH'].exported)

  ex = mem.GetExported()
  self.assertEqual('/', ex['PYTHONPATH'])

  # Exporting again without a value keeps the flag.
  mem.SetVar(
      lvalue.LhsName('PYTHONPATH'), None, (var_flags_e.Exported,),
      scope_e.Dynamic)
  self.assertEqual(True, mem.var_stack[0]['PYTHONPATH'].exported)

  # 'export myglobal'.  None means don't touch it.  Undef would be confusing
  # because it might mean "unset", but we have a separate API for that.
  mem.SetVar(
      lvalue.LhsName('myglobal'), None, (var_flags_e.Exported,),
      scope_e.Dynamic)
  self.assertEqual(True, mem.var_stack[0]['myglobal'].exported)

  # export g2  -- define and export empty
  mem.SetVar(
      lvalue.LhsName('g2'), None, (var_flags_e.Exported,),
      scope_e.Dynamic)
  self.assertEqual(value_e.Undef, mem.var_stack[0]['g2'].val.tag)
  self.assertEqual(True, mem.var_stack[0]['g2'].exported)

  # readonly myglobal
  self.assertEqual(False, mem.var_stack[0]['myglobal'].readonly)
  mem.SetVar(
      lvalue.LhsName('myglobal'), None, (var_flags_e.ReadOnly,),
      scope_e.Dynamic)
  self.assertEqual(True, mem.var_stack[0]['myglobal'].readonly)

  # Assigning a new value without flags keeps the exported flag.
  mem.SetVar(
      lvalue.LhsName('PYTHONPATH'), value.Str('/lib'), (),
      scope_e.Dynamic)
  self.assertEqual('/lib', mem.var_stack[0]['PYTHONPATH'].val.s)
  self.assertEqual(True, mem.var_stack[0]['PYTHONPATH'].exported)

  # COMPREPLY=(1 2 3)
  # invariant to enforce: arrays can't be exported
  mem.SetVar(
      lvalue.LhsName('COMPREPLY'), value.StrArray(['1', '2', '3']),
      (), scope_e.GlobalOnly)
  self.assertEqual(
      ['1', '2', '3'], mem.var_stack[0]['COMPREPLY'].val.strs)

  # export COMPREPLY
  try:
    mem.SetVar(
        lvalue.LhsName('COMPREPLY'), None, (var_flags_e.Exported,),
        scope_e.Dynamic)
  except util.FatalRuntimeError as e:
    pass
  else:
    self.fail("Expected failure")

  # readonly r=1
  mem.SetVar(
      lvalue.LhsName('r'), value.Str('1'), (var_flags_e.ReadOnly,),
      scope_e.Dynamic)
  self.assertEqual('1', mem.var_stack[0]['r'].val.s)
  self.assertEqual(False, mem.var_stack[0]['r'].exported)
  self.assertEqual(True, mem.var_stack[0]['r'].readonly)
  print(mem)

  # r=newvalue
  # Writing to a readonly variable must fail.
  try:
    mem.SetVar(
        lvalue.LhsName('r'), value.Str('newvalue'), (), scope_e.Dynamic)
  except util.FatalRuntimeError as e:
    pass
  else:
    self.fail("Expected failure")

  # readonly r2 -- define empty readonly
  mem.SetVar(
      lvalue.LhsName('r2'), None, (var_flags_e.ReadOnly,),
      scope_e.Dynamic)
  self.assertEqual(value_e.Undef, mem.var_stack[0]['r2'].val.tag)
  self.assertEqual(True, mem.var_stack[0]['r2'].readonly)

  # export -n PYTHONPATH
  # Remove the exported property.  NOTE: scope is LocalOnly for Oil?
  self.assertEqual(True, mem.var_stack[0]['PYTHONPATH'].exported)
  mem.ClearFlag('PYTHONPATH', var_flags_e.Exported, scope_e.Dynamic)
  self.assertEqual(False, mem.var_stack[0]['PYTHONPATH'].exported)

  lhs = lvalue.LhsIndexedName('a', 1)
  lhs.spids.append(0)
  # a[1]=2
  # Creates a new array; index 0 is left unset (None).
  mem.SetVar(lhs, value.Str('2'), (), scope_e.Dynamic)
  self.assertEqual([None, '2'], mem.var_stack[0]['a'].val.strs)

  # a[1]=3
  mem.SetVar(lhs, value.Str('3'), (), scope_e.Dynamic)
  self.assertEqual([None, '3'], mem.var_stack[0]['a'].val.strs)

  # a[1]=(x y z)  # illegal
  try:
    mem.SetVar(lhs, value.StrArray(['x', 'y', 'z']), (), scope_e.Dynamic)
  except util.FatalRuntimeError as e:
    pass
  else:
    self.fail("Expected failure")

  # readonly a
  mem.SetVar(
      lvalue.LhsName('a'), None, (var_flags_e.ReadOnly,),
      scope_e.Dynamic)
  self.assertEqual(True, mem.var_stack[0]['a'].readonly)

  # Assigning to an element of a readonly array must fail too.
  try:
    # a[1]=3
    mem.SetVar(lhs, value.Str('3'), (), scope_e.Dynamic)
  except util.FatalRuntimeError as e:
    pass
  else:
    self.fail("Expected failure")
def GetVar(self, name, lookup_mode=scope_e.Dynamic):
  """Return the value of a variable, computing special variables on demand.

  PIPESTATUS, FUNCNAME, BASH_SOURCE, CALL_SOURCE, BASH_LINENO, LINENO and
  SOURCE_NAME are derived from interpreter state; everything else is looked
  up with _FindCellAndNamespace().

  Args:
    name: variable name (must be a str).
    lookup_mode: scope used for ordinary variables.

  Returns:
    The value, or value.Undef() if no cell is found.
  """
  assert isinstance(name, str), name

  # TODO: Short-circuit down to _FindCellAndNamespace by doing a single hash
  # lookup:
  # COMPUTED_VARS = {'PIPESTATUS': 1, 'FUNCNAME': 1, ...}
  # if name not in COMPUTED_VARS: ...

  if name == 'PIPESTATUS':
    latest = self.pipe_status[-1]
    return value.StrArray([str(code) for code in latest])

  # Do lookup of system globals before looking at user variables.  Note: we
  # could optimize this at compile-time like $?.  That would break
  # ${!varref}, but it's already broken for $?.
  if name == 'FUNCNAME':
    # bash wants it in reverse order.  This is a little inefficient but we're
    # not depending on deque().
    names = []
    for func, sourced, _, _, _ in reversed(self.debug_stack):
      if func:
        names.append(func)
      if sourced:
        names.append('source')  # bash doesn't give name
      # Temp stacks are ignored
    if self.has_main:
      names.append('main')  # bash does this
    return value.StrArray(names)  # TODO: Reuse this object too?

  # This isn't the call source, it's the source of the function DEFINITION
  # (or the sourced # file itself).
  if name == 'BASH_SOURCE':
    newest_first = list(reversed(self.bash_source))
    return value.StrArray(newest_first)

  # This is how bash source SHOULD be defined, but it's not!
  if name == 'CALL_SOURCE':
    sources = []
    for _, _, spid, _, _ in reversed(self.debug_stack):
      # NO_INTEGER should only happen for the first entry
      if spid != const.NO_INTEGER:
        line_span = self.arena.GetLineSpan(spid)
        sources.append(self.arena.GetLineSourceString(line_span.line_id))
    if self.has_main:
      sources.append('-')  # Bash does this to line up with main?
    return value.StrArray(sources)  # TODO: Reuse this object too?

  if name == 'BASH_LINENO':
    line_nums = []
    for _, _, spid, _, _ in reversed(self.debug_stack):
      # NO_INTEGER should only happen for the first entry
      if spid != const.NO_INTEGER:
        line_span = self.arena.GetLineSpan(spid)
        line_nums.append(str(self.arena.GetLineNumber(line_span.line_id)))
    if self.has_main:
      line_nums.append('0')  # Bash does this to line up with main?
    return value.StrArray(line_nums)  # TODO: Reuse this object too?

  if name == 'LINENO':
    # Update and reuse the self.line_num object.
    line_span = self.arena.GetLineSpan(self.current_spid)
    # TODO: maybe use interned GetLineNumStr?
    # Perf note kept from the original: assigning the .s field here was
    # observed to be slow (Python's configure took 75 seconds, versus 13 when
    # a different attribute was assigned instead).
    self.line_num.s = str(self.arena.GetLineNumber(line_span.line_id))
    return self.line_num

  # This is OSH-specific.  Get rid of it in favor of ${BASH_SOURCE[0]} ?
  if name == 'SOURCE_NAME':
    # Update and reuse an object.
    line_span = self.arena.GetLineSpan(self.current_spid)
    self.source_name.s = self.arena.GetLineSourceString(line_span.line_id)
    return self.source_name

  found, _ = self._FindCellAndNamespace(name, lookup_mode, writing=False)
  return found.val if found else value.Undef()
def SetGlobalArray(mem, name, a):
  """Helper for completion: bind `name` to an array in the global scope.

  Args:
    mem: object providing SetVar().
    name: variable name.
    a: list of array entries.
  """
  assert isinstance(a, list)
  target = lhs_expr.LhsName(name)
  array_val = value.StrArray(a)
  no_flags = ()
  mem.SetVar(target, array_val, no_flags, scope_e.GlobalOnly)
def _Dispatch(self, node, fork_external):
  """Execute one command node, switching on node.tag.

  Args:
    node: the command node to run.
    fork_external: passed through to RunSimpleCommand() for simple commands.

  Returns:
    (status, check_errexit): the exit status of the command, and whether the
    caller should apply errexit to that status.

  Raises:
    _ControlFlow: for break/continue/return/exit that is allowed here.
    NotImplementedError: for unsupported node types and for '|&'.
  """
  # If we call RunCommandSub in a recursive call to the executor, this will
  # be set true (if strict-errexit is false).  But it only lasts for one
  # command.
  self.check_command_sub_status = False

  #argv0 = None  # for error message
  check_errexit = False  # for errexit

  if node.tag == command_e.SimpleCommand:
    check_errexit = True

    # Find span_id for a basic implementation of $LINENO, e.g.
    # PS4='+$SOURCE_NAME:$LINENO:'
    # NOTE: osh2oil uses node.more_env, but we don't need that.
    span_id = const.NO_INTEGER
    if node.words:
      first_word = node.words[0]
      span_id = word.LeftMostSpanForWord(first_word)

    self.mem.SetCurrentSpanId(span_id)

    # PROBLEM: We want to log argv in 'xtrace' mode, but we may have already
    # redirected here, which screws up logging.  For example, 'echo hi
    # >/dev/null 2>&1'.  We want to evaluate argv and log it BEFORE applying
    # redirects.

    # Another problem:
    # - tracing can be called concurrently from multiple processes, leading
    #   to overlap.  Maybe have a mode that creates a file per process.
    #   xtrace-proc
    # - line numbers for every command would be very nice.  But then you have
    #   to print the filename too.

    words = braces.BraceExpandWords(node.words)
    argv = self.word_ev.EvalWordSequence(words)

    # This comes before evaluating env, in case there are problems evaluating
    # it.  We could trace the env separately?  Also trace unevaluated code
    # with set-o verbose?
    self.tracer.OnSimpleCommand(argv)

    # NOTE: RunSimpleCommand never returns when fork_external=False!
    if node.more_env:  # I think this guard is necessary?
      self.mem.PushTemp()
      try:
        self._EvalTempEnv(node.more_env)
        status = self.RunSimpleCommand(argv, fork_external, span_id)
      finally:
        self.mem.PopTemp()
    else:
      status = self.RunSimpleCommand(argv, fork_external, span_id)

  elif node.tag == command_e.ExpandedAlias:
    # Expanded aliases need redirects and env bindings from the calling
    # context, as well as redirects in the expansion!

    # TODO: SetCurrentSpanId to OUTSIDE?  Don't bother with stuff inside
    # expansion, since aliases are discouraged.

    if node.more_env:
      self.mem.PushTemp()
      try:
        self._EvalTempEnv(node.more_env)
        status = self._Execute(node.child)
      finally:
        self.mem.PopTemp()
    else:
      status = self._Execute(node.child)

  elif node.tag == command_e.Sentence:
    # Don't check_errexit since this isn't a real node!
    if node.terminator.id == Id.Op_Semi:
      status = self._Execute(node.child)
    else:
      status = self._RunJobInBackground(node.child)

  elif node.tag == command_e.Pipeline:
    check_errexit = True
    if node.stderr_indices:
      raise NotImplementedError('|&')

    if node.negated:
      self._PushErrExit()
      try:
        status2 = self._RunPipeline(node)
      finally:
        self._PopErrExit()

      # errexit is disabled for !.
      check_errexit = False
      status = 1 if status2 == 0 else 0
    else:
      status = self._RunPipeline(node)

  elif node.tag == command_e.Subshell:
    check_errexit = True
    # This makes sure we don't waste a process if we'd launch one anyway.
    p = self._MakeProcess(node.command_list)
    status = p.Run(self.waiter)

  elif node.tag == command_e.DBracket:
    check_errexit = True
    result = self.bool_ev.Eval(node.expr)
    status = 0 if result else 1

  elif node.tag == command_e.DParen:
    check_errexit = True
    i = self.arith_ev.Eval(node.child)
    status = 0 if i != 0 else 1

  elif node.tag == command_e.Assignment:
    # TODO: Also do dynamic assignment here
    flags = word_compile.ParseAssignFlags(node.flags)

    if node.keyword == Id.Assign_Local:
      lookup_mode = scope_e.LocalOnly
    # typeset and declare are synonyms?  I see typeset -a a=() the most.
    elif node.keyword in (Id.Assign_Declare, Id.Assign_Typeset):
      # declare is like local, except it can also be used outside functions?
      if var_flags_e.Global in flags:
        lookup_mode = scope_e.GlobalOnly
      else:
        lookup_mode = scope_e.LocalOnly
    elif node.keyword == Id.Assign_Readonly:
      lookup_mode = scope_e.Dynamic
      flags.append(var_flags_e.ReadOnly)
    elif node.keyword == Id.Assign_None:  # mutate existing local or global
      lookup_mode = scope_e.Dynamic
    else:
      raise AssertionError(node.keyword)

    for pair in node.pairs:
      if pair.op == assign_op_e.PlusEqual:
        assert pair.rhs, pair.rhs  # I don't think a+= is valid?
        val = self.word_ev.EvalRhsWord(pair.rhs)
        old_val, lval = expr_eval.EvalLhsAndLookup(pair.lhs, self.arith_ev,
                                                   self.mem, self.exec_opts)
        # Decide how to combine based on the (old, new) value types.
        sig = (old_val.tag, val.tag)
        if sig == (value_e.Undef, value_e.Str):
          pass  # val is RHS
        elif sig == (value_e.Undef, value_e.StrArray):
          pass  # val is RHS
        elif sig == (value_e.Str, value_e.Str):
          val = value.Str(old_val.s + val.s)
        elif sig == (value_e.Str, value_e.StrArray):
          e_die("Can't append array to string")
        elif sig == (value_e.StrArray, value_e.Str):
          e_die("Can't append string to array")
        elif sig == (value_e.StrArray, value_e.StrArray):
          val = value.StrArray(old_val.strs + val.strs)

      else:  # plain assignment
        spid = pair.spids[0]  # Source location for tracing
        lval = self._EvalLhs(pair.lhs, spid, lookup_mode)

        # RHS can be a string or array.
        if pair.rhs:
          val = self.word_ev.EvalRhsWord(pair.rhs)
          assert isinstance(val, value_t), val

        else:  # e.g. 'readonly x' or 'local x'
          val = None

      # NOTE: In bash and mksh, declare -a myarray makes an empty cell with
      # Undef value, but the 'array' attribute.

      #log('setting %s to %s with flags %s', lval, val, flags)
      self.mem.SetVar(lval, val, flags, lookup_mode)

      # Assignment always appears to have a spid.
      if node.spids:
        current_spid = node.spids[0]
      else:
        current_spid = const.NO_INTEGER
      self.mem.SetCurrentSpanId(current_spid)
      self.tracer.OnAssignment(lval, pair.op, val, flags, lookup_mode)

    # PATCH to be compatible with existing shells: If the assignment had a
    # command sub like:
    #
    # s=$(echo one; false)
    #
    # then its status will be in mem.last_status, and we can check it here.
    # If there was NOT a command sub in the assignment, then we don't want to
    # check it.
    if node.keyword == Id.Assign_None:  # mutate existing local or global
      # Only do this if there was a command sub?  How?  Look at node?
      # Set a flag in mem?   self.mem.last_status or
      if self.check_command_sub_status:
        self._CheckStatus(self.mem.last_status, node)
        # A global assignment shouldn't clear $?.
        status = self.mem.last_status
      else:
        status = 0
    else:
      # To be compatible with existing shells, local assignments DO clear
      # $?.  Even in strict mode, we don't need to bother setting
      # check_errexit = True, because we would have already checked the
      # command sub in RunCommandSub.
      status = 0

    # TODO: maybe we should have a "sane-status" that respects this:
    # false; echo $?; local f=x; echo $?

  elif node.tag == command_e.ControlFlow:
    if node.arg_word:  # Evaluate the argument
      val = self.word_ev.EvalWordToString(node.arg_word)
      assert val.tag == value_e.Str
      try:
        arg = int(val.s)  # They all take integers
      except ValueError:
        # Report a shell error instead of letting a Python ValueError escape
        # for input like 'return foo'.
        e_die('Control flow expected an integer argument, got %r', val.s,
              token=node.token)
    else:
      arg = 0  # return 0, exit 0, break 0 levels, etc.

    # NOTE: We don't do anything about a top-level 'return' here.  Unlike in
    # bash, that is OK.  If you can return from a sourced script, it makes
    # sense to return from a main script.
    ok = True
    tok = node.token
    if (tok.id in (Id.ControlFlow_Break, Id.ControlFlow_Continue) and
        self.loop_level == 0):
      ok = False
      msg = 'Invalid control flow at top level'

    if ok:
      raise _ControlFlow(tok, arg)

    if self.exec_opts.strict_control_flow:
      e_die(msg, token=tok)
    else:
      # Only print warnings, never fatal.
      # Bash oddly only exits 1 for 'return', but no other shell does.
      ui.PrintFilenameAndLine(tok.span_id, self.arena)
      util.warn(msg)
      status = 0

  # The only difference between these two is that CommandList has no
  # redirects.  We already took care of that above.
  elif node.tag in (command_e.CommandList, command_e.BraceGroup):
    status = self._ExecuteList(node.children)
    check_errexit = False

  elif node.tag == command_e.AndOr:
    # NOTE: && and || have EQUAL precedence in command mode.  See case #13
    # in dbracket.test.sh.

    left = node.children[0]

    # Suppress failure for every child except the last one.
    self._PushErrExit()
    try:
      status = self._Execute(left)
    finally:
      self._PopErrExit()

    i = 1
    n = len(node.children)
    while i < n:
      #log('i %d status %d', i, status)
      child = node.children[i]
      op_id = node.ops[i-1]

      #log('child %s op_id %s', child, op_id)

      if op_id == Id.Op_DPipe and status == 0:
        i += 1
        continue  # short circuit

      elif op_id == Id.Op_DAmp and status != 0:
        i += 1
        continue  # short circuit

      if i == n - 1:  # errexit handled differently for last child
        status = self._Execute(child)
        check_errexit = True
      else:
        self._PushErrExit()
        try:
          status = self._Execute(child)
        finally:
          self._PopErrExit()

      i += 1

  elif node.tag == command_e.WhileUntil:
    # 'while' stops on a failing condition, 'until' on a succeeding one.
    if node.keyword.id == Id.KW_While:
      _DonePredicate = lambda status: status != 0
    else:
      _DonePredicate = lambda status: status == 0

    status = 0

    self.loop_level += 1
    try:
      while True:
        self._PushErrExit()
        try:
          cond_status = self._ExecuteList(node.cond)
        finally:
          self._PopErrExit()

        if _DonePredicate(cond_status):
          break
        try:
          status = self._Execute(node.body)  # last one wins
        except _ControlFlow as e:
          if e.IsBreak():
            status = 0
            break
          elif e.IsContinue():
            status = 0
            continue
          else:  # return needs to pop up more
            raise
    finally:
      self.loop_level -= 1

  elif node.tag == command_e.ForEach:
    iter_name = node.iter_name
    if node.do_arg_iter:
      iter_list = self.mem.GetArgv()
    else:
      words = braces.BraceExpandWords(node.iter_words)
      iter_list = self.word_ev.EvalWordSequence(words)
      # We need word splitting and so forth
      # NOTE: This expands globs too.  TODO: We should pass in a Globber()
      # object.

    status = 0  # in case we don't loop
    self.loop_level += 1
    try:
      for x in iter_list:
        #log('> ForEach setting %r', x)
        state.SetLocalString(self.mem, iter_name, x)
        #log('<')

        try:
          status = self._Execute(node.body)  # last one wins
        except _ControlFlow as e:
          if e.IsBreak():
            status = 0
            break
          elif e.IsContinue():
            status = 0
          else:  # return needs to pop up more
            raise
    finally:
      self.loop_level -= 1

  elif node.tag == command_e.ForExpr:
    status = 0
    init, cond, body, update = node.init, node.cond, node.body, node.update
    if init:
      self.arith_ev.Eval(init)

    self.loop_level += 1
    try:
      while True:
        if cond:
          b = self.arith_ev.Eval(cond)
          if not b:
            break

        try:
          status = self._Execute(body)
        except _ControlFlow as e:
          if e.IsBreak():
            status = 0
            break
          elif e.IsContinue():
            status = 0
          else:  # return needs to pop up more
            raise

        # 'continue' falls through to here, so the update still runs.
        if update:
          self.arith_ev.Eval(update)

    finally:
      self.loop_level -= 1

  elif node.tag == command_e.DoGroup:
    status = self._ExecuteList(node.children)
    check_errexit = False  # not real statements

  elif node.tag == command_e.FuncDef:
    # NOTE: Would it make sense to evaluate the redirects BEFORE entering?
    # It will save time on function calls.
    self.funcs[node.name] = node
    status = 0

  elif node.tag == command_e.If:
    done = False
    for arm in node.arms:
      self._PushErrExit()
      try:
        status = self._ExecuteList(arm.cond)
      finally:
        self._PopErrExit()

      if status == 0:
        status = self._ExecuteList(arm.action)
        done = True
        break
    # TODO: The compiler should flatten this
    if not done:
      if node.else_action is not None:
        status = self._ExecuteList(node.else_action)
      else:
        # No branch ran: the status of 'if' is zero, not the status of the
        # last failed condition.
        status = 0

  elif node.tag == command_e.NoOp:
    status = 0  # make it true

  elif node.tag == command_e.Case:
    val = self.word_ev.EvalWordToString(node.to_match)
    to_match = val.s

    status = 0  # If there are no arms, it should be zero?
    done = False

    for arm in node.arms:
      for pat_word in arm.pat_list:
        # NOTE: Is it OK that we're evaluating these as we go?

        # TODO: case "$@") shouldn't succeed?  That's a type error?
        # That requires strict-array?

        pat_val = self.word_ev.EvalWordToString(pat_word, do_fnmatch=True)
        #log('Matching word %r against pattern %r', to_match, pat_val.s)
        if libc.fnmatch(pat_val.s, to_match):
          status = self._ExecuteList(arm.action)
          done = True  # TODO: Parse ;;& and for fallthrough and such?
          break  # Only execute action ONCE
      if done:
        break

  elif node.tag == command_e.TimeBlock:
    # TODO:
    # - When do we need RUSAGE_CHILDREN?
    # - Respect TIMEFORMAT environment variable.
    # "If this variable is not set, Bash acts as if it had the value"
    # $'\nreal\t%3lR\nuser\t%3lU\nsys\t%3lS'
    # "A trailing newline is added when the format string is displayed."

    start_t = time.time()  # calls gettimeofday() under the hood
    start_u = resource.getrusage(resource.RUSAGE_SELF)
    status = self._Execute(node.pipeline)

    end_t = time.time()
    end_u = resource.getrusage(resource.RUSAGE_SELF)

    real = end_t - start_t
    user = end_u.ru_utime - start_u.ru_utime
    sys_ = end_u.ru_stime - start_u.ru_stime
    libc.print_time(real, user, sys_)

  else:
    raise NotImplementedError(node.__class__.__name__)

  return status, check_errexit
def GetVar(self, name, lookup_mode=scope_e.Dynamic):
    """Return the value bound to a variable name.

    A fixed set of interpreter-maintained names (FUNCNAME, BASH_SOURCE,
    CALL_SOURCE, BASH_LINENO, LINENO, SOURCE_NAME) is answered directly from
    this object's state, before any user variable is consulted.  Every other
    name is resolved through _FindCellAndNamespace() with writing=False.

    Args:
      name: variable name; asserted to be a str.
      lookup_mode: scope mode forwarded to _FindCellAndNamespace() for
        ordinary variables.

    Returns:
      value.StrArray for FUNCNAME, BASH_SOURCE, CALL_SOURCE and BASH_LINENO;
      self.line_num for LINENO; self.source_name for SOURCE_NAME; otherwise
      the found cell's val, or value.Undef() when no cell is found.
    """
    assert isinstance(name, str), name

    # System globals are looked up before user variables.  Note: this could
    # be optimized at compile-time like $?.  That would break ${!varref}, but
    # it's already broken for $?.
    if name == 'FUNCNAME':
        # bash wants it in reverse order.  This is a little inefficient but
        # we're not depending on deque().
        frames = []
        for fn, src, _, _, _ in reversed(self.debug_stack):
            # Entries with neither field set contribute nothing (temp stacks
            # are ignored).
            if fn:
                frames.append(fn)
            if src:
                frames.append('source')  # bash doesn't give name
        if self.has_main:
            frames.append('main')  # bash does this
        return value.StrArray(frames)  # TODO: Reuse this object too?

    if name == 'BASH_SOURCE':
        # This isn't the call source, it's the source of the function
        # DEFINITION (or the sourced file itself).
        sources = list(self.bash_source)
        sources.reverse()
        return value.StrArray(sources)

    if name in ('CALL_SOURCE', 'BASH_LINENO'):
        # Both names walk the same call sites: CALL_SOURCE reports each
        # site's path, BASH_LINENO its line number as a string.
        # CALL_SOURCE is how bash source SHOULD be defined, but it's not!
        want_path = (name == 'CALL_SOURCE')
        entries = []
        for _, _, call_spid, _, _ in reversed(self.debug_stack):
            # should only happen for the first entry
            if call_spid == const.NO_INTEGER:
                continue
            span = self.arena.GetLineSpan(call_spid)
            path, line_num = self.arena.GetDebugInfo(span.line_id)
            entries.append(path if want_path else str(line_num))
        if self.has_main:
            # Bash does this to line up with main?
            entries.append('-' if want_path else '0')
        return value.StrArray(entries)  # TODO: Reuse this object too?

    if name == 'LINENO':
        return self.line_num

    if name == 'SOURCE_NAME':
        # This is OSH-specific.  Get rid of it in favor of ${BASH_SOURCE[0]} ?
        return self.source_name

    # Ordinary variable: only the cell is needed, not its namespace.
    cell, _namespace = self._FindCellAndNamespace(
        name, lookup_mode, writing=False)
    return cell.val if cell else value.Undef()