def Echo(argv):
  """The 'echo' builtin.  Doesn't depend on executor state.

  Writes the non-flag arguments to stdout separated by single spaces,
  followed by a newline unless the -n flag was parsed.

  Args:
    argv: list of argument strings, as accepted by echo_spec.ParseLikeEcho.

  Returns:
    0, always.

  TODO: Where to put help?  docstring?
  """
  # NOTE: both getopt and optparse are unsuitable for 'echo' because:
  # - 'echo -c' should print '-c', not fail
  # - echo '---' should print ---, not fail
  arg, i = echo_spec.ParseLikeEcho(argv)
  if arg.e:
    util.warn('*** echo -e not implemented ***')

  # Only the words after the parsed flags are printed.  Indexing argv[-1]
  # unconditionally would print the flag itself for a bare 'echo -n'.
  out = ' '.join(argv[i:])
  if not arg.n:
    out += '\n'
  sys.stdout.write(out)

  # Do I need the flush?  Had a problem here under load (IOError: [Errno 32]
  # Broken pipe raised from flush()), but it might not have been because of
  # that.
  sys.stdout.flush()
  return 0
def _ValToArithOrError(self, val, int_coerce=True, blame_word=None,
                       span_id=const.NO_INTEGER):
  """Convert val with self._ValToArith, falling back to 0 on failure.

  Args:
    val: value to convert; passed through to self._ValToArith.
    int_coerce: forwarded to self._ValToArith.
    blame_word: optional word used to derive span_id when none is given.
    span_id: location for error reporting; const.NO_INTEGER means unknown.

  Returns:
    The result of self._ValToArith, or 0 if it raised
    util.FatalRuntimeError and strict_arith is off.

  Raises:
    util.FatalRuntimeError: re-raised when self.exec_opts.strict_arith is set.
  """
  if span_id == const.NO_INTEGER and blame_word:
    span_id = word.LeftMostSpanForWord(blame_word)

  try:
    return self._ValToArith(val, span_id, int_coerce=int_coerce)
  except util.FatalRuntimeError as err:
    if self.exec_opts.strict_arith:
      raise

    # Non-strict mode: report where the error came from, warn, and use 0.
    err_span_id = word.SpanIdFromError(err)
    if self.arena:  # BoolEvaluator for test builtin doesn't have it.
      if err_span_id == const.NO_INTEGER:
        log('*** Warning has no location info ***')
      else:
        ui.PrintFilenameAndLine(err_span_id, self.arena)
    warn(err.UserErrorString())
    return 0
def Wait(self):
  """Reap one child process and run the callback registered for its PID.

  Blocks in os.wait() until some child changes state.  An exited child's
  status is converted with os.WEXITSTATUS; a signaled child's raw status is
  passed through unchanged.

  Returns:
    False if there is nothing to wait for (ECHILD), so the caller should
    stop; True otherwise, so the caller should keep waiting.

  Raises:
    OSError: any os.wait() failure other than ECHILD or EINTR.
  """
  # This is a list of async jobs
  while True:
    try:
      pid, status = os.wait()
    except OSError as e:
      if e.errno == errno.ECHILD:
        #log('WAIT ECHILD')
        return False  # nothing to wait for caller should stop
      if e.errno == errno.EINTR:
        # wait() was interrupted by a signal handler (e.g. one registered
        # for SIGINT).  That is not a failure; just wait again.
        continue
      raise  # An error we don't know about.
    break  # no exception thrown, so no need to retry

  #log('WAIT got %s %s', pid, status)

  # TODO: change status in more cases.
  if os.WIFSIGNALED(status):
    pass
  elif os.WIFEXITED(status):
    status = os.WEXITSTATUS(status)
    #log('exit status: %s', status)

  # This could happen via coding error.  But this may legitimately happen
  # if a grandchild outlives the child (its parent).  Then it is reparented
  # under this process, so we might receive notification of its exit, even
  # though we didn't start it.  We can't have any knowledge of such
  # processes, so print a warning.
  if pid not in self.callbacks:
    util.warn("PID %d stopped, but osh didn't start it", pid)
    return True  # caller should keep waiting

  callback = self.callbacks.pop(pid)
  callback(pid, status)
  self.last_status = status  # for wait -n

  return True  # caller should keep waiting
def _EvalRedirect(self, n):
  """Evaluate one redirect node into a runtime redirect object.

  Args:
    n: redirect node.  n.tag is redir_e.Redir or redir_e.HereDoc; n.fd may
      be const.NO_INTEGER, in which case the default descriptor for n.op_id
      is used.

  Returns:
    A runtime.PathRedirect, runtime.DescRedirect or runtime.HereRedirect, or
    None if the operand is unusable (a message is printed first).

  Raises:
    AssertionError: for an unknown redirect tag or argument type.
  """
  if n.fd == const.NO_INTEGER:
    fd = REDIR_DEFAULT_FD[n.op_id]
  else:
    fd = n.fd

  if n.tag == redir_e.HereDoc:
    # TODO: decay should be controlled by an option
    body_val = self.word_ev.EvalWordToString(n.body, decay=True)
    if body_val.tag != value_e.Str:
      # TODO: This error never fires
      util.warn("Here doc body should be a string, got %s", body_val)
      return None
    return runtime.HereRedirect(fd, body_val.s)

  if n.tag != redir_e.Redir:
    raise AssertionError('Unknown redirect type')

  redir_type = REDIR_ARG_TYPES[n.op_id]  # could be static in the LST?

  if redir_type == redir_arg_type_e.Path:
    # NOTE: no globbing.  You can write to a file called '*.py'.
    path_val = self.word_ev.EvalWordToString(n.arg_word)
    if path_val.tag != value_e.Str:
      # TODO: This error never fires
      util.error("Redirect filename must be a string, got %s", path_val)
      return None
    if not path_val.s:
      # Whether this is fatal depends on errexit.
      util.error("Redirect filename can't be empty")
      return None
    return runtime.PathRedirect(n.op_id, fd, path_val.s)

  if redir_type == redir_arg_type_e.Desc:  # e.g. 1>&2
    desc_val = self.word_ev.EvalWordToString(n.arg_word)
    if desc_val.tag != value_e.Str:
      # TODO: This error never fires
      util.error("Redirect descriptor should be a string, got %s", desc_val)
      return None
    if not desc_val.s:
      util.error("Redirect descriptor can't be empty")
      return None
    try:
      target_fd = int(desc_val.s)
    except ValueError:
      util.error(
          "Redirect descriptor should look like an integer, got %s", desc_val)
      return None
    return runtime.DescRedirect(n.op_id, fd, target_fd)

  if redir_type == redir_arg_type_e.Here:  # here word
    # TODO: decay should be controlled by an option
    word_val = self.word_ev.EvalWordToString(n.arg_word, decay=True)
    if word_val.tag != value_e.Str:
      # TODO: This error never fires
      util.warn("Here word body should be a string, got %s", word_val)
      return None
    # NOTE: bash and mksh both add \n
    return runtime.HereRedirect(fd, word_val.s + '\n')

  raise AssertionError('Unknown redirect op')
def Trap(argv, traps):
  """Stub for the 'trap' builtin: warns that it is unimplemented.

  TODO: register trap.  Example:

    trap -- 'echo "hi there" | wc ' SIGINT

  Then hit Ctrl-C.  Yeah you need the EvalHelper.  traps is a list of signals
  to parsed NODES.

  Args:
    argv: builtin arguments (currently ignored).
    traps: trap registry (currently ignored).

  Returns:
    0, always.
  """
  util.warn('*** trap not implemented ***')
  return 0
def Command(argv, funcs, path_val):
  """The 'command' builtin; only the -v form is implemented.

  With -v, prints one line for each name that _ResolveNames can resolve.

  Args:
    argv: builtin arguments, parsed with COMMAND_SPEC.
    funcs: forwarded to _ResolveNames.
    path_val: forwarded to _ResolveNames.

  Returns:
    0 if every name was resolved; 1 if any name was not found or if -v was
    not given.
  """
  arg, i = COMMAND_SPEC.Parse(argv)

  if not arg.v:
    util.warn('*** command without -v not implemented ***')
    return 1

  status = 0
  # A separate loop variable keeps the parsed flags in 'arg' intact.
  for kind, resolved in _ResolveNames(argv[i:], funcs, path_val):
    if kind is None:
      status = 1  # nothing printed, but we fail
    else:
      # This is for -v, -V is more detailed.
      print(resolved)
  return status
def EvalCStringToken(id_, value):
  """Translate one C-string token into the string it denotes.

  This function is shared between echo -e and $''.

  $'' could use it at compile time, much like brace expansion in braces.py.

  Args:
    id_: token Id (one of the Id.Char_* kinds handled below).
    value: the token's source text, including any leading backslash.

  Returns:
    The decoded string, or None for the \\c "stop" token.

  Raises:
    AssertionError: for any other token Id.
  """
  if id_ == Id.Char_Literals:
    return value

  if id_ == Id.Char_BadBackslash:
    # Either \A or trailing \ (A is not a valid backslash escape)
    # TODO: error in strict mode
    util.warn('Invalid backslash escape')
    return value

  if id_ == Id.Char_OneChar:
    return _ONE_CHAR[value[1]]

  if id_ == Id.Char_Stop:  # \c returns a special sentinel
    return None

  if id_ in (Id.Char_Octal3, Id.Char_Octal4):
    # $'\377' has a one-character prefix; echo -e '\0377' has two.
    digits = value[1:] if id_ == Id.Char_Octal3 else value[2:]
    code = int(digits, 8)
    if code >= 256:
      # NOTE: A strict mode could reject out-of-range values instead.
      code %= 256
    return chr(code)

  if id_ == Id.Char_Hex:
    return chr(int(value[2:], 16))

  if id_ in (Id.Char_Unicode4, Id.Char_Unicode8):
    return libstr.Utf8Encode(int(value[2:], 16))

  raise AssertionError
def Read(argv, mem):
  """The 'read' builtin: read stdin into shell variables.

  With -n N, reads up to N bytes into the first name (default REPLY).
  Otherwise reads one line, splits it on whitespace, and assigns the pieces
  to the given names; leftover words go to the last name and surplus names
  get ''.  With no names at all, the whole line is stored in REPLY.

  Args:
    argv: builtin arguments, parsed with READ_SPEC.
    mem: variable store passed to state.SetLocalString.

  Returns:
    0 on success; 1 on EOF or when the line had no trailing newline.
  """
  # TODO:
  # - Use IFS instead of Python's split().

  arg, i = READ_SPEC.Parse(argv)

  if not arg.r:
    util.warn('*** read without -r not implemented ***')

  names = argv[i:]
  if arg.n is not None:
    name = names[0] if names else 'REPLY'  # default variable name
    s = os.read(sys.stdin.fileno(), arg.n)
    #log('read -n: %s = %s', name, s)

    state.SetLocalString(mem, name, s)
    # NOTE: Even if we don't get n bytes back, there is no error?
    return 0

  line = sys.stdin.readline()
  if not line:  # EOF
    return 1

  if line.endswith('\n'):  # strip trailing newline
    line = line[:-1]
    status = 0
  else:
    # odd bash behavior: fail even if we can set variables.
    status = 1

  if not names:
    # No names given: keep the line instead of dropping it, using the same
    # default variable as the -n branch above.
    state.SetLocalString(mem, 'REPLY', line)
    return status

  # leftover words assigned to the last name
  n = len(names)
  strs = line.split(None, n - 1)  # at most n pieces
  strs.extend([''] * (n - len(strs)))  # if there are too many variables

  for name, s in zip(names, strs):
    #log('read: %s = %s', name, s)
    state.SetLocalString(mem, name, s)

  return status
def _StringToIntegerOrError(self, s, blame_word=None,
                            span_id=const.NO_INTEGER):
  """Used by both [[ $x -gt 3 ]] and (( $x )).

  Args:
    s: string to convert with _StringToInteger.
    blame_word: optional word used to derive span_id when none is given.
    span_id: location for error reporting; const.NO_INTEGER means unknown.

  Returns:
    The converted integer, or 0 if conversion raised
    util.FatalRuntimeError and strict_arith is off.

  Raises:
    util.FatalRuntimeError: re-raised when self.exec_opts.strict_arith is set.
  """
  if span_id == const.NO_INTEGER and blame_word:
    span_id = word.LeftMostSpanForWord(blame_word)

  try:
    return _StringToInteger(s, span_id=span_id)
  except util.FatalRuntimeError as err:
    if self.exec_opts.strict_arith:
      raise
    # TODO: Need the arena for printing this error?
    #ui.PrettyPrintError(e)
    warn(err.UserErrorString())
    return 0
def Wait(self):
  """Reap one child process and run the callback registered for its PID.

  Retries posix.wait() when it is interrupted (EINTR).  Prints an empty line
  when the child was terminated by SIGINT; an exited child's status is
  converted with posix.WEXITSTATUS.

  Returns:
    False if there is nothing to wait for (ECHILD), so the caller should
    stop; True otherwise, so the caller should keep waiting.

  Raises:
    OSError: any posix.wait() failure other than ECHILD or EINTR.
  """
  # This is a list of async jobs
  reaped = False
  while not reaped:
    try:
      pid, status = posix.wait()
      reaped = True  # no exception thrown, so no need to retry
    except OSError as err:
      #log('wait() error: %s', err)
      if err.errno == errno.ECHILD:
        return False  # nothing to wait for caller should stop
      if err.errno != errno.EINTR:
        raise  # An error we don't know about.
      # EINTR happens when we register a handler for SIGINT, and thus never
      # get the KeyboardInterrupt exception?  Not sure why.
      # Try
      # $ cat   # Now hit Ctrl-C
      # Fall through to the loop test and try again.

  #log('WAIT got %s %s', pid, status)

  # TODO: change status in more cases.
  if not posix.WIFSIGNALED(status):
    if posix.WIFEXITED(status):
      status = posix.WEXITSTATUS(status)
      #log('exit status: %s', status)
  elif posix.WTERMSIG(status) == signal.SIGINT:
    print()

  # A missing callback could be a coding error.  But it may legitimately
  # happen if a grandchild outlives the child (its parent).  Then it is
  # reparented under this process, so we might receive notification of its
  # exit, even though we didn't start it.  We can't have any knowledge of
  # such processes, so print a warning.
  try:
    on_done = self.callbacks.pop(pid)
  except KeyError:
    util.warn("PID %d stopped, but osh didn't start it", pid)
    return True  # caller should keep waiting

  on_done(pid, status)
  self.last_status = status  # for wait -n

  return True  # caller should keep waiting
def Type(argv, funcs, path_val):
  """The 'type' builtin: print what kind of thing each name is.

  Each name prints 'function', 'builtin', 'keyword' or 'file'; a name that
  is none of these prints nothing.

  Args:
    argv: builtin arguments, parsed with TYPE_SPEC.
    funcs: container of defined function names (tested with 'in').
    path_val: value holding the colon-separated search path; anything other
      than a Str is treated as an empty path.

  Returns:
    0 if -t was given and every name was found; 1 otherwise.
  """
  arg, i = TYPE_SPEC.Parse(argv)

  # treat a non-string path as empty
  path_list = path_val.s.split(':') if path_val.tag == value_e.Str else []

  status = 0
  if not arg.t:
    util.warn("*** 'type' builtin called without -t ***")
    status = 1
    # Keep going anyway

  for name in argv[i:]:
    if name in funcs:
      kind = 'function'
    elif (Resolve(name) != EBuiltin.NONE or
          ResolveSpecial(name) != EBuiltin.NONE or
          lex.IsOtherBuiltin(name)):  # declare, continue, etc.
      kind = 'builtin'
    elif lex.IsKeyword(name):
      kind = 'keyword'
    elif any(os.path.exists(os.path.join(path_dir, name))
             for path_dir in path_list):
      # Found a file under one of the path entries.
      kind = 'file'
    else:
      # Nothing printed, but status is 1.
      status = 1
      continue
    print(kind)

  return status
def Umask(argv):
  """The 'umask' builtin: print or set the process file mode creation mask.

  Args:
    argv: [] to print the current mask in octal, or [mask] with an octal
      string to set it.

  Returns:
    0 on success; 1 if the single argument is not an octal number.

  Raises:
    args.UsageError: if more than one argument is given.
  """
  num_args = len(argv)
  if num_args > 1:
    raise args.UsageError('umask: unexpected arguments')

  if num_args == 0:
    # umask() has a dumb API: you can't get it without modifying it first!
    # NOTE: dash disables interrupts around the two umask() calls, but that
    # shouldn't be a concern for us.  Signal handlers won't call umask().
    mask = posix.umask(0)
    posix.umask(mask)  # put the original mask back
    print('0%03o' % mask)  # octal format
    return 0

  try:
    new_mask = int(argv[0], 8)
  except ValueError:
    # NOTE: This happens if we have '8' or '9' in the input too.
    util.warn('*** umask with symbolic input not implemented ***')
    return 1

  posix.umask(new_mask)
  return 0
def _Dispatch(self, node, fork_external):
  """Execute one command node by switching on node.tag.

  Args:
    node: command node; node.tag (a command_e value) selects the branch.
    fork_external: forwarded to self._RunSimpleCommand for simple commands.

  Returns:
    (status, check_errexit).  status is the integer exit status computed by
    the branch.  check_errexit is set to True by the SimpleCommand,
    non-negated Pipeline, Subshell, DBracket and DParen branches, and when
    the last child of an AndOr runs; presumably the caller uses it to decide
    whether errexit applies -- confirm against the caller.

  Raises:
    _ControlFlow: for break/continue/return that is valid where it appears;
      loops in this function catch break and continue.
    NotImplementedError: for '|&' pipelines and unhandled node tags.
    AssertionError: for an unknown assignment keyword.
  """
  # If we call RunCommandSub in a recursive call to the executor, this will
  # be set true (if strict-errexit is false).  But it only lasts for one
  # command.
  self.check_command_sub_status = False

  #argv0 = None  # for error message
  check_errexit = False  # for errexit

  if node.tag == command_e.SimpleCommand:
    check_errexit = True

    # Find span_id for a basic implementation of $LINENO, e.g.
    # PS4='+$SOURCE_NAME:$LINENO:'
    # NOTE: osh2oil uses node.more_env, but we don't need that.
    span_id = const.NO_INTEGER
    if node.words:
      first_word = node.words[0]
      span_id = word.LeftMostSpanForWord(first_word)

    self.mem.SetCurrentSpanId(span_id)

    # PROBLEM: We want to log argv in 'xtrace' mode, but we may have already
    # redirected here, which screws up logging.  For example, 'echo hi
    # >/dev/null 2>&1'.  We want to evaluate argv and log it BEFORE applying
    # redirects.

    # Another problem:
    # - tracing can be called concurrently from multiple processes, leading
    #   to overlap.  Maybe have a mode that creates a file per process.
    #   xtrace-proc
    # - line numbers for every command would be very nice.  But then you have
    #   to print the filename too.

    words = braces.BraceExpandWords(node.words)
    argv = self.word_ev.EvalWordSequence(words)

    # This comes before evaluating env, in case there are problems evaluating
    # it.  We could trace the env separately?  Also trace unevaluated code
    # with set-o verbose?
    self.tracer.OnSimpleCommand(argv)

    # Temporary env bindings (FOO=1 cmd) are pushed here and popped in the
    # 'finally' below, even if the command raises.
    if node.more_env:
      self.mem.PushTemp()
    try:
      for env_pair in node.more_env:
        val = self.word_ev.EvalWordToString(env_pair.val)
        # Set each var so the next one can reference it.  Example:
        # FOO=1 BAR=$FOO ls /
        self.mem.SetVar(ast.LhsName(env_pair.name), val,
                        (var_flags_e.Exported,), scope_e.TempEnv)

      # NOTE: This might never return!  In the case of fork_external=False.
      status = self._RunSimpleCommand(argv, fork_external, span_id)
    finally:
      if node.more_env:
        self.mem.PopTemp()

  elif node.tag == command_e.Sentence:
    # Don't check_errexit since this isn't a real node!
    if node.terminator.id == Id.Op_Semi:
      status = self._Execute(node.child)
    else:
      status = self._RunJobInBackground(node.child)

  elif node.tag == command_e.Pipeline:
    check_errexit = True
    if node.stderr_indices:
      raise NotImplementedError('|&')

    if node.negated:
      self._PushErrExit()
      try:
        status2 = self._RunPipeline(node)
      finally:
        self._PopErrExit()

      # errexit is disabled for !.
      check_errexit = False
      status = 1 if status2 == 0 else 0
    else:
      status = self._RunPipeline(node)

  elif node.tag == command_e.Subshell:
    check_errexit = True
    # This makes sure we don't waste a process if we'd launch one anyway.
    p = self._MakeProcess(node.child)
    status = p.Run(self.waiter)

  elif node.tag == command_e.DBracket:
    check_errexit = True
    result = self.bool_ev.Eval(node.expr)
    status = 0 if result else 1

  elif node.tag == command_e.DParen:
    check_errexit = True
    i = self.arith_ev.Eval(node.child)
    status = 0 if i != 0 else 1

  elif node.tag == command_e.Assignment:
    flags = word_compile.ParseAssignFlags(node.flags)

    # Pick the scope lookup mode from the assignment keyword.
    if node.keyword == Id.Assign_Local:
      lookup_mode = scope_e.LocalOnly
    # typeset and declare are synonyms?  I see typeset -a a=() the most.
    elif node.keyword in (Id.Assign_Declare, Id.Assign_Typeset):
      # declare is like local, except it can also be used outside functions?
      if var_flags_e.Global in flags:
        lookup_mode = scope_e.GlobalOnly
      else:
        lookup_mode = scope_e.LocalOnly
    elif node.keyword == Id.Assign_Readonly:
      lookup_mode = scope_e.Dynamic
      flags.append(var_flags_e.ReadOnly)
    elif node.keyword == Id.Assign_None:  # mutate existing local or global
      lookup_mode = scope_e.Dynamic
    else:
      raise AssertionError(node.keyword)

    for pair in node.pairs:
      if pair.op == assign_op_e.PlusEqual:
        assert pair.rhs, pair.rhs  # I don't think a+= is valid?
        val = self.word_ev.EvalRhsWord(pair.rhs)
        old_val, lval = expr_eval.EvalLhsAndLookup(pair.lhs, self.arith_ev,
                                                   self.mem, self.exec_opts)
        # Dispatch on the (old value type, new value type) pair.
        sig = (old_val.tag, val.tag)
        if sig == (value_e.Undef, value_e.Str):
          pass  # val is RHS
        elif sig == (value_e.Undef, value_e.StrArray):
          pass  # val is RHS
        elif sig == (value_e.Str, value_e.Str):
          val = runtime.Str(old_val.s + val.s)
        elif sig == (value_e.Str, value_e.StrArray):
          e_die("Can't append array to string")
        elif sig == (value_e.StrArray, value_e.Str):
          e_die("Can't append string to array")
        elif sig == (value_e.StrArray, value_e.StrArray):
          val = runtime.StrArray(old_val.strs + val.strs)

      else:  # plain assignment
        spid = pair.spids[0]  # Source location for tracing
        lval = self._EvalLhs(pair.lhs, spid, lookup_mode)

        # RHS can be a string or array.
        if pair.rhs:
          val = self.word_ev.EvalRhsWord(pair.rhs)
          assert isinstance(val, runtime.value), val

        else:  # e.g. 'readonly x' or 'local x'
          val = None

      # NOTE: In bash and mksh, declare -a myarray makes an empty cell with
      # Undef value, but the 'array' attribute.

      #log('setting %s to %s with flags %s', lval, val, flags)
      self.mem.SetVar(lval, val, flags, lookup_mode,
                      strict_array=self.exec_opts.strict_array)

      # Assignment always appears to have a spid.
      if node.spids:
        current_spid = node.spids[0]
      else:
        current_spid = const.NO_INTEGER
      self.mem.SetCurrentSpanId(current_spid)
      self.tracer.OnAssignment(lval, pair.op, val, flags, lookup_mode)

    # PATCH to be compatible with existing shells: If the assignment had a
    # command sub like:
    #
    # s=$(echo one; false)
    #
    # then its status will be in mem.last_status, and we can check it here.
    # If there was NOT a command sub in the assignment, then we don't want to
    # check it.
    if node.keyword == Id.Assign_None:  # mutate existing local or global
      # Only do this if there was a command sub?  How?  Look at node?
      # Set a flag in mem?   self.mem.last_status or
      if self.check_command_sub_status:
        self._CheckStatus(self.mem.last_status, node)
        # A global assignment shouldn't clear $?.
        status = self.mem.last_status
      else:
        status = 0
    else:
      # To be compatible with existing shells, local assignments DO clear
      # $?.  Even in strict mode, we don't need to bother setting
      # check_errexit = True, because we would have already checked the
      # command sub in RunCommandSub.
      status = 0
      # TODO: maybe we should have a "sane-status" that respects this:
      # false; echo $?; local f=x; echo $?

  elif node.tag == command_e.ControlFlow:
    if node.arg_word:  # Evaluate the argument
      val = self.word_ev.EvalWordToString(node.arg_word)
      assert val.tag == value_e.Str
      arg = int(val.s)  # They all take integers
    else:
      arg = 0  # return 0, exit 0, break 0 levels, etc.

    # NOTE: We don't do anything about a top-level 'return' here.  Unlike in
    # bash, that is OK.  If you can return from a sourced script, it makes
    # sense to return from a main script.
    ok = True
    tok = node.token
    if (tok.id in (Id.ControlFlow_Break, Id.ControlFlow_Continue) and
        self.loop_level == 0):
      ok = False
      msg = 'Invalid control flow at top level'

    if ok:
      raise _ControlFlow(tok, arg)

    # Only reached for break/continue outside any loop.
    if self.exec_opts.strict_control_flow:
      e_die(msg, token=tok)
    else:
      # Only print warnings, never fatal.
      # Bash oddly only exits 1 for 'return', but no other shell does.
      ui.PrintFilenameAndLine(tok.span_id, self.arena)
      util.warn(msg)
      status = 0

  # The only difference between these two is that CommandList has no
  # redirects.  We already took care of that above.
  elif node.tag in (command_e.CommandList, command_e.BraceGroup):
    status = self._ExecuteList(node.children)
    check_errexit = False

  elif node.tag == command_e.AndOr:
    # NOTE: && and || have EQUAL precedence in command mode.  See case #13
    # in dbracket.test.sh.

    left = node.children[0]

    # Suppress failure for every child except the last one.
    self._PushErrExit()
    try:
      status = self._Execute(left)
    finally:
      self._PopErrExit()

    i = 1
    n = len(node.children)
    while i < n:
      #log('i %d status %d', i, status)
      child = node.children[i]
      op_id = node.ops[i-1]  # operator between child i-1 and child i

      #log('child %s op_id %s', child, op_id)

      if op_id == Id.Op_DPipe and status == 0:
        i += 1
        continue  # short circuit

      elif op_id == Id.Op_DAmp and status != 0:
        i += 1
        continue  # short circuit

      if i == n - 1:  # errexit handled differently for last child
        status = self._Execute(child)
        check_errexit = True
      else:
        self._PushErrExit()
        try:
          status = self._Execute(child)
        finally:
          self._PopErrExit()

      i += 1

  elif node.tag == command_e.WhileUntil:
    # 'while' stops when the condition fails; 'until' stops when it succeeds.
    if node.keyword.id == Id.KW_While:
      _DonePredicate = lambda status: status != 0
    else:
      _DonePredicate = lambda status: status == 0

    status = 0

    self.loop_level += 1
    try:
      while True:
        self._PushErrExit()
        try:
          cond_status = self._ExecuteList(node.cond)
        finally:
          self._PopErrExit()

        # NOTE(review): 'done' is assigned here but not read in this branch.
        done = cond_status != 0
        if _DonePredicate(cond_status):
          break
        try:
          status = self._Execute(node.body)  # last one wins
        except _ControlFlow as e:
          if e.IsBreak():
            status = 0
            break
          elif e.IsContinue():
            status = 0
            continue
          else:  # return needs to pop up more
            raise
    finally:
      self.loop_level -= 1

  elif node.tag == command_e.ForEach:
    iter_name = node.iter_name
    if node.do_arg_iter:
      iter_list = self.mem.GetArgv()
    else:
      words = braces.BraceExpandWords(node.iter_words)
      iter_list = self.word_ev.EvalWordSequence(words)
      # We need word splitting and so forth
      # NOTE: This expands globs too.  TODO: We should pass in a Globber()
      # object.

    status = 0  # in case we don't loop
    self.loop_level += 1
    try:
      for x in iter_list:
        #log('> ForEach setting %r', x)
        state.SetLocalString(self.mem, iter_name, x)
        #log('<')

        try:
          status = self._Execute(node.body)  # last one wins
        except _ControlFlow as e:
          if e.IsBreak():
            status = 0
            break
          elif e.IsContinue():
            status = 0
          else:  # return needs to pop up more
            raise
    finally:
      self.loop_level -= 1

  elif node.tag == command_e.ForExpr:
    status = 0
    init, cond, body, update = node.init, node.cond, node.body, node.update
    if init:
      self.arith_ev.Eval(init)

    self.loop_level += 1
    try:
      while True:
        if cond:
          b = self.arith_ev.Eval(cond)
          if not b:
            break

        try:
          status = self._Execute(body)
        except _ControlFlow as e:
          if e.IsBreak():
            status = 0
            break
          elif e.IsContinue():
            status = 0
          else:  # return needs to pop up more
            raise

        # Runs after 'continue' as well, since that case falls through.
        if update:
          self.arith_ev.Eval(update)

    finally:
      self.loop_level -= 1

  elif node.tag == command_e.DoGroup:
    status = self._ExecuteList(node.children)
    check_errexit = False  # not real statements

  elif node.tag == command_e.FuncDef:
    # NOTE: Would it make sense to evaluate the redirects BEFORE entering?
    # It will save time on function calls.
    self.funcs[node.name] = node
    status = 0

  elif node.tag == command_e.If:
    done = False
    for arm in node.arms:
      self._PushErrExit()
      try:
        status = self._ExecuteList(arm.cond)
      finally:
        self._PopErrExit()

      if status == 0:
        status = self._ExecuteList(arm.action)
        done = True
        break
    # NOTE(review): if no arm matches and there is no else, status stays at
    # the last condition's non-zero status.  POSIX appears to specify 0 for
    # that case -- confirm and fix separately.
    # TODO: The compiler should flatten this
    if not done and node.else_action is not None:
      status = self._ExecuteList(node.else_action)

  elif node.tag == command_e.NoOp:
    status = 0  # make it true

  elif node.tag == command_e.Case:
    val = self.word_ev.EvalWordToString(node.to_match)
    to_match = val.s

    status = 0  # If there are no arms, it should be zero?
    done = False

    for arm in node.arms:
      for pat_word in arm.pat_list:
        # NOTE: Is it OK that we're evaluating these as we go?

        # TODO: case "$@") shouldn't succeed?  That's a type error?
        # That requires strict-array?

        pat_val = self.word_ev.EvalWordToString(pat_word, do_fnmatch=True)
        #log('Matching word %r against pattern %r', to_match, pat_val.s)
        if libc.fnmatch(pat_val.s, to_match):
          status = self._ExecuteList(arm.action)
          done = True  # TODO: Parse ;;& and for fallthrough and such?
          break  # Only execute action ONCE
      if done:
        break

  elif node.tag == command_e.TimeBlock:
    # TODO:
    # - When do we need RUSAGE_CHILDREN?
    # - Respect TIMEFORMAT environment variable.
    # "If this variable is not set, Bash acts as if it had the value"
    # $'\nreal\t%3lR\nuser\t%3lU\nsys\t%3lS'
    # "A trailing newline is added when the format string is displayed."

    start_t = time.time()  # calls gettimeofday() under the hood
    start_u = resource.getrusage(resource.RUSAGE_SELF)
    status = self._Execute(node.pipeline)

    end_t = time.time()
    end_u = resource.getrusage(resource.RUSAGE_SELF)

    real = end_t - start_t
    user = end_u.ru_utime - start_u.ru_utime
    sys_ = end_u.ru_stime - start_u.ru_stime
    libc.print_time(real, user, sys_)

  else:
    raise NotImplementedError(node.__class__.__name__)

  return status, check_errexit
def _EvalBracedVarSub(self, part, part_vals, quoted):
  """Evaluate a braced variable substitution such as ${x}, ${a[@]}, ${x:-y}.

  The result is appended to part_vals rather than returned.

  Args:
    part: the substitution node; its token, bracket_op, prefix_op and
      suffix_op attributes are read here.
    part_vals: output param to append to.
    quoted: whether the substitution appears in a quoted context; affects
      array decay and is forwarded to the helpers.

  Returns:
    None.  Returns early, with part_vals already updated by _ApplyTestOp,
    when a test operator reports SpliceParts or SpliceAndAssign.

  Raises:
    AssertionError / NotImplementedError: for unexpected operator or value
      kinds.
    util.InvalidSlice, util.InvalidUtf8: re-raised from string slicing when
      self.exec_opts.strict_word_eval is set.
  """
  # We have four types of operator that interact.
  #
  # 1. Bracket: value -> (value, bool maybe_decay_array)
  #
  # 2. Then these four cases are mutually exclusive:
  #
  #   a. Prefix length: value -> value
  #   b. Test: value -> part_value[]
  #   c. Other Suffix: value -> value
  #   d. no operator: you have a value
  #
  # That is, we don't have both prefix and suffix operators.
  #
  # 3. Process maybe_decay_array here before returning.

  maybe_decay_array = False  # for $*, ${a[*]}, etc.

  var_name = None  # For ${foo=default}

  # 1. Evaluate from (var_name, var_num, token Id) -> value
  if part.token.id == Id.VSub_Name:
    var_name = part.token.val
    val = self.mem.GetVar(var_name)
    #log('EVAL NAME %s -> %s', var_name, val)

  elif part.token.id == Id.VSub_Number:
    var_num = int(part.token.val)
    val = self._EvalVarNum(var_num)
  else:
    # $* decays
    val, maybe_decay_array = self._EvalSpecialVar(part.token.id, quoted)

  # 2. Bracket: value -> (value v, bool maybe_decay_array)
  # maybe_decay_array is for joining ${a[*]} and unquoted ${a[@]} AFTER
  # suffix ops are applied.  If we take the length with a prefix op, the
  # distinction is ignored.
  if part.bracket_op:
    if part.bracket_op.tag == bracket_op_e.WholeArray:
      op_id = part.bracket_op.op_id

      if op_id == Id.Lit_At:
        if not quoted:
          maybe_decay_array = True  # ${a[@]} decays but "${a[@]}" doesn't
        if val.tag == value_e.Undef:
          val = self._EmptyStrArrayOrError(part.token)
        elif val.tag == value_e.Str:
          e_die("Can't index string with @: %r", val, part=part)
        elif val.tag == value_e.StrArray:
          # TODO: Is this a no-op?  Just leave 'val' alone.
          val = value.StrArray(val.strs)

      elif op_id == Id.Arith_Star:
        maybe_decay_array = True  # both ${a[*]} and "${a[*]}" decay
        if val.tag == value_e.Undef:
          val = self._EmptyStrArrayOrError(part.token)
        elif val.tag == value_e.Str:
          e_die("Can't index string with *: %r", val, part=part)
        elif val.tag == value_e.StrArray:
          # TODO: Is this a no-op?  Just leave 'val' alone.
          # ${a[*]} or "${a[*]}" :  maybe_decay_array is always true
          val = value.StrArray(val.strs)

      else:
        raise AssertionError(op_id)  # unknown

    elif part.bracket_op.tag == bracket_op_e.ArrayIndex:
      anode = part.bracket_op.expr

      if val.tag == value_e.Undef:
        pass  # it will be checked later

      elif val.tag == value_e.Str:
        # Bash treats any string as an array, so we can't add our own
        # behavior here without making valid OSH invalid bash.
        e_die("Can't index string %r with integer", part.token.val,
              token=part.token)

      elif val.tag == value_e.StrArray:
        index = self.arith_ev.Eval(anode)
        try:
          # could be None because representation is sparse
          s = val.strs[index]
        except IndexError:
          s = None

        if s is None:
          val = value.Undef()
        else:
          val = value.Str(s)

      elif val.tag == value_e.AssocArray:
        # Associative arrays are keyed by the un-coerced expression result.
        key = self.arith_ev.Eval(anode, int_coerce=False)
        try:
          val = value.Str(val.d[key])
        except KeyError:
          val = value.Undef()

      else:
        raise AssertionError(val.__class__.__name__)

    else:
      raise AssertionError(part.bracket_op.tag)

  if part.prefix_op:
    val = self._EmptyStrOrError(val)  # maybe error
    val = self._ApplyPrefixOp(val, part.prefix_op, token=part.token)
    # NOTE: When applying the length operator, we can't have a test or
    # suffix afterward.  And we don't want to decay the array

  elif part.suffix_op:
    op = part.suffix_op
    if op.tag == suffix_op_e.StringNullary:
      if op.op_id == Id.VOp0_P:
        prompt = self.prompt_ev.EvalPrompt(val)
        val = value.Str(prompt)
      elif op.op_id == Id.VOp0_Q:
        val = value.Str(string_ops.ShellQuote(val.s))
      else:
        raise NotImplementedError(op.op_id)

    elif op.tag == suffix_op_e.StringUnary:
      if LookupKind(part.suffix_op.op_id) == Kind.VTest:
        # TODO: Change style to:
        # if self._ApplyTestOp(...)
        #   return
        # It should return whether anything was done.  If not, we continue to
        # the end, where we might throw an error.

        assign_part_vals, effect = self._ApplyTestOp(val, part.suffix_op,
                                                     quoted, part_vals)

        # NOTE: Splicing part_values is necessary because of code like
        # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
        # do_glob/do_elide setting.
        if effect == effect_e.SpliceParts:
          return  # EARLY RETURN, part_vals mutated

        elif effect == effect_e.SpliceAndAssign:
          if var_name is None:
            # TODO: error context
            e_die("Can't assign to special variable")
          else:
            # NOTE: This decays arrays too!  'set -o strict_array' could
            # avoid it.
            rhs_str = _DecayPartValuesToString(assign_part_vals,
                                               self.splitter.GetJoinChar())
            state.SetLocalString(self.mem, var_name, rhs_str)
          return  # EARLY RETURN, part_vals mutated

        elif effect == effect_e.Error:
          raise NotImplementedError

        else:
          # The old one
          #val = self._EmptyStringPartOrError(part_val, quoted)
          pass  # do nothing, may still be undefined

      else:
        val = self._EmptyStrOrError(val)  # maybe error
        # Other suffix: value -> value
        val = self._ApplyUnarySuffixOp(val, part.suffix_op)

    elif op.tag == suffix_op_e.PatSub:  # PatSub, vectorized
      val = self._EmptyStrOrError(val)  # ${undef//x/y}

      # globs are supported in the pattern
      pat_val = self.EvalWordToString(op.pat, do_fnmatch=True)
      assert pat_val.tag == value_e.Str, pat_val

      if op.replace:
        replace_val = self.EvalWordToString(op.replace)
        assert replace_val.tag == value_e.Str, replace_val
        replace_str = replace_val.s
      else:
        replace_str = ''

      regex, warnings = glob_.GlobToERE(pat_val.s)
      if warnings:
        # TODO:
        # - Add 'set -o strict-glob' mode and expose warnings.
        #   "Glob is not in CANONICAL FORM".
        # - Propagate location info back to the 'op.pat' word.
        pass
      replacer = string_ops.GlobReplacer(regex, replace_str, op.spids[0])

      if val.tag == value_e.Str:
        s = replacer.Replace(val.s, op)
        val = value.Str(s)

      elif val.tag == value_e.StrArray:
        # Apply the replacement to each set element; unset (None) entries
        # are dropped from the result.
        strs = []
        for s in val.strs:
          if s is not None:
            strs.append(replacer.Replace(s, op))
        val = value.StrArray(strs)

      else:
        raise AssertionError(val.__class__.__name__)

    elif op.tag == suffix_op_e.Slice:
      val = self._EmptyStrOrError(val)  # ${undef:3:1}

      if op.begin:
        begin = self.arith_ev.Eval(op.begin)
      else:
        begin = 0

      if op.length:
        length = self.arith_ev.Eval(op.length)
      else:
        length = None

      if val.tag == value_e.Str:  # Slice UTF-8 characters in a string.
        s = val.s

        try:
          if begin < 0:
            # It could be negative if we compute unicode length, but that's
            # confusing.

            # TODO: Instead of attributing it to the word part, it would be
            # better if we attributed it to arith_expr begin.
            raise util.InvalidSlice(
                "The start index of a string slice can't be negative: %d",
                begin, part=part)

          byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

          if length is None:
            byte_end = len(s)
          else:
            if length < 0:
              # TODO: Instead of attributing it to the word part, it would be
              # better if we attributed it to arith_expr begin.
              raise util.InvalidSlice(
                  "The length of a string slice can't be negative: %d",
                  length, part=part)

            byte_end = string_ops.AdvanceUtf8Chars(s, length, byte_begin)

        except (util.InvalidSlice, util.InvalidUtf8) as e:
          if self.exec_opts.strict_word_eval:
            raise
          else:
            # TODO:
            # - We don't see the error location here, but we see it when set
            #   -o strict-word-eval.
            # - Doesn't make the command exit with 1.  It just sets the word
            #   to empty string.
            util.warn(e.UserErrorString())
            substr = ''  # error condition
        else:
          substr = s[byte_begin : byte_end]

        val = value.Str(substr)

      elif val.tag == value_e.StrArray:  # Slice array entries.
        # NOTE: unset elements don't count towards the length.
        # NOTE(review): with length == 0 the equality test below is never
        # true after an append, so every element from 'begin' on is kept --
        # confirm whether an empty result is intended.
        strs = []
        for s in val.strs[begin:]:
          if s is not None:
            strs.append(s)
            if len(strs) == length:  # never true for unspecified length
              break
        val = value.StrArray(strs)

      else:
        raise AssertionError(val.__class__.__name__)  # Not possible

  # After applying suffixes, process maybe_decay_array here.
  if maybe_decay_array and val.tag == value_e.StrArray:
    val = self._DecayArray(val)

  # For the case where there are no prefix or suffix ops.
  val = self._EmptyStrOrError(val)

  # For example, ${a} evaluates to value_t.Str(), but we want a
  # part_value.StringPartValue.
  part_val = _ValueToPartValue(val, quoted)
  part_vals.append(part_val)
def _ApplyPrefixOp(self, val, op_id, token):
    """Apply a prefix operator, ${#x} (length) or ${!x} (indirect), to a value.

    Args:
      val: the already-evaluated value; its tag must not be value_e.Undef.
      op_id: Id.VSub_Pound for length, Id.VSub_Bang for indirect expansion.
      token: forwarded to e_die() as the location of a bad indirect expansion.

    Returns:
      value

    Raises:
      util.InvalidUtf8: re-raised from CountUtf8Chars when strict_word_eval
        is set.
      AssertionError: for a value tag or operator id this method doesn't
        handle.
    """
    assert val.tag != value_e.Undef

    if op_id == Id.VSub_Pound:  # LENGTH
        if val.tag == value_e.Str:
            # NOTE: Whether bash counts bytes or chars is affected by LANG
            # environment variables.
            # Should we respect that, or another way to select?  set -o
            # count-bytes?
            # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
            try:
                length = string_ops.CountUtf8Chars(val.s)
            except util.InvalidUtf8 as e:
                # TODO: Add location info from 'part'?  Only the caller has it.
                if self.exec_opts.strict_word_eval:
                    raise
                # NOTE: Doesn't make the command exit with 1; it just returns
                # a length of -1.
                util.warn(e.UserErrorString())
                return value.Str('-1')
        elif val.tag == value_e.StrArray:
            # There can be empty placeholder values in the array.
            length = sum(1 for s in val.strs if s is not None)
        else:
            # Without this, 'length' would be unbound below and the failure
            # would surface as a confusing UnboundLocalError.
            raise AssertionError(val.__class__.__name__)

        return value.Str(str(length))

    elif op_id == Id.VSub_Bang:  # ${!foo}, "indirect expansion"
        # NOTES:
        # - Could translate to eval('$' + name) or eval("\$$name")
        # - ${!array[@]} means something completely different.  TODO:
        #   implement that.
        # - It might make sense to suggest implementing this with associative
        #   arrays?
        if val.tag == value_e.Str:
            # plain variable name, like 'foo'
            if match.IsValidVarName(val.s):
                return self.mem.GetVar(val.s)

            # positional argument, like '1'
            try:
                return self.mem.GetArgNum(int(val.s))
            except ValueError:
                pass

            if val.s in ('@', '*'):
                # TODO maybe_decay_array
                return value.StrArray(self.mem.GetArgv())

            # otherwise an array reference, like 'arr[0]' or 'arr[xyz]' or
            # 'arr[@]'
            i = val.s.find('[')
            if i >= 0 and val.s[-1] == ']':
                name, index = val.s[:i], val.s[i+1:-1]
                result = self._EvalIndirectArrayExpansion(name, index)
                if result is not None:
                    return result

            # Note that bash doesn't consider this fatal.  It makes the
            # command exit with '1', but we don't have that ability yet?
            # NOTE(review): e_die() is presumed to raise; if it ever returned,
            # this method would fall through and return None.
            e_die('Bad indirect expansion: %r', val.s, token=token)

        elif val.tag == value_e.StrArray:
            # The result is the indices of the set (non-None) entries, each
            # rendered as a string.
            indices = [str(i) for i, s in enumerate(val.strs) if s is not None]
            return value.StrArray(indices)

        else:
            raise AssertionError

    else:
        raise AssertionError(op_id)
def Trap(argv, traps, nodes_to_run, ex):
    """trap builtin: print, list, remove, or register signal/hook handlers.

    Args:
      argv: arguments to the builtin; after flags these are CODE and
        SIGNAL_SPEC.
      traps: dict of registered handlers, keyed by hook name (str) or signal
        number (int).  Mutated in place.
      nodes_to_run: passed through to each _TrapHandler that is created.
      ex: object whose ParseTrapCode(code_str) returns a node, or None on a
        parse error.

    Returns:
      0 on success; 1 for an invalid signal/hook or unparseable trap code.

    Raises:
      args.UsageError: if fewer than two arguments are given.
    """
    arg, _ = TRAP_SPEC.Parse(argv)

    if arg.p:  # Print registered handlers
        for name, handler in traps.iteritems():
            # The unit tests rely on this being one line.
            # bash prints a line that can be re-parsed.
            print('%s %s' % (name, handler.__class__.__name__))

        sys.stdout.flush()
        return 0

    if arg.l:  # List valid signals and hooks
        by_number = sorted(_SIGNAL_NAMES.items(), key=lambda x: x[1])

        for name in _HOOK_NAMES:
            print(' %s' % name)
        for name, int_val in by_number:
            print('%2d %s' % (int_val, name))

        sys.stdout.flush()
        return 0

    try:
        code_str = argv[0]
        sig_spec = argv[1]
    except IndexError:
        raise args.UsageError('trap CODE SIGNAL_SPEC')

    # sig_key is the NORMALIZED sig_spec: an integer signal number or a
    # string hook name.  sig_num is set only when sig_spec names a signal.
    sig_key = None
    sig_num = None
    if sig_spec in _HOOK_NAMES:
        sig_key = sig_spec
    elif sig_spec == '0':  # Special case
        sig_key = 'EXIT'
    else:
        sig_num = _GetSignalNumber(sig_spec)
        if sig_num is not None:
            sig_key = sig_num

    if sig_key is None:
        util.error("Invalid signal or hook %r" % sig_spec)
        return 1

    # From here on sig_key is valid, for both removal and registration.
    if code_str == '-':
        if sig_key in _HOOK_NAMES:
            traps.pop(sig_key, None)  # removing an unset hook is not an error
            return 0

        if sig_num is not None:
            traps.pop(sig_key, None)

            # Restore default
            if sig_num == signal.SIGINT:
                RegisterSigIntHandler()
            else:
                signal.signal(sig_num, signal.SIG_DFL)
            return 0

        raise AssertionError('Signal or trap')

    # Try parsing the code first.
    node = ex.ParseTrapCode(code_str)
    if node is None:
        return 1  # ParseTrapCode() prints an error for us.

    # Register a hook.
    if sig_key in _HOOK_NAMES:
        if sig_key in ('ERR', 'RETURN', 'DEBUG'):
            util.warn("*** The %r isn't yet implemented in OSH ***", sig_spec)
        traps[sig_key] = _TrapHandler(node, nodes_to_run)
        return 0

    # Register a signal.  sig_num was already resolved during normalization,
    # so there is no need to look it up a second time.
    if sig_num is not None:
        handler = _TrapHandler(node, nodes_to_run)
        # For signal handlers, the traps dictionary is used only for
        # debugging.
        traps[sig_key] = handler
        signal.signal(sig_num, handler)
        return 0

    raise AssertionError('Signal or trap')
def _ApplyPrefixOp(self, val, op_id):
    """Apply a prefix operator, ${#x} (length) or ${!x} (var ref), to a value.

    Args:
      val: the already-evaluated value; its tag must not be value_e.Undef.
      op_id: Id.VSub_Pound for length, Id.VSub_Bang for a variable reference.

    Returns:
      value

    Raises:
      util.InvalidUtf8: re-raised from CountUtf8Chars when strict_word_eval
        is set.
      NotImplementedError: for ${!a[@]}, i.e. VSub_Bang applied to an array.
      AssertionError: for a value tag or operator id this method doesn't
        handle.
    """
    assert val.tag != value_e.Undef

    if op_id == Id.VSub_Pound:  # LENGTH
        if val.tag == value_e.Str:
            # NOTE: Whether bash counts bytes or chars is affected by LANG
            # environment variables.
            # Should we respect that, or another way to select?  set -o
            # count-bytes?
            # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
            try:
                length = libstr.CountUtf8Chars(val.s)
            except util.InvalidUtf8 as e:
                # TODO: Add location info from 'part'?  Only the caller has it.
                if self.exec_opts.strict_word_eval:
                    raise
                # NOTE: Doesn't make the command exit with 1; it just returns
                # a length of -1.
                util.warn(e.UserErrorString())
                return runtime.Str('-1')
        elif val.tag == value_e.StrArray:
            # There can be empty placeholder values in the array.
            length = sum(1 for s in val.strs if s is not None)
        else:
            # Without this, 'length' would be unbound below and the failure
            # would surface as a confusing UnboundLocalError.
            raise AssertionError(val.__class__.__name__)

        return runtime.Str(str(length))

    elif op_id == Id.VSub_Bang:
        # NOTES:
        # - Could translate to eval('$' + name) or eval("\$$name")
        # - ${!array[@]} means something completely different.  TODO:
        #   implement that.
        # - It might make sense to suggest implementing this with associative
        #   arrays?

        # Treat the value of the variable as a variable name.
        if val.tag == value_e.Str:
            try:
                # e.g. ${!OPTIND} gives $1 when OPTIND is 1
                arg_num = int(val.s)
                return self.mem.GetArgNum(arg_num)
            except ValueError:
                if not match.IsValidVarName(val.s):
                    # TODO: location information.
                    # Also note that bash doesn't consider this fatal.  It
                    # makes the command exit with '1', but we don't have that
                    # ability yet?
                    # NOTE(review): e_die() is presumed to raise; if it ever
                    # returned, the GetVar lookup below would still run.
                    e_die('Bad variable name %r in var ref', val.s)
                return self.mem.GetVar(val.s)

        elif val.tag == value_e.StrArray:
            raise NotImplementedError('${!a[@]}')  # bash gets keys this way

        else:
            raise AssertionError

    else:
        raise AssertionError(op_id)
def Trap(argv, traps, nodes_to_run, ex):
    """trap builtin: print, list, remove, or register signal/hook handlers.

    Args:
      argv: arguments to the builtin; after flags these are CODE and
        SIGNAL_SPEC.
      traps: dict of registered handlers, keyed by the sig_spec string exactly
        as the user wrote it.  Mutated in place.
      nodes_to_run: passed through to each _TrapHandler that is created.
      ex: object whose ParseTrapCode(code_str) returns a node, or None on a
        parse error.

    Returns:
      0 on success; 1 for an invalid sig_spec or unparseable trap code.

    Raises:
      args.UsageError: if fewer than two arguments are given.
    """
    arg, i = TRAP_SPEC.Parse(argv)

    # -p: dump every registered handler, followed by a blank line.
    if arg.p:
        for trap_name, trap_handler in traps.iteritems():
            print(trap_name)
            print(trap_handler)
            print()

        sys.stdout.flush()
        return 0

    # -l: hooks first, then signals in ascending numeric order.
    if arg.l:
        signals_by_number = sorted(_SIGNAL_NAMES.items(), key=lambda x: x[1])

        for hook_name in _HOOK_NAMES:
            print(' %s' % hook_name)
        for signal_name, number in signals_by_number:
            print('%2d %s' % (number, signal_name))

        sys.stdout.flush()
        return 0

    try:
        code_str = argv[0]
        sig_spec = argv[1]
    except IndexError:
        raise args.UsageError('trap CODE SIGNAL_SPEC')

    # A code string of '-' means "remove the handler" rather than "install".
    removing = (code_str == '-')

    if not removing:
        # Try parsing the code first, before looking at sig_spec at all.
        node = ex.ParseTrapCode(code_str)
        if node is None:
            return 1  # ParseTrapCode() prints an error for us.

    # Case 1: sig_spec names a hook.
    if sig_spec in _HOOK_NAMES:
        if removing:
            traps.pop(sig_spec, None)  # removing an unset hook is fine
        else:
            if sig_spec in ('ERR', 'RETURN', 'DEBUG'):
                util.warn("*** The %r isn't yet implemented in OSH ***",
                          sig_spec)
            traps[sig_spec] = _TrapHandler(node, nodes_to_run)
        return 0

    # Case 2: sig_spec must name a signal; anything else is an error.
    sig_val = _GetSignalValue(sig_spec)
    if sig_val is None:
        if removing:
            util.error("Can't remove invalid trap %r" % sig_spec)
        else:
            util.error('Invalid trap %r' % sig_spec)
        return 1

    if removing:
        traps.pop(sig_spec, None)

        # Restore default
        if sig_val == signal.SIGINT:
            RegisterSigIntHandler()
        else:
            signal.signal(sig_val, signal.SIG_DFL)
    else:
        handler = _TrapHandler(node, nodes_to_run)
        # For signal handlers, the traps dictionary is used only for
        # debugging.
        traps[sig_spec] = handler
        signal.signal(sig_val, handler)

    return 0