def EvalTildeSub(self, prefix):
    """Expand a tilde prefix (``~`` or ``~user``) to a home directory.

    Args:
      prefix: The text after the tilde; '' means the current user.

    Returns:
      (True, Value): always succeeds.  When no passwd entry is found, the
      value is the literal string '~' + prefix.
    """
    if prefix != '':
        # Named user: consult the passwd database.
        # http://linux.die.net/man/3/getpwnam
        try:
            entry = pwd.getpwnam(prefix)
        except KeyError:
            home_dir = '~' + prefix
        else:
            home_dir = entry.pw_dir
        return True, Value.FromString(home_dir)

    # Bare ~ : a HOME variable (shell variable or environment) wins.
    has_home, home_val = self.mem.Get('HOME')
    if has_home:
        return True, home_val

    # No HOME anywhere, so fall back on the passwd entry of the current uid.
    current_uid = os.getuid()
    try:
        entry = pwd.getpwuid(current_uid)
    except KeyError:
        home_dir = '~' + prefix
    else:
        home_dir = entry.pw_dir
    return True, Value.FromString(home_dir)
def InitEvaluator():
    """Build a CompletionEvaluator over a Mem holding locals x='xxx', y='yyy'."""
    memory = cmd_exec.Mem('', [])
    values = [Value.FromString(text) for text in ('xxx', 'yyy')]
    bindings = [
        (ast.LeftVar(var_name), val)
        for var_name, val in zip(('x', 'y'), values)
    ]
    memory.SetLocal(bindings, 0)

    # Don't need side effects for most things
    return word_eval.CompletionEvaluator(memory, cmd_exec.ExecOpts())
def test_scan_cull(self):
    """A second scan culls the entry whose re-read value no longer matches."""
    fake_maps = [InternalMmap("0-32", 32, "r--w")]
    # 15 matching reads: 32/4 = 8 entries, and the scan area is read twice
    # (16 reads in total), so the 16th read is a non-matching value that
    # checks the filtering works.
    reads = [Value(0, 0, 123)] * 15 + [Value(0, 0, 12)]

    with patch.object(Memory, "read", side_effect=reads), \
            patch("builtins.open", mock_open()), \
            patch("psutil.Process.__init__", return_value=None), \
            patch("psutil.Process.memory_maps", return_value=fake_maps):
        self.memory.attach(123)

        first_pass = self.memory.scan(123)
        self.assertEqual(len(first_pass), 8)

        second_pass = self.memory.scan(123)
        self.assertEqual(len(second_pass), 7)
def _scan_initial(self, value: typing.Any, value_type: Type,
                  match_function: typing.Callable):
    """
    initial scanning, creates the first list which then will be culled down

    :param value: value to look for
    :param value_type: the type of the value; supplies the unpack format
        (``get_format()``) and the per-entry byte size (``size``)
    :param match_function: called as ``match_function(read_value, value)``;
        an entry is kept when it returns a truthy result
    :return: a list of entries found (also stored in ``self.entries``)
    """
    self.entries = []
    unpacker = value_type.get_format()
    step = value_type.size

    for region in self.process.memory_maps(grouped=False):
        unreadable = "r" not in region.perms
        if region.path in ("[vvar]", "[vsyscall]") or unreadable:
            # There seems to be some bug that you cannot read from vvar from
            # an outside process
            continue

        print("scanning range:", region.addr)
        base = int(region.addr.split("-")[0], 16)
        raw = self.read(base, int(region.size))

        found = []
        for index, unpacked in enumerate(unpacker.iter_unpack(raw)):
            candidate = unpacked[0]
            if match_function(candidate, value):
                found.append(
                    Value(self.pid, base + index * step, candidate, value_type))
        self.entries.extend(found)

    return self.entries
def EvalArithSub(self, anode):
    """Evaluate an arithmetic substitution node.

    Args:
      anode: the arithmetic expression node to evaluate.

    Returns:
      (True, Value) holding the result converted with str(), or
      (False, None) after the evaluator's errors have been appended to
      self.error_stack.
    """
    evaluator = expr_eval.ArithEvaluator(self.mem, self)
    if not evaluator.Eval(anode):
        self.error_stack.extend(evaluator.Error())
        return False, None

    result = evaluator.Result()
    return True, Value.FromString(str(result))
def _Read(self, argv):
    """Implement the 'read' builtin: read one line from stdin into a variable.

    Args:
      argv: the builtin's argument vector; argv[1:] are the variable names.
        Only the first name is used for now.

    Returns:
      0 on success, 1 on EOF.
    """
    names = argv[1:]
    line = sys.stdin.readline()
    if not line:  # EOF
        return 1

    # TODO: split line and do that logic
    if names:
        name = names[0]
        text = line.strip()
    else:
        # 'read' with no names used to raise IndexError *after* consuming the
        # line.  Like bash, store the line in REPLY, unmodified apart from
        # the trailing newline.
        name = 'REPLY'
        text = line.rstrip('\n')

    pairs = [(ast.LeftVar(name), Value.FromString(text))]
    self.mem.SetLocal(pairs, 0)  # read always uses local variables?
    return 0
def EvalWordPart(self, part, quoted=False):
    """Evaluate a single word part by dispatching on part.tag.

    Args:
      part: a word part node (literal, quoted part, substitution, ...).
      quoted: whether the part appears inside double quotes.  Only forwarded
        to EvalVarSub.

    Returns:
      (ok, Value) as returned by the per-tag evaluator; the literal branches
      always return (True, Value).

    Raises:
      AssertionError: for an unknown tag, a malformed escaped literal, or a
        TildeSubPart seen in a quoted context.
    """
    tag = part.tag

    if tag == word_part_e.ArrayLiteralPart:
        return self.EvalArrayLiteralPart(part)

    if tag == word_part_e.LiteralPart:
        return True, Value.FromString(part.token.val)

    if tag == word_part_e.EscapedLiteralPart:
        # The token belongs to the part, not to the evaluator: the original
        # read self.token.val here.
        val = part.token.val
        assert len(val) == 2, val  # e.g. \*
        assert val[0] == '\\'
        return True, Value.FromString(val[1])

    if tag == word_part_e.SingleQuotedPart:
        s = ''.join(t.val for t in part.tokens)
        return True, Value.FromString(s)

    if tag == word_part_e.DoubleQuotedPart:
        return self.EvalDoubleQuotedPart(part)

    if tag == word_part_e.CommandSubPart:
        # TODO: If token is Id.Left_ProcSubIn or Id.Left_ProcSubOut, we have
        # to supply something like /dev/fd/63.
        return self.EvalCommandSub(part.command_list)

    if tag in (word_part_e.SimpleVarSub, word_part_e.BracedVarSub):
        # This is the only one that uses quoted?
        return self.EvalVarSub(part, quoted=quoted)

    if tag == word_part_e.TildeSubPart:
        # We never parse a quoted string into a TildeSubPart.
        assert not quoted
        return self.EvalTildeSub(part.prefix)

    if tag == word_part_e.ArithSubPart:
        return self.EvalArithSub(part.anode)

    raise AssertionError(part.tag)
def EvalDoubleQuotedPart(self, part):
    """Evaluate the parts of a double-quoted word part and join them.

    String results are concatenated onto the last accumulated piece; array
    results are merged in through _AppendArray.

    Returns:
      (True, Value): a string Value when exactly one piece was accumulated,
      otherwise an array Value.  (False, Value('')) if any sub-part fails.
    """
    # NOTE: quoted arg isn't used
    pieces = ['']
    for sub_part in part.parts:
        ok, sub_val = self.EvalWordPart(sub_part, quoted=True)
        if not ok:
            return False, Value.FromString('')  # ERROR

        assert isinstance(sub_val, Value), sub_val
        is_str, text = sub_val.AsString()
        if not is_str:
            _AppendArray(pieces, sub_val.a)
            continue
        pieces[-1] += text

    # top level escape
    # TODO: Fix bug.  "$@" could have only one entry, but we still want it to
    # be an array!
    if len(pieces) == 1:
        return True, Value.FromString(pieces[0])
    return True, Value.FromArray(pieces)
def Get(self, name):
    """Look up a variable, innermost scope first, then the environment.

    Args:
      name: the variable name.

    Returns:
      (True, value) when found in a scope of self.var_stack (the stored
      value, as-is) or in the process environment (wrapped with
      Value.FromString); (False, None) otherwise.
    """
    # The last scope pushed is the innermost one, so walk the stack backwards.
    for scope in reversed(self.var_stack):
        if name in scope:
            # Don't need to use flags
            _, value = scope[name]
            return True, value

    # Fall back on environment
    env_value = os.getenv(name)
    if env_value is not None:
        return True, Value.FromString(env_value)

    return False, None
def EvalCommandSub(self, node):
    """Run a command substitution and return its captured stdout.

    Args:
      node: the command node to run in a process.

    Returns:
      (True, Value): the captured output chunks joined and stripped of
      leading and trailing whitespace.  The process status is not consulted.
    """
    proc = self.ex._GetProcessForNode(node)
    # NOTE: We could do an optimization for pipelines.  Pick the last
    # process element, and do pi.procs[-1].CaptureOutput()
    chunks = []
    proc.CaptureOutput(chunks)
    proc.Run()

    # Runtime errors:
    # what if the command sub was "echo foo > $@".  That is invalid.  Then
    # Return false here.  How do we get that value from the Process then?  Do
    # we use a special return value?

    # I think $() does a strip basically?
    # argv $(echo ' hi')$(echo bye) -> hibye
    captured = ''.join(chunks).strip()
    return True, Value.FromString(captured)
def EvalArrayLiteralPart(self, part):
    """Evaluate each word of an array literal into an array of strings.

    Returns:
      (True, Value) holding the array, or (False, None) as soon as one word
      fails to evaluate.

    Raises:
      AssertionError: if a word evaluates to something other than a string.
    """
    items = []
    for word in part.words:
        # - perform splitting when necessary?
        # set IFS here?
        ok, word_val = self.EvalCompoundWord(word)
        if not ok:
            # TODO: show errors?
            return False, None

        # NOTE: For now, we enforce homogeneous arrays of strings.  This is
        # for the shell / proc dialect.  For func dialect, we can have
        # heterogeneous arrays.
        is_str, text = word_val.AsString()
        if not is_str:
            # TODO:
            # - interpolate array into array
            raise AssertionError('Expected string')
        items.append(text)

    return True, Value.FromArray(items)
def test_scan(self, read_mock: Mock, mem_map_mock: Mock):
    """Scanning an 8-byte map finds a matching value at offsets 0 and 4."""
    mem_map_mock.return_value = [InternalMmap("0-8", 8, "r--w")]
    read_mock.side_effect = [123, 123]

    found = self.memory.scan("123")
    expected = [Value(0, 0, 123), Value(0, 4, 123)]
    self.assertEqual(found, expected)
def _EvalVar(self, name, quoted=False):
    """Evaluates the given variable in the current scope.

    Args:
      name: the variable name without the leading '$'.  '@', '*', '?' and
        '#' are handled specially; anything else is looked up in self.mem.
      quoted: whether the substitution is inside double quotes.  Only
        matters for '@'.

    Returns:
      bool ok, Value v.  For ordinary names this is exactly what
      self.mem.Get(name) returns, so ok is False for an undefined variable.
    """
    # $@ is special -- it need to know whether it is in a double quoted
    # context.
    #
    # - If it's $@ in a double quoted context, return an ARRAY.
    # - If it's $@ in a normal context, return a STRING, which then will be
    #   subject to splitting.

    # https://www.gnu.org/software/bash/manual/bashref.html#Special-Parameters
    # http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_05_02
    # "When the expansion occurs within a double-quoted string (see
    # Double-Quotes), it shall expand to a single field with the value of
    # each parameter separated by the first character of the IFS variable, or
    # by a <space> if IFS is unset.  If IFS is set to a null string, this is
    # not equivalent to unsetting it; its first character does not exist, so
    # the parameter values are concatenated."
    ifs = _GetIfs(self.mem)
    sep = ifs[0] if ifs else ''

    # GetArgv.  And then look at whether we're in a double quoted context or
    # not.
    if name in ('@', '*'):
        argv = self.mem.GetArgv()
        if name == '@' and quoted:  # "$@" evaluates to an array
            return True, Value.FromArray(argv)
        # $@ $* "$*"
        # Join with sep, not ifs[0]: indexing an empty IFS raised IndexError
        # instead of concatenating the parameters.
        return True, Value.FromString(sep.join(argv))

    if name == '?':
        # TODO: Have to parse status somewhere.
        # External commands need WIFEXITED test.  What about subshells?
        return True, Value.FromString(str(self.mem.last_status))

    if name == '#':
        argv = self.mem.GetArgv()
        return True, Value.FromString(str(len(argv)))

    # TODO: $0 $1 ...

    # BracedVarSub is for $foo, without qualifiers.
    #
    # The 'nounset' / empty-string handling for undefined variables is done
    # by the caller (EvalVarSub); the copy that used to follow this return
    # was unreachable and has been removed.
    #
    # TODO: $array is an error.  Only an explicit ${array[0]} or ${array[@]}
    # is valid.  If exec option bash_array is set, an implicit a[0] could be
    # applied here (val.EvalToFirst()).
    defined, val = self.mem.Get(name)
    return defined, val
def Execute(self, node):
    """Execute a command node, dispatching on node.tag.

    Args:
      node: of type AstNode

    Returns:
      (status, cflow): the integer exit status and an EBuiltin control-flow
      marker (EBuiltin.NONE unless a builtin requested break/continue/etc.).
      self.mem.last_status is updated to status before returning.

      NOTE(review): the Assignment branch still returns a bare None when the
      right-hand side fails to evaluate, which callers that unpack the
      result cannot handle -- confirm the intended error protocol.

    Raises:
      AssertionError: on word/env/boolean/arithmetic evaluation errors and
        unknown node tags.
      NotImplementedError: for Case nodes.
    """
    redirects = self._EvalRedirects(node)

    # TODO: Change this to its own enum?
    # or add EBuiltin.THROW _throw?  For testing.
    # Is this different than exit?  exit should really be throw.  Because we
    # want to be able to unwind the stack, show stats, etc.  Exiting in the
    # middle is bad.
    # exit and _throw could be the same, except _throw takes an error message,
    # and exits 1, and shows traceback.
    cflow = EBuiltin.NONE

    # TODO: Only eval argv[0] once.  It can have side effects!
    if node.tag == command_e.SimpleCommand:
        argv = self.ev.EvalWords(node.words)
        if argv is None:
            err = self.ev.Error()
            # TODO: Throw shell exception
            raise AssertionError('Error evaluating words: %s' % err)
        more_env = self.ev.EvalEnv(node.more_env)
        if more_env is None:
            print(self.error_stack)
            # TODO: throw exception
            raise AssertionError()
        thunk = self._GetThunkForSimpleCommand(argv, more_env)

        # Don't waste a process if we'd launch one anyway.
        if thunk.IsExternal():
            p = Process(thunk, fd_state=self.fd_state, redirects=redirects)
            status = p.Run()

            if os.WIFEXITED(status):
                status = os.WEXITSTATUS(status)
            else:
                # A command killed by a signal must not look successful;
                # shells report it as 128 + signal number.
                sig = os.WTERMSIG(status)
                status = 128 + sig

        else:  # Internal
            for r in redirects:
                r.ApplyInParent(self.fd_state)

            status, cflow = thunk.RunInParent()
            restore_fd_state = thunk.ShouldRestoreFdState()

            # Special case for exec 1>&2 (with no args): we permanently
            # change the fd state.  BUT we don't want to restore later.
            #
            # TODO: Instead of this, maybe r.ApplyPermaent(self.fd_state)?
            if restore_fd_state:
                self.fd_state.RestoreAll()
            else:
                self.fd_state.ForgetAll()

    elif node.tag == command_e.Sentence:
        # TODO: Compile this away
        status, cflow = self.Execute(node.command)

    elif node.tag == command_e.Pipeline:
        status, cflow = self._RunPipeline(node)

    elif node.tag == command_e.Subshell:
        # This makes sure we don't waste a process if we'd launch one anyway.
        p = self._GetProcessForNode(node.children[0])
        # NOTE(review): unlike the SimpleCommand branch, this status is not
        # decoded with os.WIFEXITED/os.WEXITSTATUS -- confirm what Run()
        # returns for this process.
        status = p.Run()

    elif node.tag == command_e.DBracket:
        bool_ev = expr_eval.BoolEvaluator(self.mem, self.ev)
        ok = bool_ev.Eval(node.expr)
        if ok:
            status = 0 if bool_ev.Result() else 1
        else:
            raise AssertionError(
                'Error evaluating boolean: %s' % bool_ev.Error())

    elif node.tag == command_e.DParen:
        arith_ev = expr_eval.ArithEvaluator(self.mem, self.ev)
        ok = arith_ev.Eval(node.child)
        if ok:
            i = arith_ev.Result()
            # Negate the value: non-zero in arithmetic is true, which is zero
            # in shell land
            status = 0 if i != 0 else 1
        else:
            raise AssertionError(
                'Error evaluating (( )): %s' % arith_ev.Error())

    elif node.tag == command_e.Assignment:
        pairs = []
        for pair in node.pairs:
            # NOTE: do_glob=False, because foo=*.a makes foo equal to '*.a',
            # literally.

            # TODO: Also have to evaluate the right hand side.
            ok, val = self.ev.EvalCompoundWord(pair.rhs)
            if not ok:
                # NOTE(review): not a (status, cflow) pair; see docstring.
                return None
            pairs.append((pair.lhs, val))

        flags = 0  # TODO: Calculate from keyword/flags
        if node.keyword == Id.Assign_Local:
            self.mem.SetLocal(pairs, flags)
        else:  # could be readonly/export/etc.
            self.mem.SetGlobal(pairs, flags)

        # TODO: This should be eval of RHS, unlike bash!
        status = 0

    # The only difference between these two is that CommandList has no
    # redirects.  We already took care of that above.
    elif node.tag in (command_e.CommandList, command_e.BraceGroup):
        status = 0  # for empty list
        for child in node.children:
            status, cflow = self.Execute(child)  # last status wins
            if cflow in (EBuiltin.BREAK, EBuiltin.CONTINUE):
                break

    elif node.tag == command_e.AndOr:
        left, right = node.children
        status, cflow = self.Execute(left)

        if node.op_id == Id.Op_DPipe:
            if status != 0:
                status, cflow = self.Execute(right)
        elif node.op_id == Id.Op_DAmp:
            if status == 0:
                status, cflow = self.Execute(right)
        else:
            raise AssertionError

    elif node.tag == command_e.While:
        # The loop's status is that of the last body executed, or zero if
        # the body never ran -- not the failing condition's status.
        status = 0
        while True:
            cond_status, _ = self.Execute(node.cond)
            if cond_status != 0:
                break
            status, cflow = self.Execute(node.body)  # last one wins
            if cflow == EBuiltin.BREAK:
                cflow = EBuiltin.NONE  # reset since we respected it
                break
            if cflow == EBuiltin.CONTINUE:
                cflow = EBuiltin.NONE  # reset since we respected it

    elif node.tag == command_e.ForEach:
        iter_name = node.iter_name
        if node.do_arg_iter:
            iter_list = self.mem.GetArgv()
        else:
            iter_list = self.ev.EvalWords(node.iter_words)
            # We need word splitting and so forth
            # NOTE: This expands globs too.  TODO: We should pass in a
            # Globber() object.
        status = 0  # in case we don't loop
        cflow = EBuiltin.NONE
        for x in iter_list:
            self.mem.SetSimpleVar(iter_name, Value.FromString(x))

            status, cflow = self.Execute(node.body)

            if cflow == EBuiltin.BREAK:
                cflow = EBuiltin.NONE  # reset since we respected it
                break
            if cflow == EBuiltin.CONTINUE:
                cflow = EBuiltin.NONE  # reset since we respected it

    elif node.tag == command_e.DoGroup:
        # Delegate to command list
        # TODO: This should be compiled out!
        status, cflow = self.Execute(node.child)

    elif node.tag == command_e.FuncDef:
        self.funcs[node.name] = node
        status = 0

    elif node.tag == command_e.If:
        # Zero if no branch is executed, per POSIX; the failing condition's
        # status must not leak out as the status of the 'if' itself.
        status = 0
        done = False
        for arm in node.arms:
            cond_status, _ = self.Execute(arm.cond)
            if cond_status == 0:
                # Keep cflow so that break/continue inside the branch
                # reaches the enclosing loop.
                status, cflow = self.Execute(arm.action)
                done = True
                break
        # TODO: The compiler should flatten this
        if not done and node.else_action is not None:
            status, cflow = self.Execute(node.else_action)

    elif node.tag == command_e.NoOp:
        status = 0  # make it true

    elif node.tag == command_e.Case:
        raise NotImplementedError

    else:
        raise AssertionError(node.tag)

    if self.exec_opts.errexit:
        if status != 0:
            # TODO: token should be set to what?  Is it node.begin_word and
            # node.end_word?
            token = None
            tb = self.mem.GetTraceback(token)
            self._SetException(
                tb, "Command %s exited with code %d" % ('TODO', status))
            # cflow should be EXCEPT

    # TODO: Is this the right place to put it?  Does it need a stack for
    # function calls?
    self.mem.last_status = status
    return status, cflow
def EvalVarSub(self, part, quoted=False):
    """Evaluate a variable substitution ($foo or ${foo...}).

    Args:
      part: SimpleVarSub or BracedVarSub.  For a BracedVarSub, the optional
        bracket_op, prefix_op and suffix_op attributes are applied.
      quoted: whether the var sub was double quoted

    Returns:
      (True, Value) on success, (False, None) after an error has been
      recorded with self._AddErrorContext.

    Raises:
      NotImplementedError: for operators that are not implemented yet
        (string indexing, most test ops, vectorized suffix strip).
      AssertionError: for an unknown bracket op.
    """
    if part.tag == word_part_e.SimpleVarSub:
        name = part.token.val[1:]  # strip off leading $
        defined, val = self._EvalVar(name, quoted=quoted)
        if not defined:
            self._AddErrorContext("Undefined variable %s" % name)
            return False, None
        # TODO: Fix this and merge it with logic below.  Respect 'nounset'
        # option, etc.
        return True, val

    name = part.token.val

    # Possibilities: Array OR Index; then Test OR Transform
    # So instead of a list of ops, it should be optional IndexOp, optional
    # TestOp, optional TransformOp.

    # empty=''
    # $unset -> '' EMPTY_UNQUOTED
    # ${unset:-foo} -> foo
    # ${unset-foo} -> foo
    # $empty -> ''
    # ${empty:-foo} -> foo
    # ${empty-foo} -> ''

    defined, val = self._EvalVar(name, quoted=quoted)

    array_ok = (name == '@')  # Don't need any op for array $@
    index_error = False  # test_op can suppress this

    if defined and part.bracket_op:
        if part.bracket_op.tag == bracket_op_e.WholeArray:
            op_id = part.bracket_op.op_id

            # TODO: Change this to array_op instead of bracket_op?
            if op_id == Id.Lit_At:
                if val.IsArray():
                    array_ok = True
                else:
                    self._AddErrorContext("Can't index non-array with @")
                    return False, None

            elif op_id == Id.Arith_Star:
                if val.IsArray():
                    array_ok = True
                else:
                    self._AddErrorContext("Can't index non-array with *")
                    return False, None

            else:
                raise AssertionError(op_id)

        elif part.bracket_op.tag == bracket_op_e.ArrayIndex:
            array_ok = True
            is_array, a = val.AsArray()
            if is_array:
                anode = part.bracket_op.expr
                # TODO: This should propagate errors
                arith_ev = expr_eval.ArithEvaluator(self.mem, self)
                ok = arith_ev.Eval(anode)
                if not ok:
                    self._AddErrorContext(
                        'Error evaluating arith sub in index expression')
                    return False, None
                index = arith_ev.Result()
                try:
                    s = a[index]
                except IndexError:
                    # Out of range: treat like an undefined variable below.
                    index_error = True
                    defined = False
                    val = None
                else:
                    val = Value.FromString(s)

            else:  # it's a string
                raise NotImplementedError("String indexing not implemented")

        else:
            raise AssertionError(part.bracket_op.tag)

    if defined and val.IsArray():
        if not array_ok:
            self._AddErrorContext(
                "Array was referenced without explicit index, e.g. ${a[@]} "
                "or ${a[0]}")
            return False, None

    # if the op does NOT have colon
    #use_default = not defined

    if part.suffix_op and LookupKind(part.suffix_op.op_id) == Kind.VTest:
        op = part.suffix_op

        # TODO: Change this to a bit test.
        if op.op_id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                        Id.VTest_ColonQMark, Id.VTest_ColonPlus):
            # The colon forms also treat an empty string as missing.
            is_falsey = not defined or val.IsEmptyString()
        else:
            is_falsey = not defined

        if op.op_id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
            if is_falsey:
                argv = []
                ok, val2 = self.EvalCompoundWord(op.arg_word)
                if not ok:
                    return False, None
                val2.AppendTo(argv)
                # TODO: This is roundabout
                val = Value.FromArray(argv)

                defined = True  # now we have a variable

        # TODO:
        # + -- inverted test -- assign to default
        # ? -- error
        # = -- side effect assignment
        else:
            # Report the unsupported operator (the original passed the
            # builtin function 'id' by mistake).
            raise NotImplementedError(op.op_id)

    if not defined:
        # TODO: MOVE down to EvalVarSub().  Test ops will change this.
        if self.exec_opts.nounset:
            # stack will unwind
            token = None
            # TODO: Need to have the ast for varsub.  BracedVarSub should
            # have a token?
            #tb = self.mem.GetTraceback(token)
            self._AddErrorContext("Unset variable %s" % name)
            return False, None
        else:
            # TODO: Isn't this where we do EMPTY_UNQUOTED?
            return True, Value.FromString('')

    if part.prefix_op:
        op_id = part.prefix_op

        if op_id == Id.VSub_Pound:  # LENGTH
            if val.IsArray():
                length = len(val.a)
            else:
                length = len(val.s)
            val = Value.FromString(str(length))

    # NOTE: You could have both prefix and suffix
    if part.suffix_op and LookupKind(
            part.suffix_op.op_id) in (Kind.VOp1, Kind.VOp2):
        op = part.suffix_op

        # NOTES:
        # - These are VECTORIZED on arrays
        #   - I want to allow this with my shell dialect: @{files|slice 1
        #     2|upper} does the same thing to all of them.
        # - How to do longest and shortest possible match?  bash and mksh
        #   both call fnmatch() in a loop, with possible short-circuit
        #   optimizations.
        #   - TODO: Write a test program to show quadratic behavior?
        #   - original AT&T ksh has special glob routines that returned the
        #     match positions.
        #   Implementation:
        #   - Test if it is a LITERAL or a Glob.  Then do a plain string
        #     operation.
        #   - If it's a glob, do the quadratic algorithm.
        #   - NOTE: bash also has an optimization where it extracts the
        #     LITERAL parts of the string, and does a prematch.  If none of
        #     them match, then it SKIPs the quadratic algorithm.
        #   - The real solution is to compile a glob to RE2, but I want to
        #     avoid that dependency right now... libc regex is good for a
        #     bunch of things.
        # - Bash has WIDE CHAR support for this.  With wchar_t.
        #   - All sorts of functions like xdupmbstowcs
        #
        # And then pat_subst() does some special cases.  Geez.

        # prefix strip
        if op.op_id == Id.VOp1_DPound:
            pass
        elif op.op_id == Id.VOp1_Pound:
            pass

        # suffix strip
        elif op.op_id == Id.VOp1_Percent:
            argv = []
            for w in op.words:
                ok, val2 = self.EvalCompoundWord(w)
                if not ok:
                    return False, None
                val2.AppendTo(argv)

            # TODO: Evaluate words, and add the SPACE VALUES, getting a
            # single string.  And then test if it's a literal or glob.
            suffix = argv[0] if argv else ''

            if val.IsArray():
                # TODO: Vectorize it
                raise NotImplementedError
            else:
                s = val.s
                # An empty suffix must leave the value alone: s[:-0] is
                # s[:0], which would erase the whole string.
                if suffix and s.endswith(suffix):
                    s = s[:-len(suffix)]
                    val = Value.FromString(s)

        elif op.op_id == Id.VOp1_DPercent:
            pass

        # Patsub, vectorized
        elif op.op_id == Id.VOp2_Slash:
            pass

        # Either string slicing or array slicing.  However string slicing has
        # a unicode problem?  TODO: Test bash out.  We need utf-8 parsing in
        # C++?
        #
        # Or maybe have a different operator for byte slice and char slice.
        elif op.op_id == Id.VOp2_Colon:
            pass

        else:
            raise NotImplementedError(op)

    return True, val
def SetGlobalString(self, name, s):
    """Helper for completion: bind the global `name` to the string `s`.

    Raises:
      AssertionError: if `s` is not a str.
    """
    assert isinstance(s, str)
    self.SetGlobal([(name, Value.FromString(s))], 0)
def SetGlobalArray(self, name, a):
    """Helper for completion: bind the global `name` to the list `a`.

    Raises:
      AssertionError: if `a` is not a list.
    """
    assert isinstance(a, list)
    self.SetGlobal([(name, Value.FromArray(a))], 0)
def EvalCommandSub(self, node):
    """Return a fixed placeholder instead of running the command.

    Args:
      node: the command node; ignored.

    Returns:
      (True, Value) holding the placeholder string.
    """
    # Just return a dummy string?
    placeholder = Value.FromString('__COMMAND_SUB_NOT_EXECUTED__')
    return True, placeholder
def test_scan_cull_empty(self):
    """Culling with no prior scan results yields an empty list."""
    culled = self.memory._scan_cull(Value(0, 0, 123))
    self.assertEqual(culled, [])
def EvalCompoundWord(self, word, ifs='', do_glob=False, elide_empty=True):
    """CompoundWord.Eval().

    This is used in the following contexts:
    - Evaluating redirect words: no glob and no word splitting
    - Right hand side of assignments -- no globbing
    - [[ context
      - no word splitting
      - but do_glob=True when on RHS of ==
        - we will later use in fnmatch (not glob())
      - elide_empty=False

    Args:
      word: the ast.CompoundWord to evaluate
      ifs: characters handed to _IfsSplit for splitting the results of
        substitutions.
      do_glob: Whether we are performing globs AFTER this.  This means that
        quoted literal glob metacharacters need to start with \\.  e.g. "*"
        and '*' turn into \\*.  But other metacharacters must be left alone.
      elide_empty: whether a word made only of empty unquoted substitutions
        evaluates to Value.EmptyUnquoted().

    Returns:
      (ok, Value) -- the Value is empty unquoted, string, or array.  On
      failure, (False, Value('')) after recording error context.
    """
    assert isinstance(
        word, ast.CompoundWord), "Expected CompoundWord, got %s" % word

    # Assume we elide, unless we get something "significant".
    significant = False
    pieces = ['']

    for word_part in word.parts:
        ok, part_val = self.EvalWordPart(word_part, quoted=False)
        if not ok:
            self._AddErrorContext(
                "Error evaluating word part %r" % word_part)
            return False, Value.FromString('')

        assert isinstance(part_val, Value), part_val
        is_str, text = part_val.AsString()
        glob_escape = do_glob and not _GlobsAreExpanded(word_part)

        if not is_str:
            # The result of a DoubleQuotedPart can be an array.
            significant = True
            # No glob escape because callee (e.g. DoubleQuotedPart) is
            # responsible
            _AppendArray(pieces, part_val.a, glob_escape=glob_escape)
            continue

        if _IsSubst(word_part):
            # Split substitutions
            # NOTE: Splitting is the same whether we are glob escaping or not
            fields = _IfsSplit(text, ifs)
            was_empty = _AppendArray(pieces, fields, glob_escape=glob_escape)
            if not was_empty:
                significant = True
            continue

        # Don't split.
        # Any non-subst parts, even '', means we don't elide.
        significant = True
        if glob_escape:
            text = _GlobEscape(text)
        pieces[-1] += text

    if elide_empty and not significant:
        return True, Value.EmptyUnquoted()
    if len(pieces) == 1:
        return True, Value.FromString(pieces[0])
    return True, Value.FromArray(pieces)