def execve(self, pid):
    """Start the re-executed process and step it through its execve().

    Builds a ``rerun.Ptrace`` instance from the recorded execve() arguments
    in ``self.arg`` and the auxiliary data in ``self.ret.aux``, loads two
    records from it (expected to be execve()'s enter and exit), registers
    the new process under the virtual pid and marks this instance
    ``"active"``.

    Args:
        pid: virtual pid under which the new process is stored in
            ``self.pids``.

    Raises:
        AssertionError: if the second loaded record is not the exit of an
            execve() syscall.
    """
    enter = self.arg
    dbg.infom("!", "#R<re-execute#>: %s" % str(enter))

    fn = enter.args["filename"].path
    argv = enter.args["argv"]
    envp = enter.args["envp"]
    # The first and last aux entries are not used here (fds are not set up
    # yet, see the XXX note below); unpacking still checks the aux shape.
    [_ret, root, pwd, _fds] = self.ret.aux

    # Set the sequence number from execve()'s sid: low 48 bits, plus one.
    # The inner parentheses are required: `+` binds tighter than `&`, so
    # without them the expression collapses to `enter.sid & (1 << 48)`.
    self.seq = (enter.sid & ((1 << 48) - 1)) + 1

    # XXX
    #  - setup fds
    # NOTE(review): root.ino == 2 presumably means "the real filesystem
    # root", i.e. no chroot is needed -- confirm.
    new_root = kutil.get(root) if root.ino != 2 else None
    self.proc = rerun.Ptrace([fn] + argv[1:],
                             envp,
                             kutil.get(pwd),
                             new_root,
                             self.uids)

    # enter/exit should be execve() too; the enter record is discarded
    self.proc.load(True, None)
    (r, p) = self.proc.load(False, None)
    assert r.usage & EXIT and r.nr == syscall.NR_execve

    # virtualize pid
    self.pids[pid] = r.pid
    dbg.rerun("vpid:%s -> pid:%s (active)" % (pid, r.pid))

    # enable rerun module
    self.state = "active"
def detach(self, r, ts):
    """Switch this rerun instance to the "detached" state.

    Remembers ``r`` as the record being waited on, derives a new timestamp
    from ``ts`` with its first component incremented, stamps ``r`` with that
    timestamp followed by the current sequence number, and advances the
    sequence number.

    Args:
        r: record that caused the detach; its ``ts`` attribute is
            overwritten.
        ts: timestamp tuple; must support indexing, slicing and
            concatenation with a tuple.
    """
    message = 'expanding... rerun instance %s detaching' % self
    dbg.rerun(message)

    self.state = "detached"
    self.wait_r = r

    # bump only the leading component, keep the rest untouched
    head = ts[0] + 1
    tail = ts[1:]
    self.ts = (head, ) + tail

    # the record's timestamp carries the sequence number as a last field
    seq_suffix = (self.seq, )
    r.ts = self.ts + seq_suffix
    self.seq += 1
def resume(self):
    """Leave the detached state and continue in expanding mode.

    Clears the awaited record, sets the ``expanding`` flag, moves the state
    to ``"done"`` and finally calls ``self.release_all()``.
    """
    message = 'expanding... rerun instance %s resuming' % self
    dbg.rerun(message)

    # nothing is being waited on any more
    self.wait_r = None
    self.expanding = True
    # note: the state becomes "done", not "resumed"
    self.state = "done"

    self.release_all()
def wait4(self, vpid):
    """Run a pending wait4() of the process ``vpid`` without blocking.

    Do not use this function unless the relevant process is halfway through
    the syscall (i.e. unless process.syscall() has been called exactly
    once).

    WNOHANG is OR-ed into the ``rdx`` register before the syscall is
    completed. If the call then returns 0 although the process had not
    asked for WNOHANG itself, the registers are rewound so wait4() is
    executed again later, and None is returned.

    Args:
        vpid: virtual pid of the waiting process.

    Returns:
        ``(r, p)`` as produced by ``self.proc.load`` when the wait
        completed, or None when it has to be retried.
    """
    real = self.pids[vpid]
    tracee = self.proc.procs[real.pid]
    assert tracee.getreg("rax") == syscall.NR_wait4

    # remember whether the process itself asked for WNOHANG, then force it
    caller_nohang = tracee.getreg("rdx") & os.WNOHANG
    options = tracee.getreg("rdx")
    tracee.setreg("rdx", options | os.WNOHANG)

    # fire
    (r, p) = self.proc.load(False, real.pid)
    self.__fix_pid(r, vpid)
    self.__fix_ts(r, False)

    must_retry = r.ret == 0 and not caller_nohang
    if not must_retry:
        dbg.rerunm("!", "SKIP: [%s] %s" % (r.sid, str(r)))
        # keep r, p to communicate between ret/arg actors
        self.set_last(r, p)
        return (r, p)

    # A child exists, but it has not changed state yet. Modify rip (+regs)
    # to execute wait4() next time.
    # TODO: pull this code into rerun.py
    # NOTE(review): rdx still carries the forced WNOHANG at this point --
    # confirm the retry is meant to see it.
    syscall_nr = tracee.getreg("orig_rax")
    tracee.setreg("rax", syscall_nr)
    # step back by 2, presumably the length of the syscall instruction
    rewound_rip = tracee.getreg("rip") - 2
    tracee.setreg("rip", rewound_rip)
    dbg.rerun('messing with rerun registers outside of rerun.py')
    return None
def redo(self):
    """Redo a syscall-return action.

    Three things can happen:

    * the return node describes an ``execve`` and the actor has no rerun
      object yet: a ``Rerun`` is created, started via ``execve`` and
      attached to the actor (shepherded re-execution starts here);
    * the actor's rerun object is expanding: the next entering record is
      fetched and handed to ``self.osloader.parse_record``;
    * otherwise the request is ignored.

    Returns None in every case.
    """
    dbg.syscall("redoing ProcSysRet")

    me = self.actor
    node = self.retnode

    # inherit a rerun object recorded on the return node, if the actor
    # does not have one yet
    if not me.rerun and getattr(node, 'rerun', None):
        me.rerun = node.rerun

    if node.data is None:
        # This probably means that the retnode has been rolled back to
        # its (-Inf, -Inf, None) checkpoint and has not been updated
        # (rolled forward), probably because the syscall that was to
        # fill it never got executed.
        dbg.syscall("no argsnode data for %s, returning prematurely" % self)
        return

    # start shepherded re-execution
    if node.data.name == "execve" and not me.rerun:
        dbg.rerun('actor %s (%s) has no rerun object'
                  % (self.actor, id(self.actor)))

        # create rerun process; uids are passed on only when present
        (arg, ret) = me.get_execve()
        extra = (self.uids,) if hasattr(self, "uids") else ()
        rerun = Rerun(arg, ret, *extra)
        rerun.execve(me.pid)

        # process actor has a reference to the rerun module
        me.rerun = rerun
        rerun.add(me)
        return

    session = me.rerun
    if not (session and session.is_expanding()):
        # It's not undesirable that we fall through to here. ProcSysRets
        # should only be reexecuted if there is some information to be
        # handed back to the process (which there never should be, because
        # in such a circumstance, the rerun module would already be active,
        # and receiving "sysret nodes" directly from the kernel, as it
        # were).
        dbg.syscall('ignoring Procsysret redo request')
        return

    pid = node.data.pid
    (r, p) = session.next(True, pid)
    if r == None:
        return
    assert r.usage & ENTER

    # pass rerun object to the bumped process
    if r.pid != node.data.pid:
        assert r.pid == pid
        bumped = ProcessActor.get(r.pid, r)
        bumped.rerun = me.rerun
        bumped.rerun.add(bumped)

    # create pcall action
    self.osloader.parse_record(r)
    return
def check_args(r, a):
    """Tell whether a re-executed syscall matches the recorded one.

    Every argument name in ``r.args`` is compared against the same name in
    ``a.args``. Some arguments are skipped or compared loosely, see the
    inline comments.

    Args:
        r: record of the syscall as just re-executed.
        a: record of the syscall from the original execution.

    Returns:
        True when all compared arguments match (also when ``r.args`` is
        empty), False on the first mismatch.
    """
    for k in r.args:
        # ignore mmap/munmap: addr (ASLR, change every execution)
        if r.nr in [syscall.NR_mmap, syscall.NR_munmap] and k == "addr":
            continue
        # ignore ioctl buf (user address) argument
        # (XXX need to check its value)
        if r.nr in [syscall.NR_ioctl] and k == "buf":
            continue

        orig = a.args[k]
        new = r.args[k]

        # compare inode instead of `fd' number
        if isinstance(orig, sysarg.file):
            # like, fd != -1 & pts and socket
            if orig.inode and orig.inode.prefix in ["pts", "socket"]:
                continue
            if orig.inode != new.inode:
                dbg.trace("1)", orig)
                dbg.trace("2)", new)
                return False
            continue

        # check identical
        if orig != new:
            if r.nr == syscall.NR_open:
                # TODO(ipopov). Check this. It seems that the argument to
                # open() should not include the inode opened. Surely that
                # belongs in the return value of the syscall. Handling this
                # here is a hack, and it needs to be moved into a more
                # general place.
                # NOTE(review): this assumes every differing open()
                # argument has path-like attributes -- confirm for
                # flags/mode.
                # pathid is compared across both records; it used to be
                # compared with itself, which was always true.
                if (orig.path == new.path and
                        orig.root_in == new.root_in and
                        orig.root_pn == new.root_pn and
                        orig.pathid == new.pathid):
                    dbg.rerun(
                        'short circuiting. returning True. would have returned %s.'
                        % (str(orig) == str(new)))
                    continue

            # slow path: because some obj could be deep-copied, str -> repr
            if str(orig) == str(new):
                continue

            # but sometimes, this str representation gets messed up for
            # whatever reason, so handle this special case here:
            # (needless to say, TODO: get some more sane argument checking
            # here)
            if isinstance(orig, list):
                if str(set(orig)) == str(set(new)):
                    dbg.trace('hi ivo!')
                    continue

            dbg.trace("1)", orig)
            dbg.trace("2)", new)
            return False

    return True
def clone(self, vpid, child_pid):
    """Complete a clone() of process ``vpid`` and register the child.

    Args:
        vpid: virtual pid of the cloning process.
        child_pid: virtual pid the child had in the original execution, or
            a false value when there is none.

    Returns:
        The ``(r, p)`` pair loaded for the clone() exit.

    Raises:
        AssertionError: if the loaded record is not the exit of clone().
    """
    parent = self.pids[vpid]
    (r, p) = self.proc.load(False, parent.pid)
    assert r.usage & EXIT and r.nr == syscall.NR_clone

    # Virtualize child pid. It's possible that this clone syscall takes
    # place during expansion, in which case the child pid cannot be matched
    # up to a vpid from the original execution. In that case, let vpid=pid.
    child_vpid = child_pid if child_pid else r.ret
    self.pids[child_vpid] = r.ret
    dbg.rerun("vpid:%s -> pid:%s (active)" % (child_vpid, r.ret))

    self.__fix_pid(r, vpid)
    self.__fix_ts(r, False)

    self.set_last(r, p)
    return (r, p)
def next(self, entering, vpid):
    """Advance the process ``vpid`` by one step and return what was loaded.

    Args:
        entering: forwarded unchanged as the first argument of
            ``self.proc.load``.
        vpid: virtual pid of the process to step.

    Returns:
        The ``(r, p)`` pair from ``self.proc.load``. When either element is
        None the pair is returned untouched; otherwise the record's pid and
        timestamp are fixed up first and the pair is stored via
        ``set_last``.
    """
    # convert pid
    real = self.pids[vpid]

    # execute one step, transform to pid integer for rerun module
    (r, p) = self.proc.load(entering, real.pid)
    dbg.rerun('proc.load returned %s' % str((r, p)))

    nothing_loaded = r == None or p == None
    if nothing_loaded:
        return (r, p)

    self.__fix_pid(r, vpid)
    self.__fix_ts(r, entering)

    # only the low 48 bits of the sid are shown
    low_bits = (1 << 48) - 1
    dbg.rerunm("!", "[%s] %s" % (r.sid & low_bits, str(r)))

    # keep r, p to communicate between ret/arg actors
    self.set_last(r, p)
    return (r, p)
def redo(self):
    """Execute another action of this action's actor.

    If the greedy heuristic matches the current action to the action just
    executed, we're fine. Otherwise, call parse_record() to create a new
    node in the action history graph.

    Outline, depending on the state of the actor's rerun object:

    * no rerun object: log and return;
    * following: fetch the next entering record and compare it with the
      recorded syscall (``self.argsnode.origdata``). On a match the
      arguments node is refreshed from the record; on a mismatch the rerun
      object is detached and a new node is created for the record;
    * detached: resume the rerun object if this node is the one it was
      waiting for;
    * finally, if the rerun object is expanding, the original data is
      reused as the node's data.
    """
    dbg.syscall("redoing")

    rerun = self.actor.rerun
    if not rerun:
        dbg.syscall("no rerun instance bound to ProcSyscall %s;"
                    "returning prematurely" % self)
        return

    # following history while rerunning
    if rerun.is_following():
        (r, p) = rerun.next(True, self.actor.pid)
        if r == None:
            dbg.syscall('Aborting syscall; rerun returned no record')
            return
        assert r.usage & ENTER

        # special treatment of wait4
        #  (see note in wait4.doc)
        # The following if statement seems pretty impenetrable. What I think
        # it does is the following. If a process under rerun tries to
        # execute a wait4, but that doesn't match up with the expected
        # syscall from the previous execution, call code.interact() and
        # handle it manually.
        # NOTE(review): code.interact() opens an interactive console and
        # blocks until it is left -- confirm this debugging hook is still
        # wanted on this path.
        if (r.nr == syscall.NR_wait4 and
            self.argsnode.origdata.nr != syscall.NR_wait4):
            dbg.rerun('entering interactive')
            code.interact(local=locals())
            # wait4() again, at this time we should be able
            # to fetch child exit status
            # NOTE(review): the record is requested with entering=False but
            # asserted to carry ENTER -- confirm.
            (r, p) = rerun.next(False, self.actor.pid)
            assert r.usage & ENTER and r.nr == syscall.NR_wait4

            # this is a real system call we should execute
            # on this action node
            (r, p) = rerun.next(True, self.actor.pid)
            assert r.usage & ENTER

        # match up
        if (self.argsnode.origdata.nr == r.nr and
            check_args(r, self.argsnode.origdata)):
            # update arguments: copy the recorded data, but take the
            # argument values from the record just executed
            self.argsnode.data = deepcopy(self.argsnode.origdata)
            self.argsnode.data.args = r.args
            self.argsnode.rerun = rerun
        # failed: need to explore/create new nodes/edges
        else:
            dbg.infom(
                "!", "#R<mismatched#> syscalls: %s vs %s"
                % (self.argsnode.origdata, r))
            assert self.argsnode.data == None

            # pick the largest `tac` among the maximal action of every
            # actor attached to the rerun object
            # NOTE(review): ts stays the integer 0 when there are no actors,
            # while detach() indexes and slices it -- confirm that cannot
            # happen.
            ts = 0
            for a in rerun.actors:
                t = max(a.actions)
                if ts < t.tac:
                    ts = t.tac
            rerun.detach(r, ts)

            # Actually create a new node to correspond to what was just
            # executed.
            self.osloader.parse_record(r)

    # rerunning but creating new nodes/edges
    elif rerun.is_state("detached"):
        # if detached node
        if rerun.wait_r == self.argsnode.origdata:
            rerun.resume()
            #rerun.activate()

    # ipopov: ????
    # NOTE(review): as laid out here this check runs after either branch
    # above -- confirm that is the intended nesting.
    if rerun.is_expanding():
        self.argsnode.data = self.argsnode.origdata
        self.argsnode.rerun = rerun
def kill(self, vpid):
    """Kill the process mapped to ``vpid`` and drop its bookkeeping.

    Args:
        vpid: virtual pid of the process to kill.

    Raises:
        KeyError: if ``vpid`` is not present in ``self.pids``; the final
            ``del`` statements may raise likewise if ``self.last`` has no
            entry for ``vpid``.
    """
    pid = self.pids[vpid]
    # values are passed in the order the message names them: vpid, then pid
    # (they used to be swapped)
    dbg.rerun('killing rerun process with vpid %s pid %s' % (vpid, pid))
    self.proc.kill(pid.pid)

    # forget the last record and the pid mapping of the dead process
    del self.last[vpid]
    del self.pids[vpid]