def updateStatus(self, status):
    """Fold a ``wait``-style status word into this object's state.

    A stop or continue notification only toggles ``self.stopped``.  A
    termination stores the result on ``self.process.returncode``: the
    negated signal number when the child was killed by a signal, or the
    plain exit status when it exited on its own.  A status matching none
    of the four cases leaves everything untouched.

    :param status: status word as returned by ``os.waitpid``.
    """
    was_stopped = os.WIFSTOPPED(status)
    if was_stopped or os.WIFCONTINUED(status):
        # Stop and continue are the two halves of the same flag.
        logging.debug("STOPPED" if was_stopped else "CONTINUED")
        self.stopped = was_stopped
        return

    if os.WIFSIGNALED(status):
        logging.debug("SIGNALED")
        self.process.returncode = -os.WTERMSIG(status)
        return

    if os.WIFEXITED(status):
        logging.debug("EXITED")
        self.process.returncode = os.WEXITSTATUS(status)
def get_sigchlds():
    """Yield one siginfo-style dict per pending child state change.

    Children are polled with ``os.waitpid(-1, WUNTRACED | WCONTINUED |
    WNOHANG)`` until nothing more is pending.  Each event is reported as a
    dict whose keys mirror the ``ssi_*`` field names of a
    ``signalfd_siginfo`` record; only ``ssi_signo`` (always ``SIGCHLD``),
    ``ssi_pid``, ``ssi_code`` and ``ssi_status`` are filled in, every other
    field stays 0.

    The generator ends when ``waitpid`` reports no pending change
    (``pid == 0``) or raises ``OSError`` (for instance when there are no
    children at all).
    """
    wait_flags = os.WUNTRACED | os.WCONTINUED | os.WNOHANG
    while True:
        try:
            pid, status = os.waitpid(-1, wait_flags)
        except OSError:
            # Best effort: no children left (or waitpid failed) -> stop.
            return
        if pid == 0:
            return

        res = {
            "ssi_signo": signal.SIGCHLD,  # Signal number
            "ssi_errno": 0,    # Error number (unused)
            "ssi_code": 0,     # Signal code
            "ssi_pid": pid,    # PID of sender
            "ssi_uid": 0,      # Real UID of sender
            "ssi_fd": 0,       # File descriptor (SIGIO)
            "ssi_tid": 0,      # Kernel timer ID (POSIX timers)
            "ssi_band": 0,     # Band event (SIGIO)
            "ssi_overrun": 0,  # POSIX timer overrun count
            "ssi_trapno": 0,   # Trap number that caused signal
            "ssi_status": 0,   # Exit status or signal (SIGCHLD)
            "ssi_int": 0,      # Integer sent by sigqueue(3)
            "ssi_ptr": 0,      # Pointer sent by sigqueue(3)
            "ssi_utime": 0,    # User CPU time consumed (SIGCHLD)
            "ssi_stime": 0,    # System CPU time consumed (SIGCHLD)
            "ssi_addr": 0,     # Address that generated signal
                               # (for hardware-generated signals)
        }

        if os.WIFEXITED(status):
            res["ssi_code"] = CLD_EXITED
            res["ssi_status"] = os.WEXITSTATUS(status)
        elif os.WIFSIGNALED(status):
            # WCOREDUMP is only meaningful for a signalled status.  Testing
            # it on its own misreads other statuses: on Linux a "continued"
            # status (0xffff) has the core-dump bit set.
            if os.WCOREDUMP(status):
                res["ssi_code"] = CLD_DUMPED
            else:
                res["ssi_code"] = CLD_KILLED
            res["ssi_status"] = os.WTERMSIG(status)
        elif os.WIFCONTINUED(status):
            res["ssi_code"] = CLD_CONTINUED
        elif os.WIFSTOPPED(status):
            res["ssi_code"] = CLD_STOPPED
            res["ssi_status"] = os.WSTOPSIG(status)

        yield res
def __sig_child_handler(self, signum, frame):
    """SIGCHLD handler: reap one child and exit once all executors are gone.

    One child is collected with ``os.wait()``.  If its pid is listed in
    ``self.executor_pids`` it is removed from that list and a
    ``(exit_status, signal, core_dumped)`` tuple is appended to
    ``self.executor_rets``.  When no executor pid remains the process exits
    via ``sys.exit``: the code is 0 unless some executor exited with a
    non-zero status (that status is used) or dumped core (code 1).

    Any ``Exception`` raised while waiting is logged and swallowed so the
    handler never propagates errors into the interrupted code;
    ``SystemExit`` is not an ``Exception`` and therefore still ends the
    process.

    :param signum: number of the signal being handled (only logged).
    :param frame: interrupted stack frame (unused).
    """
    # Our child exits with sig 9 when all is good... so map that to 0
    try:
        status = None
        sig = None
        core = False
        self.logger.debug("Running children: %s" % self.executor_pids)
        self.logger.debug("Got signal %s" % signum)

        pid, ret = os.wait()
        msg = "Child %s: wait returned code %s which means:" % (pid, ret)
        if os.WIFSIGNALED(ret):
            sig = os.WTERMSIG(ret)
            msg += " signalled %s" % sig
            # WCOREDUMP is only defined for a signalled status.
            if os.WCOREDUMP(ret):
                core = True
                msg += " core dumped"
        if os.WIFEXITED(ret):
            status = os.WEXITSTATUS(ret)
            msg += " exited %s" % status
        if os.WIFSTOPPED(ret):
            msg += " stopped %s" % os.WSTOPSIG(ret)
        if os.WIFCONTINUED(ret):
            msg += " continued"
        self.logger.debug(msg)

        if pid in self.executor_pids:
            self.executor_pids.remove(pid)
            self.executor_rets.append((status, sig, core))
        else:
            self.logger.error("Pid %s is not a child" % pid)

        ret = 0
        if not self.executor_pids:
            self.logger.trace("Statuses of all executors: %s" %
                              self.executor_rets)
            for st, sg, co in self.executor_rets:
                # Death by signal alone is treated as success (see above).
                if st is not None and st != 0:
                    ret = st
                if co:
                    ret = 1
            self.logger.debug("Exit with code %s" % ret)
            sys.exit(ret)
    except Exception as ex:
        self.logger.error("Error waiting for child process: %s" % ex)
def _status_string(status):
    """Return a one-line, human-readable description of a wait status.

    Signal names are looked up in the module-level ``_signals`` mapping;
    numbers missing from it are shown as "unknown signal".
    """
    if os.WIFSIGNALED(status):
        term_sig = os.WTERMSIG(status)
        sig_name = _signals.get(term_sig, "unknown signal")
        return "Terminated by %s (%d)" % (sig_name, term_sig)

    if os.WIFEXITED(status):
        return "Exited with status %d" % os.WEXITSTATUS(status)

    if os.WIFSTOPPED(status):
        stop_sig = os.WSTOPSIG(status)
        sig_name = _signals.get(stop_sig, "unknown signal")
        return "Stopped by %s (%d)" % (sig_name, stop_sig)

    if os.WIFCONTINUED(status):
        return "Continued from stop"

    # None of the predicates matched: show the raw value.
    return "Unknown reason (%r)" % status
def status_code(status):
    """Turn a raw wait status into a single signed result code.

    ``status`` is the status word from ``os.waitpid(pid, 0)[1]``.

    Returns the negated signal number when the child was terminated by a
    signal (negative so it cannot be confused with an exit status), or the
    child's exit status when it exited normally.

    Raises ``RuntimeError`` for every other kind of status (stopped,
    continued, or unrecognised).
    """
    if os.WIFSIGNALED(status):
        # Killed by a signal: report it as a negative number.
        return -os.WTERMSIG(status)

    if os.WIFEXITED(status):
        # Normal exit: report the value the child passed to exit().
        return os.WEXITSTATUS(status)

    # Anything else is not a termination and is reported as an error.
    if os.WIFSTOPPED(status) or os.WIFCONTINUED(status):
        problem = "Child stopped or continued?"
    elif os.WCOREDUMP(status):
        # NOTE(review): a real core dump is also a signalled status and is
        # handled by the first branch, so this looks reachable only for
        # malformed status words -- confirm before relying on it.
        problem = "Child core dump!"
    else:
        problem = "Unknown child return status!"
    raise RuntimeError(problem)
def child_handler():
    """Reap every child with a pending state change and update the job list.

    Polls ``os.waitpid`` without blocking, also asking for stop and
    continue notifications.  For each reported pid the matching job and
    subprocess are looked up in the global ``joblist``, the subprocess
    status is updated, and the job list is re-synchronised for that job.
    The loop ends when ``waitpid`` returns ``(0, 0)`` or when an
    ``OSError`` is raised anywhere in the loop body.
    """
    wait_flags = os.WNOHANG | os.WCONTINUED | os.WUNTRACED

    # Keep going so that all children get cleaned up.
    while True:
        try:
            pid, exit_status = os.waitpid(-1, wait_flags)
            if (pid, exit_status) == (0, 0):
                break

            # Find the job owning this pid, then the subprocess itself.
            job = joblist.get_job_with_process(pid)
            proc = job.get_subprocess(pid)

            if os.WIFEXITED(exit_status):
                proc.set_status(STATUS.TERMINATED)
            elif os.WIFSTOPPED(exit_status):
                # NOTE(review): "STPPED" looks like a typo for "STOPPED";
                # kept because STATUS is defined outside this block.
                proc.set_status(STATUS.STPPED)
            elif os.WIFCONTINUED(exit_status):
                proc.set_status(STATUS.RUNNING)
            elif os.WIFSIGNALED(exit_status):
                # Killed by a signal: tell the user which one.
                print("Job pid: " + str(pid) + " terminated by signal: " +
                      str(os.WTERMSIG(exit_status)))
                proc.set_status(STATUS.TERMINATED)

            # Let the job list recompute the job state from its processes.
            joblist.synchronize(job)
        except OSError:
            break
def alive(self, recover=False):
    """
    Try to determine whether the child process is still active.

    If it is not, the child is marked as dead (``self.child = None``) and
    :func:`finalize` is called with the last wait status (``None`` when the
    child had already disappeared).

    If `recover` is `True` and the child is indeed dead, we attempt to
    re-initialize it (:func:`initialize`) and check again.  We only do that
    `self.recover_max` times before giving up -- at that point it seems
    likely that the child exits due to a re-occurring condition.

    Note that upstream consumers should only use `recover=True` when they
    can handle a disconnected/reconnected child at that point, i.e. when
    they make no assumptions about persistent state beyond what they
    control themselves.

    :param recover: attempt to restart a dead child.
    :return: `True` if the child is alive, `False` otherwise.
    :raises se.NoSuccess: if ``waitpid`` fails for any reason other than
        the child no longer existing (``ECHILD``).
    """
    with self.rlock:

        # do we have a child which we can check?
        if self.child:

            wstat = None

            while True:
                try:
                    wpid, wstat = os.waitpid(self.child, os.WNOHANG)
                except OSError as e:
                    if e.errno == errno.ECHILD:
                        # child disappeared, go to the cleanup below
                        break
                    # The original raised a bare string here, which is
                    # itself a TypeError; raise the same exception type
                    # that wait() uses for this failure.
                    raise se.NoSuccess("waitpid failed on wait (%s)" % e)

                # did we get a note about child termination?
                if 0 == wpid:
                    # nope, all is well - carry on
                    return True

                # We got a note.  A stop/continue notification does not
                # mean the child is gone: for our purposes it is alive, so
                # poll again.
                if os.WIFSTOPPED(wstat) or os.WIFCONTINUED(wstat):
                    continue

                break

            # The child is dead -- record that and clean up.
            self.child = None
            self.finalize(wstat=wstat)

        # check if we can attempt a post-mortem revival
        if not recover:
            return False

        # revival is allowed, but only a limited number of times
        if self.recover_attempts >= self.recover_max:
            return False

        self.recover_attempts += 1
        self.initialize()

        # Do not trust the fresh child blindly: check again.  This is
        # recursive, but recover_attempts grows on every round, which
        # bounds the recursion by recover_max.
        return self.alive(recover=True)
def wait(self):
    """
    Block until the child finishes on its own or gets killed.

    While waiting we also collect whatever the child still writes: the
    output is read via ``self.read()`` until that call raises, and the
    accumulated text is returned once the child has been reaped.

    :return: the output collected from the child.
    :raises se.NoSuccess: if ``waitpid`` fails for any reason other than
        the child no longer existing (``ECHILD``).
    """
    output = ""

    # Drain the child's output until reading fails.  Only ordinary errors
    # end the loop: KeyboardInterrupt and SystemExit must not be swallowed
    # here, so this is `except Exception` rather than a bare `except`.
    while True:
        try:
            output += self.read()
        except Exception:
            break

    while True:

        if not self.child:
            # nothing (left) to wait for
            return output

        # we need to lock, as the SIGCHLD will only arrive once
        with self.rlock:
            try:
                wpid, wstat = os.waitpid(self.child, 0)
            except OSError as e:
                if e.errno == errno.ECHILD:
                    # child disappeared: no exit information available
                    self.exit_code = None
                    self.exit_signal = None
                    self.finalize()
                    return output

                # no idea what happened -- it is likely bad
                raise se.NoSuccess("waitpid failed on wait")

            # did we get a note about child termination?
            if 0 == wpid:
                # nope, all is well - carry on
                continue

            # A stop/continue notification does not mean the child is
            # gone: for our purposes it is alive, so keep waiting.
            if os.WIFSTOPPED(wstat) or os.WIFCONTINUED(wstat):
                continue

            # The child is dead -- record that and clean up.
            self.child = None
            self.finalize(wstat=wstat)

            return output
def _run(self, pid):
    """Main tracer loop.

    First waits until the initial tracee `pid` reports
    `PTRACE_EVENT_STOP` and registers it via `self._new_tracee`.  Then
    services wait events for as long as `self.tracees` is non-empty:
    every event is classified (syscall-stop, event-stop, group-stop,
    signal-stop, exit, kill), the matching callbacks are run, and the
    tracee is resumed.  When the loop ends the 'finish' callbacks are run.

    :param pid: PID of the initial tracee.
    """
    # This is the entry point after running either `start` or `attach`.  In
    # both cases we are already the tracer of `pid`, and the process is
    # running.

    # The initial tracee is the only tracee that may stop for other reasons
    # before `PTRACE_EVENT_STOP`, so we handle it specially here.  If the
    # tracee was created with `Engine.start` then it will stop itself with
    # `SIGSTOP`, in which case we will observe group-stop.  But
    # `WPTRACEEVENT(status)` will also be `PTRACE_EVENT_STOP` in that case,
    # so there's no reason to distinguish between them.
    while True:
        pid_, status = self._wait()
        if pid != pid_:
            _debug('child <PID:%d> is not initial tracee' % pid_)
            continue

        e = WPTRACEEVENT(status)
        s = os.WSTOPSIG(status)
        if os.WIFSTOPPED(status) and e == PTRACE_EVENT_STOP:
            _debug('seized initial tracee <PID:%d>' % pid)
            self._new_tracee(pid)
            break

        _debug('still waiting for <PID:%d>' % pid)

        # If this is not group-stop (`e` != 0 and `s` != `SIGTRAP`),
        # event-stop (`e` != 0 and `s` == `SIGTRAP`) or syscall-stop (`s` &
        # 0x80), then it must be signal-stop.
        if not e and not s & 0x80:
            cont_signal = s
        else:
            cont_signal = 0
        ptrace_cont(pid, cont_signal)

    # For a child to become a tracee two things must happen: 1)
    # `PTRACE_EVENT_STOP` is observed in the child and 2)
    # `PTRACE_EVENT_{FORK,VFORK,CLONE}` is observed in the parent.  The first
    # condition ensures that the tracee is running and the second lets us
    # know the parent.
    #
    # In the case of the `clone` syscall we must also save the flags used,
    # as they decide the thread group and parent of the child.
    #
    # Caveat: I have only seen `PTRACE_EVENT_STOP` before
    # `PTRACE_EVENT_{FORK,VFORK,CLONE}` in the case of `vfork`, and not
    # consistently, but the ptrace man page doesn't say anything about the
    # order.  Besides, there's no reason to rely on it anyway.

    # `stop_seen` records the children in whom we have observed
    # `PTRACE_EVENT_STOP`.
    stop_seen = set()

    # `parent_seen` maps children to their "parents".  Here parent refers to
    # the process that spawned the child, not the child's parent as reported
    # by `getppid`.
    parent_seen = {}

    # `clone_flags` maps parents to the flags used in the `clone` syscall.
    # When `clone` is called the PID of the child is not yet known, so the
    # mapping cannot be from children.
    clone_flags = {}

    # This function creates a tracee if the conditions discussed above are
    # met.  It is called from both places that can complete the pair (the
    # child's stop and the parent's fork/vfork/clone event), so whichever
    # arrives second creates the tracee.
    def maybe_tracee(pid):
        if pid not in stop_seen:
            _debug('PTRACE_EVENT_STOP has not yet been observed in '
                   '<PID:%d>' % pid)
            return
        if pid not in parent_seen:
            _debug('parent of <PID:%d> has not yet observed '
                   'PTRACE_EVENT_{FORK,VFORK,CLONE}' % pid)
            return
        parent = parent_seen.pop(pid)
        # Flags default to 0 when the parent did not record a `clone`.
        cflags = clone_flags.pop(parent.pid, 0)
        self._new_tracee(pid, parent, cflags)

    while self.tracees:
        try:
            pid, status = self._wait()
        except OSError as e:
            if e.errno == errno.ECHILD:
                # This may happen if all the tracees are killed by SIGKILL,
                # so we didn't get a chance to observe their death.
                _debug('no children, exiting')
                break
            raise

        # The three fields logged are: ptrace event, stop signal / exit
        # status byte, and the low status byte.
        _debug('wait() -> %d, %02x|%02x|%02x' % \
               (pid, status >> 16, (status >> 8) & 0xff, status & 0xff))

        if pid not in self.tracees:
            # The child is not a tracee.  That can happen because 1) it was
            # created with follow mode disabled, and is not meant to be a
            # tracee, or 2) this is the first time we see the tracee in
            # which case we expect to observe `PTRACE_EVENT_STOP`

            # According to the `wait(2)` man page `WIFSTOPPED(status)` can
            # only be true if `wait` was called with `UNTRACED` or if the
            # child is a ptrace tracee.  In the second case we must observe
            # `PTRACE_EVENT_STOP`.
            if os.WIFSTOPPED(status):
                assert WPTRACEEVENT(status) == PTRACE_EVENT_STOP, \
                    'non-tracee child stopped without PTRACE_EVENT_STOP'
                assert pid not in stop_seen, \
                    'already saw PTRACE_EVENT_STOP for child <PID:%d>' % pid
                stop_seen.add(pid)
                # Create and start tracee if we already know the parent
                maybe_tracee(pid)
            continue

        # OK, this is a proper tracee, continue to the real logic.  First we
        # figure out what happened to it.
        tracee = self.tracees.get(pid)
        s = os.WSTOPSIG(status)
        e = WPTRACEEVENT(status)

        # When we continue the tracee, this is the signal we should send it.
        cont_signal = 0

        # Why did `wait` return this tracee?
        # NOTE(review): `stopped` and `continued` are set but not read
        # anywhere in this function.
        stopped = False
        signalled = False
        continued = False
        exited = False

        # If the tracee was stopped, which kind of stop was it?
        # NOTE(review): `event_stop` is set but not read in this function;
        # the code below tests `event` instead.
        signal_stop = False
        group_stop = False
        event_stop = False
        syscall_stop = False

        # This will be set if we have event-stop.
        event = None

        # Tracers can return a value on syscall-enter in which case the
        # syscall is "emulated"
        # XXX: For some reason I couldn't get ptrace_sysemu to work, so I'm
        # XXX: using User-Mode-Linux's trick and replacing it by a syscall
        # XXX: to `getpid` instead.  See code further down.
        # XXX: Link to UML's SYSEMU patches: http://sysemu.sourceforge.net/
        # NOTE(review): `sysemu` is never set to True in this function, so
        # the `ptrace_sysemu` branch below is currently not taken.
        sysemu = False

        # Here we just set the variables.  The real logic follows below.
        if os.WIFSTOPPED(status):
            stopped = True
            if s == SIGTRAP | 0x80:
                assert e == 0, \
                    'WPTRACEEVENT(status) should be 0 in syscall-stop'
                syscall_stop = True
                syscall = tracee.syscall
            elif e:
                if s == SIGTRAP:
                    event_stop = True
                    event = e
                else:
                    assert e == PTRACE_EVENT_STOP, \
                        'WPTRACEEVENT(status) should be ' \
                        'PTRACE_EVENT_STOP in group-stop'
                    group_stop = True
            else:
                signal_stop = True
                tracee.siginfo._init()
                siginfo = tracee.siginfo
                # By default the signal is passed on to the tracee.
                cont_signal = s

        if os.WIFSIGNALED(status):
            signalled = True
            # NOTE(review): this local is not read later in this function.
            signal = os.WTERMSIG(status)

        if os.WIFCONTINUED(status):
            continued = True

        if os.WIFEXITED(status):
            exited = True
            status = os.WEXITSTATUS(status)

        # Tracee exited or was killed by a signal, so remove it.  No need to
        # report this exit as we have already done so when we got
        # `PTRACE_EVENT_EXIT`.
        #
        # Caveat: At the moment (kernel 4.5.0) tracees stop in event-stop
        # with `PTRACE_EVENT_EXIT` even if they are killed by `SIGKILL`.
        # According to the man page that may change in the future.  We
        # handle that hypothetical situation below, if ptrace fails with
        # `ESRCH` when we continue the tracee.
        if exited or signalled:
            _debug('<PID:%d> terminated' % pid)
            self._del_tracee(tracee)
            continue

        # Log events.
        if event:
            _debug('event %d:%s' % (event, event_names[event]))

        # Handle `execve`'s: when a thread which is not the thread group
        # leader executes `execve`, all other threads in the thread group
        # die and the `execve`'ing thread becomes leader.  This code must be
        # executed early as `tracee` is in fact the wrong tracee at this
        # point, and we need to correct that.
        if event == PTRACE_EVENT_EXEC:
            oldpid = ptrace_geteventmsg(pid)
            if pid != oldpid:
                _debug('repid (%d -> %d)' % (oldpid, pid))
                # Neither this tracee nor the thread group leader will
                # report death, so we must do the clean-up here
                self._del_tracee(tracee)
                # This is the correct tracee, it just changed its pid
                tracee = self.tracees.pop(oldpid)
                tracee.pid = pid
                tracee.thread_group = set([tracee])
                tracee.tgid = tracee.pid
                self.tracees[pid] = tracee
                self._run_callbacks('repid', tracee, oldpid)

        # We're entering or exiting a syscall so update `in_syscall`.
        # Invariant: a callback for `syscall` will always see `in_syscall`
        # as being true, and a callback for `syscall_return` will always see
        # it as being false.
        #
        # This check must be placed here, before the personality is
        # detected, because a syscall on Linux x86_64 (XXX: and others?) may
        # be run in 32 bit mode depending on how it was called (specifically
        # through `int 0x80` or 32 bit `syscall` [which apparently only
        # exists on AMD CPU's and is all but undocumented; see comment in
        # `/linux/arch/x86/entry/entry_32.S`]).
        if syscall_stop:
            # Go from syscall to not in syscall or vice versa
            tracee.in_syscall ^= True

        # See if the tracee changed personality.  This check may depend on
        # `in_syscall` (see comment above).
        self._detect_personality(tracee)

        # OK, now all the tracee's state variables have been set and we're
        # ready to fire callbacks, etc.

        # Trigger single step callbacks.  We do not reset
        # `_was_singlestepped` here because we need it to be set in order to
        # suppress callbacks for `SIGTRAP`.
        if tracee._was_singlestepped:
            _debug('step')
            self._run_callbacks('step', tracee)

        # This tracee was stopped, but now it's running again!
        if not tracee.is_running:
            tracee.is_running = True
            self._run_callbacks('cont', tracee)

        # Handle syscalls.
        if syscall_stop:
            # This is syscall-enter.
            if tracee.in_syscall:
                # The `nr` and `name` attributes are what the tracer sees
                # and not the real values in case of a restarted or emulated
                # syscall.
                realnr = syscall._get_nr()
                realname = tracee.syscalls.syscall_names[realnr]
                _debug('syscal-enter %d:%s' % (realnr, realname))

                if self.trace_restart or realname != 'restart_syscall':
                    # Initialize syscall object.
                    syscall._init()
                    # NOTE(review): `args` is not read later in this
                    # function.
                    args = syscall.args
                    retval = self._run_syscall_callbacks(tracee)
                    # We were supposed to enter a syscall, but a tracer
                    # returned a value, so we'll "emulate" the syscall
                    # instead.
                    if retval != None:
                        _debug('emulating syscall %d:%s -> 0x%x' % \
                               (syscall.nr, syscall.name, retval))
                        # XXX: For some reason `ptrace_sysemu` doesn't seem
                        # XXX: to work for me, so I replace the syscall with
                        # XXX: a "nop" syscall in the form of `getpid`
                        syscall.emulated = True
                        syscall.emu_nr = syscall.nr
                        syscall.emu_retval = retval
                        syscall.name = 'getpid'

                    # We are about to enter an un-emulated (if it was
                    # emulated `name` would be "getpid") `clone` syscall and
                    # we want to follow the child.  We must save the flags
                    # used in the syscall so we can correctly set the parent
                    # and thread group of the newly created process/thread
                    # when it arrives.  Also, if the `CLONE_UNTRACED` flag
                    # is set, we unset it so we become a tracer of the
                    # child.  This check needs to be placed here, after the
                    # callbacks have run, as the syscall may be changed or
                    # simulated by a tracer.  `CLONE_UNTRACED` is defined in
                    # /usr/include/linux/sched.h
                    CLONE_UNTRACED = 0x00800000
                    if self.follow and syscall.name == 'clone':
                        if syscall.args[0] & CLONE_UNTRACED:
                            _debug('removed CLONE_UNTRACED in clone ' \
                                   'syscall')
                        # The flags are recorded before the bit is cleared.
                        clone_flags[pid] = syscall.args[0]
                        syscall.args[0] &= ~CLONE_UNTRACED
                else:
                    _debug('ignoring syscall-enter due to restart')

            # This is syscall-exit.
            else:
                _debug('syscall-exit %d:%s -> %#x' % \
                       (syscall.nr, syscall.name, syscall.retval))
                if self.trace_restart or \
                   syscall.retval not in RETVAL_RESTART:
                    # Finalize syscall object, i.e. stop the timer.
                    syscall._fini()

                    # This is the initial tracee returning from `execve`
                    if self._wait_initial:
                        self._wait_initial = False
                        self._run_callbacks('birth', tracee)
                    else:
                        # Set syscall number and return value if the syscall
                        # was "emulated", i.e. `getpid`.
                        if syscall.emulated:
                            syscall.nr = syscall.emu_nr
                            syscall.retval = syscall.emu_retval
                        retval = self._run_syscall_callbacks(tracee)
                        if retval != None:
                            _debug('overriding syscall %d:%s -> 0x%x' % \
                                   (syscall.nr, syscall.name, retval))
                            syscall.retval = retval
                        # The `emulated` flag is reset here, after the
                        # callbacks have run, so they can see whether the
                        # syscall was emulated or not.
                        syscall.emulated = False
                else:
                    _debug('ignoring syscall-exit due to restart')

        # Run callbacks for signals and single stepping.
        elif signal_stop:
            # A single stepped tracee will signal-stop with `SIGTRAP` when
            # executing the next instruction, so we need to suppress that
            # signal.  Otherwise run callbacks and deliver signals as usual.
            if siginfo.signo == SIGTRAP and tracee._was_singlestepped:
                cont_signal = 0
            else:
                _debug('signal %d:%s' % (siginfo.signo, siginfo.signame))
                retval = self._run_signal_callbacks(tracee)
                # A callback's return value replaces the delivered signal;
                # 0 suppresses it.
                if retval != None:
                    if retval == 0:
                        _debug('supressing signal %d:%s' % \
                               (siginfo.signo, siginfo.signame))
                    else:
                        _debug('overriding signal %d:%s -> %d:%s' % \
                               (siginfo.signo, siginfo.signame,
                                retval, signal_names.get(retval, 'SIG???')))
                    cont_signal = retval

        # Ditto for group-stops.
        elif group_stop:
            _debug('group-stop')
            tracee.is_running = False
            self._run_callbacks('stop', tracee)

        # Handle births.
        elif event in PTRACE_EVENTS_FOLLOW:
            newpid = ptrace_geteventmsg(pid)
            # Even if the child is not the result of a `clone` syscall, we
            # may have saved clone flags if a previous `clone` failed.  In
            # that case we must remove the stale flags.
            if event != PTRACE_EVENT_CLONE:
                clone_flags.pop(pid, None)
            # Record this tracee as the parent.
            parent_seen[newpid] = tracee
            # And finally create and start the new tracee if
            # `PTRACE_EVENT_STOP` was already seen (as mentioned above, I've
            # only seen this behavior from `vfork`).
            maybe_tracee(newpid)

        # Handle deaths.
        elif event == PTRACE_EVENT_EXIT:
            status = ptrace_geteventmsg(pid)
            tracee.is_running = False
            tracee.is_alive = False
            self._run_callbacks('death', tracee, status)

        # Should the tracee be single stepped?
        do_singlestep = self.singlestep or tracee.singlestep or \
                        tracee.singlesteps > 0

        # If so, we need to know whether we're about to make a syscall or
        # not, and figuring that out probably requires reading the tracee's
        # registers and/or memory, so we should do it here, before we flush
        # the register, memory and siginfo caches.
        if do_singlestep:
            at_syscall = tracee.at_syscall

        # And now we can flush them.
        tracee._cacheflush()

        # Continue the tracee.
        try:
            if group_stop:
                ptrace_listen(pid)
            elif sysemu:
                # XXX: See comments about `ptrace_sysemu` above.
                ptrace_sysemu(pid, cont_signal)
            elif tracee._do_detach:
                _debug('detached <PID:%d>' % pid)
                ptrace_detach(pid, cont_signal)
                # Continue the tracee.
                _tgkill(tracee.tgid, tracee.tid, SIGCONT)
            elif do_singlestep:
                # For each tracee we record whether it was single stepped
                # since any of the variables above may change before we
                # observe SIGTRAP and thus cannot be relied on
                tracee._was_singlestepped = True
                # We decrement `singlesteps` here so changes made by later
                # callbacks will not be affected.
                if tracee.singlesteps > 0:
                    tracee.singlesteps -= 1
                # If we're entering or exiting a syscall we must continue
                # the tracee with `PTRACE_SYSCALL` in order to observe that.
                if at_syscall or tracee.in_syscall:
                    ptrace_syscall(pid, cont_signal)
                else:
                    ptrace_singlestep(pid, cont_signal)
            else:
                tracee._was_singlestepped = False
                ptrace_syscall(pid, cont_signal)
        except OSError as e:
            if e.errno == errno.ESRCH:
                # This doesn't happen at the moment (kernel 4.5.0), but it
                # may in the future.  See the BUGS section in the ptrace man
                # page.
                del self.tracees[pid]
                self._run_callbacks('kill', tracee)
            else:
                raise

    self._run_callbacks('finish')
# Detach from the starting directory and controlling terminal, and clear
# the umask, before forking the daemon process.
os.chdir('/')
os.setsid()
os.umask(0)

try:
    pid = os.fork()
    if pid > 0:
        # Parent: give the child a moment to start, then check whether it
        # has already terminated.
        time.sleep(0.2)
        child_id, status = os.waitpid(pid, os.WNOHANG)
        if child_id == 0:
            # Child is still running: the parent's job is done.
            sys.exit(0)

        # The child is already gone: dump everything the status word can
        # tell us.  All decoders are printed unconditionally; only those
        # whose matching WIF* predicate is true carry meaning.
        print('--> %s %s' % (child_id, status))
        print('WCOREDUMP %s' % os.WCOREDUMP(status))
        print('WIFCONTINUED %s' % os.WIFCONTINUED(status))
        print('WIFSTOPPED %s' % os.WIFSTOPPED(status))
        print('WIFSIGNALED %s' % os.WIFSIGNALED(status))
        print('WIFEXITED %s' % os.WIFEXITED(status))
        print('WEXITSTATUS %s' % os.WEXITSTATUS(status))
        print('WSTOPSIG %s' % os.WSTOPSIG(status))
        print('WTERMSIG %s' % os.WTERMSIG(status))
        # NOTE(review): the parent exits 0 even though the child died
        # right after the fork -- confirm this is intended.
        sys.exit(0)
except OSError as e:
    sys.stderr.write('fork() ERROR: %s %s\n' % (e.errno, e.strerror))
    sys.exit(1)

# Only the forked child gets here.
pid = os.getpid()
print('River daemon pid %s' % pid)
def main(self, command):
    """
    Start the trace with the given command.

    The command is run in a forked child under ptrace.  The parent follows
    the child and all of its forked/vforked/cloned descendants, intercepts
    their `open` and `mmap` syscalls, and records every successfully opened
    path in ``TracerControlBlock.files`` (keyed by file opener, then by
    process name).  Signals that stop a traced process are relayed to it.

    :type command: list of strings
    :param command: command line to trace, already split (e.g. with
                    shlex.split); the first item is the program to run

    :rtype: bool
    :return: True once ``waitpid`` reports an error (normally: no traced
             process is left), False if something went wrong
    """
    import ctypes
    from FingerPrint.ptrace import func as ptrace_func
    import FingerPrint.ptrace.cpu_info
    import FingerPrint.ptrace.signames

    cpu_info = FingerPrint.ptrace.cpu_info

    #
    # main function to launch a process and trace it
    #
    self.program = command
    # Per-pid control blocks; `enterCall` on each block tells whether the
    # next syscall-stop is a syscall entry or a syscall return.
    processesStatus = dict()
    options = ptrace_func.PTRACE_O_TRACEFORK | ptrace_func.PTRACE_O_TRACEVFORK \
        | ptrace_func.PTRACE_O_TRACECLONE | ptrace_func.PTRACE_O_TRACEEXIT \
        | ptrace_func.PTRACE_O_TRACEEXEC | ptrace_func.PTRACE_O_TRACESYSGOOD

    # TODO add the logger (logger = getLogger(); logger.setLevel(DEBUG))

    # creating the debugger and setting it up
    child = os.fork()
    if child == 0:
        # we are in the child or traced process
        # traceme and execv
        ptrace_func.ptrace_traceme()
        os.execl(FingerPrint.utils.which(self.program[0]), *self.program)
    else:
        # father or tracer process
        # we trace the execution here
        logger.debug("The fingerprint process %d going to trace %d" %
                     (os.getpid(), child))
        pid, status = os.waitpid(-1, 0)
        if pid != child:
            logger.error("The process tracer could not bootstrap.")
            return False

        ptrace_func.ptrace_setoptions(child, options)
        ptrace_func.ptrace_syscall(child)
        files = TracerControlBlock.files
        TracerControlBlock.set_trace_function()

        while True:
            # main loop tracer
            # 1. wait for syscall from the children
            # 2. analyze what happened, if mmap syscall scan /proc/PID/maps
            # 3. get ready to wait for the next syscall
            try:
                # wait for all cloned children: __WALL = 0x40000000
                (pid, status) = os.waitpid(-1, 0x40000000)
            except OSError:
                logger.error("Tracing terminated successfully")
                return True
            if not pid > 0:
                logger.error("Catastrofic failure")
                return False

            # The ptrace event code is carried above the low 16 bits.
            event = status >> 16
            signalValue = os.WSTOPSIG(status)
            deliverSignal = 0

            if os.WIFEXITED(status):
                # a process died, report it and go back to wait for syscall
                logger.debug("The process " + str(pid) + " exited")
                # The pid is only registered at its first syscall-stop, so
                # it may be unknown here; a plain pop(pid) would raise
                # KeyError and abort the whole trace.
                processesStatus.pop(pid, None)
                continue
            if os.WIFSIGNALED(status):
                logger.debug("The process " + str(pid) +
                             " exited because of a signal")
                processesStatus.pop(pid, None)
                continue

            if os.WIFCONTINUED(status):
                logger.debug("The process " + str(pid) + " continued")
            elif os.WIFSTOPPED(status) and \
                    signalValue == (signal.SIGTRAP | 0x80):
                #
                # we have a syscall
                # orig_rax or orig_eax contains the syscall number
                # taken from linux src arch/x86/syscalls/syscall_[32|64].tbl
                # switch on the syscall number to intercept mmap and open
                regs = ptrace_func.ptrace_getregs(pid)
                if pid not in processesStatus:
                    # new pid
                    processesStatus[pid] = TracerControlBlock(pid)
                tcb = processesStatus[pid]

                if (cpu_info.CPU_X86_64 and regs.orig_rax == 2) or \
                        (cpu_info.CPU_I386 and regs.orig_eax == 5):
                    #
                    # handle open (orig_rax == 2 on 64bit) or
                    # (orig_eax == 5 on 32bit)
                    #
                    if tcb.enterCall:
                        # we are entering open: remember the first argument
                        # (rdi on 64bit, ebx on 32bit)
                        # https://github.com/torvalds/linux/blob/master/arch/x86/kernel/entry_64.S#L585
                        # http://man7.org/linux/man-pages/man2/syscall.2.html
                        if cpu_info.CPU_X86_64:
                            tcb.firstArg = regs.rdi
                        else:
                            tcb.firstArg = regs.ebx
                        tcb.enterCall = False
                    else:
                        # we are exiting from an open
                        tcb.enterCall = True
                        if cpu_info.CPU_X86_64:
                            ret_value = regs.rax
                        else:
                            ret_value = regs.eax
                        # cast from c_ulong to c_long so that error returns
                        # show up as negative numbers
                        syscall_ret = ctypes.c_long(ret_value).value
                        if syscall_ret >= 0:
                            openPath = self.readCString(tcb.firstArg, pid)
                            if openPath[0] != '/':
                                # relative path: prefix it with the process
                                # working directory, wrapped in "$"
                                openPath = "$" + tcb.getProcessCWD() + \
                                    "$" + openPath
                            libName = tcb.getFileOpener()
                            procName = tcb.getProcessName()
                            if libName not in files:
                                files[libName] = {}
                            if procName not in files[libName]:
                                files[libName][procName] = set()
                            files[libName][procName].add(openPath)
                        # else don't do anything
                        # TODO use close to check for used files (easier to
                        # trace full path)

                elif (cpu_info.CPU_X86_64 and regs.orig_rax == 9) or \
                        (cpu_info.CPU_I386 and
                         (regs.orig_eax == 90 or regs.orig_eax == 192)):
                    #
                    # handle mmap (orig_rax == 9 64bit or
                    # orig_eax == 90 or 192 on 32bit)
                    #
                    if tcb.enterCall:
                        # we are entering mmap
                        tcb.enterCall = False
                    else:
                        # we are returning from mmap
                        tcb.enterCall = True
                        tcb.updateSharedLibraries()

            elif os.WIFSTOPPED(status) and \
                    (signalValue == signal.SIGTRAP) and event != 0:
                # this is just to print some output to the users
                subChild = ptrace_func.ptrace_geteventmsg(pid)
                if event == ptrace_func.PTRACE_EVENT_FORK:
                    logger.debug("The process %d forked a new process %d" %
                                 (pid, subChild))
                elif event == ptrace_func.PTRACE_EVENT_VFORK:
                    logger.debug("The process %d vforked a new process %d" %
                                 (pid, subChild))
                elif event == ptrace_func.PTRACE_EVENT_CLONE:
                    logger.debug("The process %d cloned a new process %d" %
                                 (pid, subChild))
                elif event == ptrace_func.PTRACE_EVENT_EXEC:
                    logger.debug("The process %d run exec" % (pid))
                    processesStatus[pid].updateProcessInfo()
                elif event == ptrace_func.PTRACE_EVENT_EXIT:
                    pass

            elif os.WIFSTOPPED(status):
                # when a signal is delivered to one of the children and we
                # get notified we need to relay it properly to the child
                # (in particular SIGCHLD must be rerouted to the parents,
                # if not mpirun will never end)
                logger.debug("Signal %s(%d) delivered to %d " %
                             (FingerPrint.ptrace.signames.signalName(signalValue),
                              signalValue, pid))
                deliverSignal = signalValue
            else:
                logger.debug("This should not happen!!")

            # resume the process and wait for the next syscall notification
            ptrace_func.ptrace_syscall(pid, deliverSignal)
def continued(self):
    """Tell whether this wait status reports a resumed child.

    ``self`` is handed to ``os.WIFCONTINUED`` unchanged, so it has to be
    usable as an integer wait status.
    """
    was_resumed = os.WIFCONTINUED(self)
    return was_resumed
def __sig_child_handler(self, signum, frame):
    """SIGCHLD handler: reap children and exit once all executors are gone.

    One child is collected with ``os.wait()``.  If its pid is listed in
    ``self.executor_pids`` it is removed from that list and a
    ``(exit_status, signal, core_dumped)`` tuple is appended to
    ``self.executor_rets``.  The remaining executor pids are then polled
    without blocking, and any that have already terminated are dropped
    from the list (their statuses are not recorded).

    When no executor pid remains the process exits via ``sys.exit``: the
    code is 0 unless some recorded executor exited with a non-zero status
    (that status is used) or dumped core (code 1).

    If waiting fails with an ``Exception`` the error is logged; the
    process then exits with 0 when at most one executor pid is still
    listed, otherwise the remaining pids are logged and the handler
    returns.

    :param signum: number of the signal being handled (only logged).
    :param frame: interrupted stack frame (unused).
    """
    # Our child exits with sig 9 when all is good... so map that to 0
    ret = 0
    pid = None
    try:
        status = None
        sig = None
        core = False
        self.logger.debug("Running children: %s" % self.executor_pids)
        self.logger.debug("Got signal %s" % signum)

        pid, ret = os.wait()
        self.logger.debug("After wait")
        msg = "Child %s: wait returned code %s which means:" % (pid, ret)
        if os.WIFSIGNALED(ret):
            sig = os.WTERMSIG(ret)
            msg += " signalled %s" % sig
            # WCOREDUMP is only defined for a signalled status.
            if os.WCOREDUMP(ret):
                core = True
                msg += " core dumped"
        if os.WIFEXITED(ret):
            status = os.WEXITSTATUS(ret)
            msg += " exited %s" % status
        if os.WIFSTOPPED(ret):
            msg += " stopped %s" % os.WSTOPSIG(ret)
        if os.WIFCONTINUED(ret):
            msg += " continued"
        self.logger.debug(msg)

        if pid in self.executor_pids:
            self.executor_pids.remove(pid)
            self.executor_rets.append((status, sig, core))
        else:
            self.logger.error("Pid %s is not a child" % pid)

        # sometimes signal handler is not called, clean here zombies.
        # Iterate over a copy: removing from the list being iterated would
        # silently skip the entry following each removed pid.
        for pid in list(self.executor_pids):
            p, r = os.waitpid(pid, os.WNOHANG)
            if p != 0:
                self.logger.debug(
                    "Zombie with pid %d found, exit code=%d" % (p, r))
                self.executor_pids.remove(pid)

        ret = 0
        if not self.executor_pids:
            self.logger.trace("Statuses of all executors: %s" %
                              self.executor_rets)
            for st, sg, co in self.executor_rets:
                # Death by signal alone is treated as success (see above).
                if st is not None and st != 0:
                    ret = st
                if co:
                    ret = 1
            self.logger.info("Exit with code %s" % ret)
            sys.exit(ret)
    except Exception as ex:
        self.logger.error("Error waiting for child process: %s" % ex)
        if len(self.executor_pids) <= 1:
            self.logger.warn(
                "No more child processes, exit with success: "
                "pids=%s, last pid=%s, ret=%s" %
                (self.executor_pids, pid, ret))
            sys.exit(0)
        else:
            self.logger.info("Children left: %s" % self.executor_pids)