class SystemSSH(object):
    """Hold the settings and child-process state of an ``ssh`` session.

    The constructor only records settings and builds the ``ssh`` argument
    vector.  The helpers below inspect the stored child pid / master pty fd
    to decide whether the ssh child process is still usable.

    NOTE(review): ``_reset()`` is called here but defined outside the part of
    the class that is visible; it presumably clears the child state.
    """

    def __init__(self, host=None, port=22, username=None, passwd=None):
        """Record connection settings and build the ssh command line.

        :param host: remote host name or address.
        :param port: remote port; ``None`` falls back to 22.
        :param username: login name, used as ``username@host``.
        :param passwd: password, stored as given.
        """
        self.host = host
        self.port = port if port is not None else 22
        self.username = username
        self.passwd = passwd
        self.blocking = True
        self.timeout = None
        self.path = "/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin"
        self._ssh_connect_timeout = 30
        self._ssh_auth_timeout = 60
        self._ssh_child_pid = None
        self._ssh_master_pty_fd = None
        self.shell_prompt = None
        # Each quoted group is kept as a single argv element by shlex.split.
        self._ssh_cmd = shlex.split(
            "ssh -t '-o ConnectTimeout=%s' '-o Protocol=2,1' "
            "'-o StrictHostKeyChecking=no' "
            "'-o PreferredAuthentications=password,keyboard-interactive' "
            "'-o NumberOfPasswordPrompts=1' '-o ControlMaster=no' "
            "'-o LogLevel=INFO' '-p %s' %s@%s"
            % (self._ssh_connect_timeout, self.port, self.username, self.host))

    @property
    def opened(self):
        """True when the ssh child exists and has not terminated."""
        try:
            self._check_child_state()
        except (SSHProcessException, SSHNotReady):
            return False
        return True

    @property
    def closed(self):
        """Logical negation of :attr:`opened`."""
        return not self.opened

    def _check_child_basic(self):
        """Raise ``SSHNotReady`` unless both the pty fd and child pid are set."""
        if self._ssh_master_pty_fd is None or self._ssh_child_pid is None:
            raise SSHNotReady("SSH connection is not started/ready")

    def _check_child_state(self):
        """Poll the ssh child without blocking.

        Returns None while the child is still running.

        :raises SSHNotReady: when no child has been started.
        :raises SSHProcessException: when ``os.waitpid`` fails or the child
            has exited or was killed by a signal.
        """
        self._check_child_basic()
        # Remember the pid first: _reset() may clear it before we report it.
        ssh_pid = self._ssh_child_pid
        try:
            pid, exit_st = os.waitpid(ssh_pid, os.WNOHANG)
        except OSError as e:
            if e.errno == errno.ECHILD:
                self._reset()
            raise SSHProcessException("Error on os.waitpid(%s): %s"
                                      % (ssh_pid, e))
        if pid == 0 and exit_st == 0:
            # WNOHANG and nothing to report: the child is still alive.
            return

        ssh_exit_code = None
        ssh_signal = None
        ssh_err_msg = "SSH process (pid='%s') has died:" % ssh_pid
        self._reset()
        if os.WIFSIGNALED(exit_st):
            ssh_signal = os.WTERMSIG(exit_st)
            ssh_err_msg = "%s killed with signal '%s'" % (ssh_err_msg,
                                                          ssh_signal)
        elif os.WIFEXITED(exit_st):
            ssh_exit_code = os.WEXITSTATUS(exit_st)
            ssh_err_msg = "%s exited with code '%s'" % (ssh_err_msg,
                                                        ssh_exit_code)
        raise SSHProcessException(ssh_err_msg, pid=ssh_pid,
                                  exit_code=ssh_exit_code, signal=ssh_signal)
def fork_processes(num_processes, max_restarts=100):
    """Starts multiple worker processes.

    If ``num_processes`` is None or <= 0, we detect the number of cores
    available on this machine and fork that number of child processes.
    If ``num_processes`` is given and > 0, we fork that specific number
    of sub-processes.

    Since we use processes and not threads, there is no shared memory
    between any server code.

    Note that multiple processes are not compatible with the autoreload
    module (or the debug=True option to `tornado.web.Application`).
    When using multiple processes, no IOLoops can be created or
    referenced until after the call to ``fork_processes``.

    In each child process, ``fork_processes`` returns its *task id*, a
    number between 0 and ``num_processes``.  Processes that exit
    abnormally (due to a signal or non-zero exit status) are restarted
    with the same id (up to ``max_restarts`` times).  In the parent
    process, ``fork_processes`` never returns: it calls ``sys.exit(0)``
    once all child processes have exited normally, and otherwise leaves
    only by raising an exception.

    :raises RuntimeError: if an IOLoop was already initialized, or if more
        than ``max_restarts`` restarts were needed.
    """
    global _task_id
    assert _task_id is None
    if num_processes is None or num_processes <= 0:
        num_processes = cpu_count()
    if ioloop.IOLoop.initialized():
        raise RuntimeError(
            "Cannot run in multiple processes: IOLoop instance "
            "has already been initialized. You cannot call "
            "IOLoop.instance() before calling fork_processes()")
    gen_log.info("Starting %d processes", num_processes)
    # Maps the pid of every live child to its task id.
    children = {}

    def start_child(i):
        """Fork one worker; return ``i`` in the child and None in the parent."""
        pid = os.fork()
        if pid == 0:
            # child process
            _reseed_random()
            global _task_id
            _task_id = i
            return i
        else:
            children[pid] = i
            return None

    for i in range(num_processes):
        task_id = start_child(i)
        if task_id is not None:
            return task_id

    num_restarts = 0
    while children:
        try:
            pid, status = os.wait()
        except OSError as e:
            # An interrupted wait is retried; anything else is fatal.
            if e.errno == errno.EINTR:
                continue
            raise
        if pid not in children:
            continue
        task_id = children.pop(pid)
        if os.WIFSIGNALED(status):
            gen_log.warning(
                "child %d (pid %d) killed by signal %d, restarting",
                task_id, pid, os.WTERMSIG(status))
        elif os.WEXITSTATUS(status) != 0:
            gen_log.warning(
                "child %d (pid %d) exited with status %d, restarting",
                task_id, pid, os.WEXITSTATUS(status))
        else:
            gen_log.info("child %d (pid %d) exited normally", task_id, pid)
            continue
        num_restarts += 1
        if num_restarts > max_restarts:
            raise RuntimeError("Too many child restarts, giving up")
        new_id = start_child(task_id)
        if new_id is not None:
            return new_id
    # All child processes exited cleanly, so exit the master process
    # instead of just returning to right after the call to
    # fork_processes (which will probably just start up another IOLoop
    # unless the caller checks the return value).
    sys.exit(0)
def _popen_sync_read(fd):
    """Read one chunk from non-blocking ``fd``; return b'' if none is ready."""
    try:
        return os.read(fd, READ_CHUNK_SIZE)
    except OSError as e:
        # "Nothing to read right now" is expected on a non-blocking fd.
        if e.errno in (errno.EAGAIN, errno.EINTR):
            return b''
        raise


def _popen_sync_text(chunks):
    """Join raw chunks and return them as ``str`` on Python 2 and 3."""
    data = b''.join(chunks)
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


def popen_sync(command, env=None, stdout=True, stderr=True, retcode=True,
               cd=None, su=None):
    """Run ``command`` through ``/bin/sh -c`` and collect its output.

    This function implements a subset of the functionality provided by
    the subprocess.Popen class. The subprocess module in Python 2.4 and
    2.5 have some problems dealing with processes termination on
    multi-thread environments (593800, 1731717).

    :param command: shell command line handed to ``sh -c``.
    :param env: environment mapping for the child; the current environment
        is inherited when empty or None.
    :param stdout, stderr, retcode: accepted for interface compatibility;
        they are not consulted by this implementation.
    :param cd: directory to change into before executing (best effort).
    :param su: uid to switch to before executing (best effort).
    :returns: dict with keys ``command``, ``stdout``, ``stderr`` and
        ``retcode`` (exit status, negative signal number, or None when the
        status could not be collected).
    """
    stdin_r, stdin_w = os.pipe()
    stdout_r, stdout_w = os.pipe()
    stderr_r, stderr_w = os.pipe()

    pid = os.fork()
    if pid == 0:
        try:
            # Close parent's pipe ends
            os.close(stdin_w)
            os.close(stdout_r)
            os.close(stderr_r)

            # Dup fds for child
            os.dup2(stdin_r, 0)
            os.dup2(stdout_w, 1)
            os.dup2(stderr_w, 2)

            # Close every other inherited descriptor (most are not open).
            for fd in range(3, MAXFD):
                try:
                    os.close(fd)
                except OSError:
                    pass

            # Change directory
            if cd:
                try:
                    os.chdir(cd)
                except Exception:
                    print("Could not change directory to: %s" % (cd), file=sys.stderr)
                    print(traceback.format_exc(), file=sys.stderr)

            # Change user
            if su:
                try:
                    os.setuid(su)
                except Exception:
                    print("Could not set user: %s" % (su), file=sys.stderr)
                    print(traceback.format_exc(), file=sys.stderr)

            # Pass control to the executable
            if not env:
                os.execv('/bin/sh', ['sh', '-c', command])
            else:
                os.execve('/bin/sh', ['sh', '-c', command], env)
        except Exception:
            print(traceback.format_exc(), file=sys.stderr)
        finally:
            # Only reached when exec failed: never let the child fall
            # through into the parent's code path.
            try:
                sys.stderr.flush()
            except Exception:
                pass
            os._exit(127)

    # Parent: poll the child's outputs until it terminates.
    returncode = None
    sinks = {stdout_r: [], stderr_r: []}
    try:
        set_non_blocking(stdout_r)
        set_non_blocking(stderr_r)

        while True:
            rlist, wlist, xlist = select.select([stdout_r, stderr_r], [],
                                                [stdout_r, stderr_r],
                                                POLLING_LAPSE)
            for fd in (stderr_r, stdout_r):
                if fd in rlist:
                    data = _popen_sync_read(fd)
                    if data:
                        sinks[fd].append(data)

            # Has it finished?
            try:
                pid_ret, sts = os.waitpid(pid, os.WNOHANG)
            except OSError:
                returncode = None
                break
            if pid_ret == pid:
                if os.WIFSIGNALED(sts):
                    returncode = -os.WTERMSIG(sts)
                elif os.WIFEXITED(sts):
                    returncode = os.WEXITSTATUS(sts)
                break

        # Collect whatever the child wrote between the last read and exit.
        for fd in (stderr_r, stdout_r):
            while True:
                data = _popen_sync_read(fd)
                if not data:
                    break
                sinks[fd].append(data)
    finally:
        # Clean up
        for fd in (stdin_w, stderr_r, stdout_r, stdin_r, stderr_w, stdout_w):
            try:
                os.close(fd)
            except OSError:
                pass

    # Return information
    ret = {
        'command': command,
        'stdout': _popen_sync_text(sinks[stdout_r]),
        'stderr': _popen_sync_text(sinks[stderr_r]),
        'retcode': returncode,
    }
    return ret
def exit_signalled(s):
    """Tell whether wait status ``s`` means "terminated by SIGUSR1"."""
    if not os.WIFSIGNALED(s):
        return False
    return os.WTERMSIG(s) == signal.SIGUSR1
def fork_processes(
    num_processes: Optional[int], max_restarts: Optional[int] = None
) -> int:
    """Fork a pool of worker processes and supervise them from the parent.

    ``num_processes`` of None or <= 0 means "one per core" (as reported by
    ``cpu_count()``); a positive value forks exactly that many children.
    Workers are separate processes, so no memory is shared between them.

    Multiple processes are not compatible with the autoreload module (or
    the ``autoreload=True`` option to `tornado.web.Application`, which
    defaults to True when ``debug=True``).  No IOLoops may be created or
    referenced until after ``fork_processes`` has been called.

    In each child the call returns the child's *task id*, a number between
    0 and ``num_processes``.  A child that dies from a signal or exits with
    a non-zero status is restarted with the same id, up to ``max_restarts``
    times in total (default 100).  The parent calls ``sys.exit(0)`` once
    every child has exited normally.

    Availability: Unix
    """
    if sys.platform == "win32":
        # The exact form of this condition matters to mypy; it understands
        # if but not assert in this context.
        raise Exception("fork not available on windows")
    if max_restarts is None:
        max_restarts = 100

    global _task_id
    assert _task_id is None
    if num_processes is None or num_processes <= 0:
        num_processes = cpu_count()
    gen_log.info("Starting %d processes", num_processes)
    # pid of each live child -> its task id
    children = {}

    def start_child(i: int) -> Optional[int]:
        global _task_id
        pid = os.fork()
        if pid != 0:
            # parent: remember which task this pid is running
            children[pid] = i
            return None
        # child process
        _reseed_random()
        _task_id = i
        return i

    for index in range(num_processes):
        task_id = start_child(index)
        if task_id is not None:
            return task_id

    restart_count = 0
    while children:
        pid, status = os.wait()
        if pid not in children:
            continue
        task_id = children.pop(pid)

        killed = os.WIFSIGNALED(status)
        if not killed and os.WEXITSTATUS(status) == 0:
            gen_log.info("child %d (pid %d) exited normally", task_id, pid)
            continue
        if killed:
            gen_log.warning(
                "child %d (pid %d) killed by signal %d, restarting",
                task_id,
                pid,
                os.WTERMSIG(status),
            )
        else:
            gen_log.warning(
                "child %d (pid %d) exited with status %d, restarting",
                task_id,
                pid,
                os.WEXITSTATUS(status),
            )

        restart_count += 1
        if restart_count > max_restarts:
            raise RuntimeError("Too many child restarts, giving up")
        restarted_id = start_child(task_id)
        if restarted_id is not None:
            return restarted_id
    # All child processes exited cleanly, so exit the master process
    # instead of just returning to right after the call to
    # fork_processes (which will probably just start up another IOLoop
    # unless the caller checks the return value).
    sys.exit(0)
def inhibit(self, *a):
    """inhibit a gluster filesystem

    Mount glusterfs over a temporary mountpoint,
    change into the mount, and lazy unmount the
    filesystem.

    The work is split over two processes connected by a pipe.  The
    branch taken when ``Popen.fork()`` returns a true value performs the
    mount and writes the mount point (NUL-terminated), then an ``'M'``
    marker once the mount is in place.  The other branch reads the pipe
    until EOF, lazily unmounts / cleans up the mount point, and leaves via
    ``os._exit`` with the cleanup status.

    ``*a`` is forwarded unchanged to ``self.make_mount_argv``.

    Raises GsyncdError when the cleanup process reports a non-zero wait
    status.
    """
    mpi, mpo = os.pipe()
    # NOTE(review): presumably returns the child pid in the parent and a
    # false value in the child, like os.fork() -- confirm against Popen.fork.
    mh = Popen.fork()
    if mh:
        os.close(mpi)
        # Keep the write end from leaking into the mount helper we spawn.
        fcntl.fcntl(mpo, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
        d = None
        margv = self.make_mount_argv(*a)
        if self.mntpt:
            # mntpt is determined pre-mount
            d = self.mntpt
            os.write(mpo, d + '\0')
        po = Popen(margv, **self.mountkw)
        self.handle_mounter(po)
        po.terminate_geterr()
        logging.debug('auxiliary glusterfs mount in place')
        if not d:
            # mntpt is determined during mount
            d = self.mntpt
            os.write(mpo, d + '\0')
        # 'M' tells the cleanup process that the mount really happened.
        os.write(mpo, 'M')
        # chdir runs in a thread so that it can be given a deadline.
        t = syncdutils.Thread(target=lambda: os.chdir(d))
        t.start()
        tlim = gconf.starttime + int(gconf.connection_timeout)
        while True:
            if not t.isAlive():
                break
            if time.time() >= tlim:
                syncdutils.finalize(exval=1)
            time.sleep(1)
        # Closing the write end gives the cleanup process its EOF.
        os.close(mpo)
        _, rv = syncdutils.waitpid(mh, 0)
        if rv:
            # Fold the wait status into one number: exit status minus
            # terminating signal (whichever applies is non-zero).
            rv = (os.WIFEXITED(rv) and os.WEXITSTATUS(rv) or 0) - \
                (os.WIFSIGNALED(rv) and os.WTERMSIG(rv) or 0)
            logging.warn('stale mount possibly left behind on ' + d)
            raise GsyncdError("cleaning up temp mountpoint %s "
                              "failed with status %d" % (d, rv))
    else:
        rv = 0
        try:
            os.setsid()
            os.close(mpo)
            # Read everything the other process wrote, up to EOF.
            mntdata = ''
            while True:
                c = os.read(mpi, 1)
                if not c:
                    break
                mntdata += c
            if mntdata:
                mounted = False
                # A trailing 'M' marks a completed mount.
                if mntdata[-1] == 'M':
                    mntdata = mntdata[:-1]
                    assert(mntdata)
                    mounted = True
                # What remains is the NUL-terminated mount point.
                assert(mntdata[-1] == '\0')
                mntpt = mntdata[:-1]
                assert(mntpt)
                if mounted:
                    po = self.umount_l(mntpt)
                    po.terminate_geterr(fail_on_err=False)
                    if po.returncode != 0:
                        po.errlog()
                        rv = po.returncode
                self.cleanup_mntpt(mntpt)
        except:
            # Any failure is logged and reported through the exit status.
            logging.exception('mount cleanup failure:')
            rv = 200
        os._exit(rv)
    logging.debug('auxiliary glusterfs mount prepared')
def wait_pid(pid, timeout=None):
    """Block until process ``pid`` terminates and report how it ended.

    For a child of the current process the return value is an integer:
    the terminating signal number if it was killed by a signal, otherwise
    its exit code.  If ``pid`` is not a child of os.getpid() the function
    polls until the process disappears and returns None; if ``pid`` does
    not exist at all it returns None immediately.

    Raise TimeoutExpired when ``timeout`` (seconds) elapses first.
    """
    clock = getattr(time, 'monotonic', time.time)
    if timeout is None:
        deadline = None
        wait_flags = 0
    else:
        # With a deadline we must poll instead of blocking in waitpid().
        deadline = clock() + timeout
        wait_flags = os.WNOHANG

    def pause(delay):
        # Sleep for `delay`, then hand back the next (capped) delay.
        if deadline is not None and clock() >= deadline:
            raise TimeoutExpired(pid)
        time.sleep(delay)
        return min(delay * 2, 0.04)

    delay = 0.0001
    while True:
        try:
            retpid, status = os.waitpid(pid, wait_flags)
        except OSError as err:
            if err.errno == errno.EINTR:
                delay = pause(delay)
                continue
            if err.errno != errno.ECHILD:
                raise
            # ECHILD has two meanings:
            # - pid is not a child of os.getpid(), in which case we keep
            #   polling until it's gone
            # - pid never existed in the first place
            # Either way the exit status is unknowable, so None is returned.
            while pid_exists(pid):
                delay = pause(delay)
            return None

        if retpid == 0:
            # WNOHANG was used, pid is still running
            delay = pause(delay)
            continue
        if os.WIFSIGNALED(status):
            # killed by a signal: report the signal number
            return os.WTERMSIG(status)
        if os.WIFEXITED(status):
            # regular exit: report the exit code
            return os.WEXITSTATUS(status)
        # should never happen
        raise RuntimeError("unknown process exit status")
def java(self):
    """Run the Java ``Main`` class for this submission and grade the run.

    The program reads ``self.ifile`` on stdin; its stdout goes to a
    temporary file that is compared with ``self.output``.  Sets
    ``self.cpu_used``, ``self.memory_used``, ``self.return_status`` and,
    when the child was signalled, ``self.return_code``.

    ``self.result`` is set to: 0 (accepted), -3 (runtime error),
    -5 (time limit), -6 (memory limit) or -7 (wrong answer).  It is left
    untouched when the child dies from a signal other than SIGXCPU,
    SIGSEGV or SIGKILL (only a log line is printed).
    """
    if self.ua:
        dst = ANSWER_PATH + self.id
    else:
        dst = BINARY_PATH + self.id
    # careful about the maxheapsize for JVM,should be set
    cmd = ['java', '-XX:MaxHeapSize=1024m', '-cp', dst, 'Main']

    # The context manager guarantees the temporary output file is closed.
    with TemporaryFile('w+t') as ofile:
        p = Popen(cmd, stdin=self.ifile, stdout=ofile,
                  preexec_fn=Tester.Limiter(self.cpu_limit*2, -1),
                  universal_newlines=True, stderr=DEVNULL)
        # wait4 is used instead of p.wait() to obtain the resource usage.
        waitResult = os.wait4(p.pid, 0)
        self.cpu_used = waitResult[2].ru_utime + waitResult[2].ru_stime
        self.memory_used = waitResult[2].ru_maxrss
        self.return_status = waitResult[1]

        over_memory = self.memory_used > self.memory_limit/1024

        if os.WIFSIGNALED(self.return_status):
            # end with a signal
            self.return_code = os.WTERMSIG(self.return_status)
            if (self.return_code == signal.SIGXCPU
                    or self.cpu_used >= self.cpu_limit):
                self.cpu_used = self.cpu_limit
                self.result = -5  # TLE
            elif self.return_code in (signal.SIGSEGV, signal.SIGKILL):
                self.result = -6 if over_memory else -3  # MLE / RE
            else:
                print('LOG NEW ERROR '+str(self.return_code))
        elif self.return_status != 0:
            # non-zero exit status
            self.result = -6 if over_memory else -3  # MLE / RE
        elif over_memory:
            self.result = -6  # MLE
        else:
            # clean exit: compare outputs, ignoring carriage returns and
            # leading/trailing whitespace
            ofile.seek(0)
            ofile_string = str(ofile.read(-1)).strip().replace(chr(13), '')
            output_string = self.output.strip().replace(chr(13), '')
            if ofile_string != output_string:
                self.result = -7  # WA
            else:
                self.result = 0  # AC
# Wait for the child and translate its wait status into success or a
# DistutilsExecError.  NOTE(review): the ``continue`` statements imply an
# enclosing loop whose header is outside the visible source.
try:
    pid, status = os.waitpid(pid, 0)
except OSError, exc:
    import errno
    # An interrupted wait is simply retried.
    if exc.errno == errno.EINTR:
        continue
    # Outside DEBUG mode only the executable name is shown, not the
    # whole command.
    if not DEBUG:
        cmd = executable
    raise DistutilsExecError, \
          "command %r failed: %s" % (cmd, exc[-1])
if os.WIFSIGNALED(status):
    if not DEBUG:
        cmd = executable
    raise DistutilsExecError, \
          "command %r terminated by signal %d" % \
          (cmd, os.WTERMSIG(status))

elif os.WIFEXITED(status):
    exit_status = os.WEXITSTATUS(status)
    if exit_status == 0:
        return   # hey, it succeeded!
    else:
        if not DEBUG:
            cmd = executable
        raise DistutilsExecError, \
              "command %r failed with exit status %d" % \
              (cmd, exit_status)

elif os.WIFSTOPPED(status):
    # Merely stopped, not finished: keep waiting.
    continue
def compiler_job(self):
    """Handle one remote compilation request from a client.

    Receives the compiler command line and the input files over the
    client socket, stores the inputs in temporary files, rewrites the
    command line to use them, runs the compiler, and sends the output
    messages, the result files (only when the exit code is 0) and the
    exit code back to the client.

    NOTE(review): the original description promised a 0 / non-zero return
    value, but no ``return`` statement exists in this method, so it
    returns None; the exit code is only sent via ``send_exit_code``.
    """
    if __debug__:
        sys.stderr.write("Execution d'un job de compilation\n")
        syslog.syslog(syslog.LOG_DEBUG | syslog.LOG_DAEMON,
                      "Launching compilation job.")

    # receive the command line to be executed
    compiler_command_line = ProtocolUtils.recv_data(self.__client_socket)
    if __debug__:
        syslog.syslog(syslog.LOG_DEBUG | syslog.LOG_DAEMON,
                      "Compilation command line received : "
                      + compiler_command_line)
    compiler_command = CompilerCommandFactory.build_compiler_instance(
        compiler_command_line.split())

    # get the content of the input files used in the command line we
    # have just received
    input_temp_files = {}
    output_temp_files = {}
    command = self.__client_socket.recv(Protocol.COMMAND_TYPE_SIZE)
    while command == Protocol.FILE_COMMAND:
        file_name = ProtocolUtils.recv_data(self.__client_socket)
        # FIXME: do we need to create the file inside the
        # critical section ?
        RemoteJob.tmp_file_creation_lock.acquire()
        try:
            if __debug__:
                syslog.syslog(syslog.LOG_DEBUG | syslog.LOG_DAEMON,
                              "Creating temporary file.")
            # mkstemp creates the file atomically (mktemp is race-prone).
            tmp_fd, tmp_file_name = tempfile.mkstemp(
                FileUtils.get_file_extension(file_name))
            tmp_file = os.fdopen(tmp_fd, 'w')
        finally:
            RemoteJob.tmp_file_creation_lock.release()
        if __debug__:
            syslog.syslog(syslog.LOG_DEBUG | syslog.LOG_DAEMON,
                          "Temporary file created.")
        input_temp_files[file_name] = tmp_file_name
        try:
            tmp_file.write(ProtocolUtils.recv_data(self.__client_socket))
            tmp_file.flush()
        finally:
            tmp_file.close()
        command = self.__client_socket.recv(Protocol.COMMAND_TYPE_SIZE)

    # replace original input files in the command line by the
    # temporary ones
    compiler_command.replace_input_files(input_temp_files)
    if __debug__:
        syslog.syslog(syslog.LOG_DEBUG | syslog.LOG_DAEMON,
                      "New compilation command line :"
                      + " ".join(compiler_command.get_command_args()))

    # FIXME We should not use "-o" here, this is compiler dependant
    if "-o" in compiler_command.get_command_args():
        if __debug__:
            syslog.syslog(syslog.LOG_DEBUG | syslog.LOG_DAEMON,
                          "-o option in command line.")
        try:
            index_output = compiler_command.get_command_args().index("-o") + 1
            # Raises IndexError before any temp file is created when no
            # file name follows -o.
            output_file_name = compiler_command.get_command_args()[
                index_output]
            # FIXME: do we need to create the file inside the
            # critical section ?
            RemoteJob.tmp_file_creation_lock.acquire()
            try:
                tmp_output_fd, tmp_output_file = tempfile.mkstemp()
                os.close(tmp_output_fd)
            finally:
                RemoteJob.tmp_file_creation_lock.release()
            # associate the output tmp file with the original one
            output_temp_files[output_file_name] = tmp_output_file
            # replace the output file name in the command line
            compiler_command.get_command_args()[index_output] = \
                tmp_output_file
        except IndexError:
            # if there is no file name after the -o option,
            # it means that the command line is wrong, but we
            # must execute it in order to send the error
            # msg back to the client
            pass
    else:
        # no output file specified with -o switch
        if __debug__:
            syslog.syslog(syslog.LOG_DEBUG | syslog.LOG_DAEMON,
                          "No -o option in command line.")
        for original_input_file_name in input_temp_files.keys():
            stop_step = compiler_command.get_stop_step()
            orig_output_file_name = \
                compiler_command.get_output_file_name_for_step(
                    original_input_file_name, stop_step)
            output_temp_files[orig_output_file_name] = \
                compiler_command.get_output_file_name_for_step(
                    input_temp_files[original_input_file_name], stop_step)
            if __debug__:
                syslog.syslog(syslog.LOG_DEBUG | syslog.LOG_DAEMON,
                              "File to return to the client : "
                              + output_temp_files[orig_output_file_name])

    # execute the command in a subshell and get the stdout and stderr output
    command_line = compiler_command.get_local_compiler_path() + " " + \
        " ".join(compiler_command.get_command_args()[1:])
    if __debug__:
        syslog.syslog(syslog.LOG_DEBUG | syslog.LOG_DAEMON,
                      "Executing the following command line : "
                      + command_line)
    # NOTE(review): reading stderr to EOF before stdout can stall if the
    # compiler fills the stdout pipe first.
    proc = popen2.Popen3(command_line, 1)
    msg_stderr = proc.childerr.read()
    msg_stdout = proc.fromchild.read()
    proc.childerr.close()
    proc.fromchild.close()
    exit_code = proc.wait()

    self.send_output_messages(msg_stdout, msg_stderr, input_temp_files,
                              output_temp_files)
    # Decode the wait status exactly once: the decoded value must not be
    # fed back into the W* macros.
    if os.WIFEXITED(exit_code):
        exit_code = os.WEXITSTATUS(exit_code)
    elif os.WIFSIGNALED(exit_code):
        exit_code = os.WTERMSIG(exit_code)
    if __debug__:
        syslog.syslog(syslog.LOG_DEBUG | syslog.LOG_DAEMON,
                      "Exit code : " + str(exit_code))
    if exit_code == 0:
        # send the result (output files and output messages) to the client
        self.send_result_files_back_to_client(input_temp_files,
                                              output_temp_files)
        if __debug__:
            syslog.syslog(syslog.LOG_DEBUG | syslog.LOG_DAEMON,
                          "Output files sent.")
    self.send_exit_code(exit_code)
    self.set_and_send_nb_job_to_scheduler()
def cxx(self):
    """Run the compiled native binary for this submission and grade the run.

    The program reads ``self.ifile`` on stdin; its stdout goes to a
    temporary file that is compared with ``self.output``.  Sets
    ``self.cpu_used``, ``self.memory_used``, ``self.return_status`` and,
    when the child was signalled, ``self.return_code``.

    ``self.result`` is set to: 0 (accepted), -3 (runtime error),
    -5 (time limit), -6 (memory limit), -7 (wrong answer) or
    -8 (presentation error: outputs differ only in whitespace).  It is
    left untouched when the child dies from a signal other than SIGXCPU,
    SIGSEGV or SIGKILL (only a log line is printed).
    """
    if self.ua:
        binary = ANSWER_PATH + self.id + '/x' + self.id
    else:
        binary = BINARY_PATH + self.id + '/x' + self.id
    # NOTE(review): this assignment was garbled in the source
    # ("...'******'sudo','su','nobody','-c',bin]"); it is reconstructed
    # from the surviving list tail and the later use of `cmd` -- confirm.
    cmd = ['sudo', 'su', 'nobody', '-c', binary]

    # The context manager guarantees the temporary output file is closed.
    with TemporaryFile('w+t') as ofile:
        p = Popen(cmd, stdin=self.ifile, stdout=ofile,
                  preexec_fn=Tester.Limiter(self.cpu_limit+0.5,
                                            self.memory_limit*2),
                  universal_newlines=True, stderr=DEVNULL)
        # wait4 is used instead of p.wait() to obtain the resource usage.
        waitResult = os.wait4(p.pid, 0)
        print('WaitResult:')
        print(waitResult)
        self.cpu_used = waitResult[2].ru_utime + waitResult[2].ru_stime
        self.memory_used = waitResult[2].ru_maxrss
        print(' cpu_used:'+str(self.cpu_used)+' limit: '+str(self.cpu_limit))
        print(' mem_used:'+str(self.memory_used)+' limit: '+str(self.memory_limit))
        self.return_status = waitResult[1]

        over_memory = self.memory_used > self.memory_limit

        if os.WIFSIGNALED(self.return_status):
            # end with a signal
            self.return_code = os.WTERMSIG(self.return_status)
            if (self.return_code == signal.SIGXCPU
                    or self.cpu_used > self.cpu_limit):
                self.cpu_used = self.cpu_limit
                self.result = -5  # TLE
            elif self.return_code in (signal.SIGSEGV, signal.SIGKILL):
                self.result = -6 if over_memory else -3  # MLE / RE
            else:
                print('LOG NEW ERROR '+str(self.return_code))
        elif self.return_status == 0:
            # clean exit: limits first, then the output comparison
            if self.cpu_used > self.cpu_limit:
                self.cpu_used = self.cpu_limit
                self.result = -5  # TLE
            elif over_memory:
                self.result = -6  # MLE
            else:
                ofile.seek(0)
                ofile_string = str(ofile.read(-1)).strip().replace(chr(13), '')
                output_string = self.output.strip().replace(chr(13), '')
                if ofile_string == output_string:
                    self.result = 0  # AC
                elif ''.join(ofile_string.split()) == ''.join(output_string.split()):
                    self.result = -8  # PE
                else:
                    self.result = -7  # WA
        else:
            # non-zero exit status
            if over_memory:
                self.result = -6  # MLE
            elif self.cpu_used >= self.cpu_limit:
                self.cpu_used = self.cpu_limit
                self.result = -5  # TLE
            else:
                self.result = -3  # RE
def _spawn_posix(cmd, search_path=1, verbose=0, dry_run=0):
    """Fork, exec ``cmd`` in the child, and wait for it in the parent.

    ``cmd`` is an argument list whose first element is the executable.
    With a true ``search_path`` the PATH-searching exec variants are used.
    Nothing is executed when ``dry_run`` is true.  Returns None on
    success.

    Raises DistutilsExecError when waiting fails or the command ends
    abnormally, and DistutilsPlatformError (macOS only) when
    MACOSX_DEPLOYMENT_TARGET is lower than the configured value.
    """
    global _cfg_target, _cfg_target_split

    log.info(' '.join(cmd))
    if dry_run:
        return

    executable = cmd[0]

    def shown():
        # Outside DEBUG mode messages name only the executable.
        return cmd if DEBUG else executable

    env = None
    exec_fn = os.execvp if search_path else os.execv
    if sys.platform == 'darwin':
        if _cfg_target is None:
            _cfg_target = sysconfig.get_config_var(
                'MACOSX_DEPLOYMENT_TARGET') or ''
            if _cfg_target:
                _cfg_target_split = [int(x) for x in _cfg_target.split('.')]
        if _cfg_target:
            # ensure that the deployment target of build process is not less
            # than that used when the interpreter was built. This ensures
            # extension modules are built with correct compatibility values
            cur_target = os.environ.get('MACOSX_DEPLOYMENT_TARGET', _cfg_target)
            if _cfg_target_split > [int(x) for x in cur_target.split('.')]:
                my_msg = ('$MACOSX_DEPLOYMENT_TARGET mismatch: '
                          'now "%s" but "%s" during configure'
                          % (cur_target, _cfg_target))
                raise DistutilsPlatformError(my_msg)
            env = dict(os.environ, MACOSX_DEPLOYMENT_TARGET=cur_target)
            exec_fn = os.execvpe if search_path else os.execve

    pid = os.fork()
    if pid == 0:
        # in the child: exec only returns on failure
        try:
            if env is not None:
                exec_fn(executable, cmd, env)
            else:
                exec_fn(executable, cmd)
        except OSError as e:
            sys.stderr.write("unable to execute %r: %s\n"
                             % (shown(), e.strerror))
            os._exit(1)

        sys.stderr.write("unable to execute %r for unknown reasons" % shown())
        os._exit(1)

    # in the parent
    # Loop until the child either exits or is terminated by a signal
    # (ie. keep waiting if it's merely stopped)
    while True:
        try:
            pid, status = os.waitpid(pid, 0)
        except OSError as exc:
            raise DistutilsExecError(
                "command %r failed: %s" % (shown(), exc.args[-1]))
        if os.WIFSIGNALED(status):
            raise DistutilsExecError(
                "command %r terminated by signal %d"
                % (shown(), os.WTERMSIG(status)))
        if os.WIFEXITED(status):
            exit_status = os.WEXITSTATUS(status)
            if exit_status == 0:
                return   # hey, it succeeded!
            raise DistutilsExecError(
                "command %r failed with exit status %d"
                % (shown(), exit_status))
        if not os.WIFSTOPPED(status):
            raise DistutilsExecError(
                "unknown error executing %r: termination status %d"
                % (shown(), status))
logging.info("Launching a %s" % to_launch)
# Starting via a system call to manage different python version
python_exec = os.getenv('PYTHON_VERSION', "python36")
command = "%s %s/%s.py" % (python_exec, dir_path, to_launch)
# NOTE(review): the command string is run through a shell; keep
# PYTHON_VERSION, dir_path and to_launch under trusted control.
return_code = subprocess.call(command, shell=True,
                              env=dict(os.environ, LC_ALL='en_US.utf8'))

# Handles return code.  subprocess.call() returns an already-decoded return
# code (>= 0: exit status, < 0: negated signal number), not a raw wait
# status, so the os.W* macros must not be applied to it.
if return_code < 0:
    status = "terminated by signal"
    return_code = -return_code
else:
    status = "exited with status"

status = "Containerized process %s %d" % (status, return_code)
if return_code == 0:
    logging.info(status)
else:
    logging.error(status)
def isalive(self):
    '''Report whether the child process still appears to be running.

    Returns True while the child is running and False once it has
    terminated.  When termination is detected, ``status``,
    ``exitstatus``, ``signalstatus`` and ``terminated`` are updated on
    ``self``.  The call is non-blocking unless ``flag_eof`` is set.

    Raises PtyProcessError if no child process exists to wait for, or if
    the child is reported as stopped.

    It can take literally SECONDS for Solaris to return the right
    status. '''
    if self.terminated:
        return False

    # Linux requires the blocking form of waitpid to get the status of a
    # defunct process.  Per the original author's note, flag_eof would
    # have been set in read_nonblocking(), so blocking should be safe.
    waitpid_options = 0 if self.flag_eof else os.WNOHANG

    # waitpid() is attempted up to twice (needed on Solaris): a 0 pid
    # means no child wishes to report and `status` is undefined.  Each
    # attempt has its own "no child" error text.
    no_child_messages = (
        'isalive() encountered condition '
        'where "terminated" is 0, but there was no child '
        'process. Did someone else call waitpid() '
        'on our process?',
        'isalive() encountered condition '
        'that should never happen. There was no child '
        'process. Did someone else call waitpid() '
        'on our process?',
    )
    for no_child_message in no_child_messages:
        try:
            pid, status = os.waitpid(self.pid, waitpid_options)
        except OSError as e:
            if e.errno == errno.ECHILD:
                raise PtyProcessError(no_child_message)
            raise
        if pid != 0:
            break
    else:
        # pid is still 0 after two calls to waitpid(): the process really
        # is alive.  This seems to work on all platforms, except for Irix
        # which seems to require a blocking call on waitpid or select, so
        # read_nonblocking takes care of that situation (unfortunately,
        # this requires waiting through the timeout).
        return True

    if os.WIFEXITED(status):
        exit_code, term_signal = os.WEXITSTATUS(status), None
    elif os.WIFSIGNALED(status):
        exit_code, term_signal = None, os.WTERMSIG(status)
    else:
        if os.WIFSTOPPED(status):
            raise PtyProcessError(
                'isalive() encountered condition '
                'where child process is stopped. This is not '
                'supported. Is some other process attempting '
                'job control with our child pid?')
        return False

    self.status = status
    self.exitstatus = exit_code
    self.signalstatus = term_signal
    self.terminated = True
    return False
def test_terminate_sigkill(self):
    """Terminating with SIGKILL must yield a wait status that reports it."""
    expected_signal = signal.SIGKILL
    self._terminate_with_signal(expected_signal)
    wait_status = self._reap_test()
    was_signalled = os.WIFSIGNALED(wait_status)
    self.assertTrue(was_signalled)
    self.assertEqual(os.WTERMSIG(wait_status), expected_signal)
def loadEntitlementFromProgram(fullPath, serverName):
    """ Executes the given file to generate an entitlement.
        The executable must print to stdout a full valid entitlement xml
        blob.

        ``fullPath`` is executed with ``serverName`` as its only argument,
        stdin connected to /dev/null, and stdout/stderr captured.

        Returns whatever ``loadEntitlementFromString`` produces for the
        captured stdout.  Raises errors.ConaryError when the program exits
        with a non-zero status or is killed by a signal.
    """
    import errno
    import select

    readFd, writeFd = os.pipe()
    stdErrRead, stdErrWrite = os.pipe()
    childPid = os.fork()
    if not childPid:
        # Everything the child does sits inside the try so that a failure
        # can never fall through into the caller's code.
        try:
            try:
                nullFd = os.open("/dev/null", os.O_RDONLY)
                os.close(readFd)
                os.close(stdErrRead)
                # switch stdin to /dev/null
                os.dup2(nullFd, 0)
                os.close(nullFd)

                # both error and stderr are redirected - the entitlement
                # should be on stdout, and error info should be
                # on stderr.
                os.dup2(writeFd, 1)
                os.dup2(stdErrWrite, 2)
                os.close(writeFd)
                os.close(stdErrWrite)
                util.massCloseFileDescriptors(3, 252)
                os.execl(fullPath, fullPath, serverName)
            except Exception:
                traceback.print_exc(file=sys.stderr)
        finally:
            os._exit(1)

    os.close(writeFd)
    os.close(stdErrWrite)

    # Read both pipes as data becomes available.  Reading them in strict
    # alternation with blocking reads can deadlock when the child fills
    # one pipe while we are blocked on the other.  When both are closed,
    # the child process should have exited.
    chunks = {readFd: [], stdErrRead: []}
    openFds = [readFd, stdErrRead]
    try:
        while openFds:
            try:
                readable = select.select(openFds, [], [])[0]
            except select.error as err:
                if err.args[0] == errno.EINTR:
                    continue
                raise
            for fd in readable:
                data = os.read(fd, 1024)
                if data:
                    chunks[fd].append(data)
                else:
                    openFds.remove(fd)
    finally:
        os.close(readFd)
        os.close(stdErrRead)
        pid, status = os.waitpid(childPid, 0)

    def _text(parts):
        # str on Python 2; decoded bytes on Python 3.
        joined = b''.join(parts)
        if isinstance(joined, str):
            return joined
        return joined.decode('utf-8', 'replace')

    errMsg = ''
    if os.WIFEXITED(status) and os.WEXITSTATUS(status):
        errMsg = ('Entitlement generator at "%s"'
                  ' died with exit status %d' % (fullPath,
                                                 os.WEXITSTATUS(status)))
    elif os.WIFSIGNALED(status):
        errMsg = ('Entitlement generator at "%s"'
                  ' died with signal %d' % (fullPath, os.WTERMSIG(status)))

    if errMsg:
        errorOutput = _text(chunks[stdErrRead])
        if errorOutput:
            errMsg += ' - stderr output follows:\n%s' % errorOutput
        else:
            errMsg += ' - no output on stderr'
        raise errors.ConaryError(errMsg)

    # looks like we generated an entitlement - they're still the possibility
    # that the entitlement is broken.
    xmlContent = _text(chunks[readFd])
    return loadEntitlementFromString(xmlContent, fullPath)
def fork_worker(self, job):
    """Invoked by ``work`` method. ``fork_worker`` does the actual forking to
    create the child process that will process the job. It's also responsible
    for monitoring the child process and handling hangs and crashes.

    Finally, the ``process`` method actually processes the job by eventually
    calling the Job instance's ``perform`` method.

    In the parent, the child is polled every 0.5 seconds. A clean exit
    (status 0) ends the wait; any other exit raises ``CrashError`` and
    exceeding ``self.timeout`` kills the child and raises ``TimeoutError``.
    ``JobError`` is handled through ``self._handle_job_exception(job)``.
    The child runs ``self.process(job)`` and then leaves via ``os._exit(0)``.
    """
    logger.debug('picked up job')
    logger.debug('job details: %s' % job)
    self.before_fork(job)
    self.child = os.fork()
    if self.child:
        self._setproctitle("Forked %s at %s" %
                           (self.child, datetime.datetime.now()))
        logger.info('Forked %s at %s' % (self.child,
                                         datetime.datetime.now()))

        try:
            start = datetime.datetime.now()

            # waits for the result or times out
            while True:
                pid, status = os.waitpid(self.child, os.WNOHANG)
                if pid != 0:
                    if os.WIFEXITED(status) and os.WEXITSTATUS(status) == 0:
                        break
                    if os.WIFSTOPPED(status):
                        logger.warning("Process stopped by signal %d" %
                                       os.WSTOPSIG(status))
                    else:
                        if os.WIFSIGNALED(status):
                            raise CrashError("Unexpected exit by signal %d" %
                                             os.WTERMSIG(status))
                        raise CrashError("Unexpected exit status %d" %
                                         os.WEXITSTATUS(status))

                time.sleep(0.5)

                elapsed = datetime.datetime.now() - start
                # timedelta.seconds only holds the sub-day remainder, so the
                # whole days have to be added back for a correct comparison.
                elapsed_seconds = elapsed.days * 86400 + elapsed.seconds
                if self.timeout and elapsed_seconds > self.timeout:
                    os.kill(self.child, signal.SIGKILL)
                    os.waitpid(-1, os.WNOHANG)
                    raise TimeoutError("Timed out after %d seconds" %
                                       self.timeout)

        except OSError as ose:
            import errno

            # An interrupted system call is not an error here; anything
            # else is re-raised with its original traceback.
            if ose.errno != errno.EINTR:
                raise
        except JobError:
            self._handle_job_exception(job)
        finally:
            # If the child process' job called os._exit manually we need to
            # finish the clean up here.
            if self.job():
                self.done_working(job)

        logger.debug('done waiting')
    else:
        self._setproctitle("Processing %s since %s" %
                           (job, datetime.datetime.now()))
        logger.info('Processing %s since %s' %
                    (job, datetime.datetime.now()))
        self.after_fork(job)

        # re-seed the Python PRNG after forking, otherwise
        # all job process will share the same sequence of
        # random numbers
        random.seed()

        self.process(job)
        os._exit(0)
    self.child = None
def main(listener_fd, alive_r, preload, main_path=None, sys_path=None):
    '''Run forkserver.

    Imports the modules named in ``preload`` (import failures are ignored),
    then serves fork requests arriving on the Unix socket wrapped around
    ``listener_fd`` until ``alive_r`` becomes readable, at which point
    SystemExit is raised. Each request supplies file descriptors; the
    server forks, sends the child's pid back on the request's ``child_w``
    descriptor, and later sends the child's return code on the same
    descriptor once the child has been reaped.

    ``sys_path`` is accepted but not referenced in this function body.
    '''
    if preload:
        if '__main__' in preload and main_path is not None:
            # Flag the current process as "inheriting" only for the duration
            # of the main-module import.
            process.current_process()._inheriting = True
            try:
                spawn.import_main_path(main_path)
            finally:
                del process.current_process()._inheriting
        for modname in preload:
            try:
                __import__(modname)
            except ImportError:
                pass

    util._close_stdin()

    # Pipe used as the signal wakeup fd: sig_r is watched by the selector
    # below, sig_w is handed to signal.set_wakeup_fd().
    sig_r, sig_w = os.pipe()
    os.set_blocking(sig_r, False)
    os.set_blocking(sig_w, False)

    def sigchld_handler(*_unused):
        # Dummy signal handler, doesn't do anything
        pass

    handlers = {
        # unblocking SIGCHLD allows the wakeup fd to notify our event loop
        signal.SIGCHLD: sigchld_handler,
        # protect the process from ^C
        signal.SIGINT: signal.SIG_IGN,
        }
    # Install the handlers and remember the previous ones; they are passed
    # on to _serve_one() in each forked child.
    old_handlers = {sig: signal.signal(sig, val)
                    for (sig, val) in handlers.items()}

    # calling os.write() in the Python signal handler is racy
    signal.set_wakeup_fd(sig_w)

    # map child pids to client fds
    pid_to_fd = {}

    with socket.socket(socket.AF_UNIX, fileno=listener_fd) as listener, \
         selectors.DefaultSelector() as selector:
        _forkserver._forkserver_address = listener.getsockname()

        selector.register(listener, selectors.EVENT_READ)
        selector.register(alive_r, selectors.EVENT_READ)
        selector.register(sig_r, selectors.EVENT_READ)

        while True:
            try:
                # Block until at least one registered object is readable.
                while True:
                    rfds = [key.fileobj for (key, events) in selector.select()]
                    if rfds:
                        break

                if alive_r in rfds:
                    # EOF because no more client processes left
                    assert os.read(alive_r, 1) == b'', "Not at EOF?"
                    raise SystemExit

                if sig_r in rfds:
                    # Got SIGCHLD
                    os.read(sig_r, 65536)  # exhaust
                    while True:
                        # Scan for child processes
                        try:
                            pid, sts = os.waitpid(-1, os.WNOHANG)
                        except ChildProcessError:
                            break
                        if pid == 0:
                            break
                        child_w = pid_to_fd.pop(pid, None)
                        if child_w is not None:
                            # Death by signal is reported as the negated
                            # signal number, otherwise the exit status.
                            if os.WIFSIGNALED(sts):
                                returncode = -os.WTERMSIG(sts)
                            else:
                                if not os.WIFEXITED(sts):
                                    raise AssertionError(
                                        "Child {0:n} status is {1:n}".format(
                                            pid, sts))
                                returncode = os.WEXITSTATUS(sts)
                            # Send exit code to client process
                            try:
                                write_signed(child_w, returncode)
                            except BrokenPipeError:
                                # client vanished
                                pass
                            os.close(child_w)
                        else:
                            # This shouldn't happen really
                            warnings.warn('forkserver: waitpid returned '
                                          'unexpected pid %d' % pid)

                if listener in rfds:
                    # Incoming fork request
                    with listener.accept()[0] as s:
                        # Receive fds from client
                        fds = reduction.recvfds(s, MAXFDS_TO_SEND + 1)
                        if len(fds) > MAXFDS_TO_SEND:
                            raise RuntimeError(
                                "Too many ({0:n}) fds to send".format(
                                    len(fds)))
                        # The first two fds are this request's pipe ends;
                        # the remainder is passed through to _serve_one().
                        child_r, child_w, *fds = fds
                        s.close()
                        pid = os.fork()
                        if pid == 0:
                            # Child
                            code = 1
                            try:
                                listener.close()
                                selector.close()
                                # Descriptors that belong to the server loop
                                # and to other children.
                                unused_fds = [alive_r, child_w, sig_r, sig_w]
                                unused_fds.extend(pid_to_fd.values())
                                code = _serve_one(child_r, fds,
                                                  unused_fds,
                                                  old_handlers)
                            except Exception:
                                sys.excepthook(*sys.exc_info())
                                sys.stderr.flush()
                            finally:
                                # Always leave via os._exit() so the child
                                # never returns into the server loop.
                                os._exit(code)
                        else:
                            # Send pid to client process
                            try:
                                write_signed(child_w, pid)
                            except BrokenPipeError:
                                # client vanished
                                pass
                            pid_to_fd[pid] = child_w
                            os.close(child_r)
                            for fd in fds:
                                os.close(fd)

            except OSError as e:
                # ECONNABORTED is tolerated and the loop continues; any
                # other OSError propagates.
                if e.errno != errno.ECONNABORTED:
                    raise
def chk_state(st, opts): print "Will check state" sigsk_name = "\0" + "CRSIGSKC" signal_sk = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM, 0) signal_sk.bind(sigsk_name) # FIXME Ideally call to criu should be performed by the run_state's # pid!=0 branch, but for simplicity we fork the kid which has the # same set of sockets we do, then dump it. Then restore and notify # via dgram socket to check its state. Current task still has all # the same sockets :) so we close them not to produce bind() name # conflicts on restore pid = os.fork() if pid == 0: msg = signal_sk.recv(64) ret = chk_real_state(st) sys.exit(ret) signal_sk.close() for rsk in st.real_sockets.values(): rsk.close() print "`- dump" img_path = "sti_" + st.describe() try: os.mkdir(img_path) subprocess.check_call([criu_bin, "dump", "-t", "%d" % pid, "-D", img_path, "-v4", "-o", "dump.log", "-j"]) except: print "Dump failed" os.kill(pid, signal.SIGKILL) return CHK_FAIL_DUMP print "`- restore" try: os.waitpid(pid, 0) subprocess.check_call([criu_bin, "restore", "-D", img_path, "-v4", "-o", "rst.log", "-j", "-d", "-S"]) except: print "Restore failed" return CHK_FAIL_RESTORE print "`- check" signal_sk = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM, 0) try: signal_sk.sendto('check', sigsk_name) except: # Probably the peer has died before us or smth else went wrong os.kill(pid, signal.SIGKILL) wp, status = os.waitpid(pid, 0) if os.WIFEXITED(status): status = os.WEXITSTATUS(status) if status != CHK_PASS: print "`- exited with %d" % status return status elif os.WIFSIGNALED(status): status = os.WTERMSIG(status) print "`- killed with %d" % status return CHK_FAIL_KILLED else: return CHK_FAIL_UNKNOWN return CHK_PASS
def __spawn_execute(self):
    # Fork and exec self._child with self._args, then wait for it in the
    # parent. When self._debugger is set the child is ptrace-traced and
    # every syscall stop is dispatched to the debugger's handlers.
    # On completion this sets self._rusage, self._r_duration,
    # self._duration, self._tle (only when over self._time),
    # self._returncode, and signals self._started / self._died.
    child_args = self._args

    # The garbage collector is disabled around fork() and re-enabled
    # afterwards in the parent, or immediately if fork() fails.
    gc_enabled = gc.isenabled()
    try:
        gc.disable()
        pid = os.fork()
    except:
        if gc_enabled:
            gc.enable()
        raise
    if not pid:
        if self._memory:
            # Soft and hard address-space limits are both set to
            # self._memory * 1024 plus 16 MiB.
            # NOTE(review): presumably self._memory is in KiB - confirm.
            resource.setrlimit(resource.RLIMIT_AS,
                               (self._memory * 1024 + 16 * 1024 * 1024, ) * 2)
        os.dup2(self._stdin_, 0)
        os.dup2(self._stdout_, 1)
        os.dup2(self._stderr_, 2)
        # Close all file descriptors that are not standard
        os.closerange(3, os.sysconf("SC_OPEN_MAX"))
        if self._debugger:
            # Ask to be traced, then stop ourselves with SIGSTOP.
            ptrace(PTRACE_TRACEME, 0, None, None)
            os.kill(os.getpid(), SIGSTOP)
        # Replace current process with the child process
        # This call does not return
        os.execv(self._child, child_args)
        # Unless it does, of course, in which case you're screwed
        # We don't cover this in the warranty
        # When you reach here, you are screwed
        # As much as being handed control of a MySQL server without
        # ANY SQL knowledge or docs. ENJOY.
        os._exit(3306)
    else:
        if gc_enabled:
            gc.enable()
        try:
            status = None
            self._pid = pid
            # The parent closes its copies of the child's stdio fds.
            os.close(self._stdin_)
            os.close(self._stdout_)
            os.close(self._stderr_)
            _debugger = self._debugger
            if _debugger:
                _debugger.pid = pid
                # Depending on the bitness, import a different ptrace
                # Registers change depending on bitness, as do syscall ids
                bitness = self.bitness
                if bitness == 64:
                    import _ptrace64 as _ptrace
                else:
                    import _ptrace32 as _ptrace
                # Define the shells for reading syscall arguments in the debugger
                _debugger.arg0 = lambda: _ptrace.arg0(pid)
                _debugger.arg1 = lambda: _ptrace.arg1(pid)
                _debugger.arg2 = lambda: _ptrace.arg2(pid)
                _debugger.arg3 = lambda: _ptrace.arg3(pid)
                _debugger.arg4 = lambda: _ptrace.arg4(pid)
                _debugger.arg5 = lambda: _ptrace.arg5(pid)
                # Reverse syscall ids
                # (each syscalls.translator value is indexed with False/True,
                # i.e. entry 0 or 1, depending on bitness == 64)
                wrapped_ids = [None] * len(syscalls.translator)
                for k, x in syscalls.translator.iteritems():
                    call = x[bitness == 64]
                    if call is not None:
                        wrapped_ids[call] = k
                # Utility method for getting syscall number for call
                get_syscall_number = lambda: wrapped_ids[
                    _ptrace.get_syscall_number(pid)]
                _debugger.get_syscall_number = get_syscall_number
                # Let the debugger define its proxies
                syscall_proxies = [None] * len(syscalls.by_id)
                for call_id, handler in self._debugger.get_handlers(
                ).iteritems():
                    syscall_proxies[call_id] = handler
                self._start = time.time()
                self._started.set()
                if _debugger:
                    # _tt accumulates time measured inside this loop and is
                    # subtracted from the wall-clock duration below;
                    # _st is the timestamp taken after each wait4().
                    _debugger._tt = 0
                    _debugger._st = 0
                in_syscall = False
                while True:
                    _, status, self._rusage = os.wait4(pid, 0)
                    if _debugger:
                        _debugger._st = time.time()
                    if os.WIFEXITED(status):
                        break
                    if os.WIFSIGNALED(status):
                        break
                    if os.WSTOPSIG(status) == SIGTRAP:
                        # SIGTRAP stops alternate between two states; the
                        # handler runs on every second one.
                        in_syscall = not in_syscall
                        if not in_syscall:
                            call = get_syscall_number()
                            handler = syscall_proxies[call]
                            if handler is not None:
                                if not handler():
                                    os.kill(pid, SIGKILL)
                                    print "Killed on", call, syscalls.by_id[
                                        call]
                                # The @*syscall decorators resume the syscall
                                continue
                            else:
                                # Our method is not proxied, so is assumed to be disallowed
                                # TODO: perhaps add option to cancel the syscall instead?
                                raise AssertionError(
                                    "%d (%s)" % (call, syscalls.by_id[call]))
                    # Not handled by a decorator: resume syscall
                    ptrace(PTRACE_SYSCALL, pid, None, None)
                    if _debugger:
                        _debugger._tt += time.time() - _debugger._st
            else:
                # No debugger: a single wait4() collects the exit status.
                self._start = time.time()
                self._started.set()
                _, status, self._rusage = os.wait4(pid, 0)
            self._r_duration = time.time() - self._start
            self._duration = self._r_duration - (_debugger._tt
                                                 if _debugger else 0)
            # CPU time, __shocker uses clock time in case a malicious user sleeps or what not.
            #self._duration = self._rusage.ru_utime
            if self._time and self._duration > self._time:
                self._tle = True
            # Exit status if the child exited, else the negated signal number.
            ret = os.WEXITSTATUS(status) if os.WIFEXITED(
                status) else -os.WTERMSIG(status)
            self._returncode = ret
            self._died.set()
        finally:
            # If no return code was recorded (e.g. an exception above),
            # make sure the child does not keep running.
            if self.returncode is None:
                os.kill(self._pid, SIGKILL)
pass pid = os.fork() if pid == 0: os.execvp(sys.argv[2], sys.argv[2:]) else: # parent t = threading.Timer(timeout, childkill) t.start() while True: try: pid, status = os.waitpid(pid, 0) except KeyboardInterrupt: continue else: t.cancel() break if os.WIFEXITED(status): sys.exit(os.WEXITSTATUS(status)) else: assert os.WIFSIGNALED(status) sign = os.WTERMSIG(status) if timedout and sign == signal.SIGTERM: sys.exit(1) signame = getsignalname(sign) sys.stderr.write("="*26 + "timedout" + "="*26 + "\n") sys.stderr.write("="*25 + " %-08s " % signame + "="*25 + "\n") sys.exit(1)
def _run_in_child(
    self,
    *,
    chroot_dir: Path,
    network_config: Optional[pyspawner.NetworkConfig],
    compiled_module: CompiledModule,
    timeout: float,
    result: Any,
    function: str,
    args: List[Any],
) -> Any:
    """
    Fork a child process to run `function` with `args`.

    `args` must be Thrift data types. `result` must also be a Thrift type --
    its `.read()` function will be called, which may produce an error if
    the child process has a bug. (EOFError is very likely.)

    Return `result` after it has been populated from the child's stdout.

    Raise ModuleExitedError if the child process did not behave as expected
    (non-zero exit code, truncated output or trailing output).

    Raise ModuleTimeoutError if it did not exit after a delay -- or if it
    closed its file descriptors long before it exited.
    """
    limit_time = time.time() + timeout

    module_process = self._pyspawner.spawn_child(
        args=[compiled_module, function, args],
        process_name=compiled_module.module_slug,
        sandbox_config=pyspawner.SandboxConfig(
            chroot_dir=chroot_dir, network=network_config
        ),
    )

    # stdout is Thrift package; stderr is logs
    output_reader = ChildReader(
        module_process.stdout.fileno(), OUTPUT_BUFFER_MAX_BYTES
    )
    log_reader = ChildReader(module_process.stderr.fileno(), LOG_BUFFER_MAX_BYTES)
    # Read until the child closes its stdout and stderr
    with selectors.DefaultSelector() as selector:
        selector.register(output_reader.fileno, selectors.EVENT_READ)
        selector.register(log_reader.fileno, selectors.EVENT_READ)

        timed_out = False
        while selector.get_map():
            remaining = limit_time - time.time()
            if remaining <= 0:
                if not timed_out:
                    timed_out = True
                    module_process.kill()  # untrusted code could ignore SIGTERM
                # Wait as long as it takes for everything to die.
                # After SIGKILL the child will close each fd, sending EOF
                # to us. That means the selector _must_ return.
                select_timeout = None
            else:
                select_timeout = remaining  # wait until we reach our timeout

            events = selector.select(timeout=select_timeout)
            ready = frozenset(key.fd for key, _ in events)
            for reader in (output_reader, log_reader):
                if reader.fileno in ready:
                    reader.ingest()
                    if reader.eof:
                        selector.unregister(reader.fileno)

    # The child closed its fds, so it should die soon. If it doesn't, that's
    # a bug -- so kill -9 it!
    #
    # os.wait() has no timeout option, and asyncio messes with signals so
    # we won't use those. Spin until the process dies, and force-kill if we
    # spin too long.
    for _ in range(DEAD_PROCESS_N_WAITS):
        pid, exit_status = module_process.wait(os.WNOHANG)
        if pid != 0:  # pid==0 means process is still running
            break
        time.sleep(DEAD_PROCESS_WAIT_POLL_INTERVAL)
    else:
        # we waited and waited. No luck. Dead module. Kill it.
        timed_out = True
        module_process.kill()
        _, exit_status = module_process.wait(0)

    # Normal exit keeps the exit status; death by signal is reported as the
    # negated signal number.
    if os.WIFEXITED(exit_status):
        exit_code = os.WEXITSTATUS(exit_status)
    elif os.WIFSIGNALED(exit_status):
        exit_code = -os.WTERMSIG(exit_status)
    else:
        raise RuntimeError("Unhandled wait() status: %r" % exit_status)

    if timed_out:
        raise ModuleTimeoutError

    if exit_code != 0:
        raise ModuleExitedError(exit_code, log_reader.to_str())

    transport = thrift.transport.TTransport.TMemoryBuffer(output_reader.buffer)
    protocol = thrift.protocol.TBinaryProtocol.TBinaryProtocol(transport)
    try:
        result.read(protocol)
    except EOFError:  # TODO handle other errors Thrift may throw
        raise ModuleExitedError(exit_code, log_reader.to_str()) from None

    # We should be at the end of the output now. If we aren't, that means
    # the child wrote too much.
    if transport.read(1) != b"":
        raise ModuleExitedError(exit_code, log_reader.to_str())

    if log_reader.buffer:
        logger.info("Output from module process: %s", log_reader.to_str())

    return result
def runTestRaw(name, numProcs, cmds):
  """Run one launch / checkpoint / kill / restart test named `name`.

  Each command in `cmds` is started under dmtcp_launch, then CYCLES rounds
  of checkpoint, kill and restart are performed (a restart is retried up to
  RETRIES times). `numProcs` is the expected number of processes reported
  by the coordinator: either an int or a list of acceptable ints.

  stats[1] is incremented when the test is started and stats[0] when it
  passes. On CheckFailed the failure is reported and the processes are
  cleaned up; the exception is re-raised only when args.retry_once is set.
  """
  #the expected/correct running status
  # if USE_M32:
  #   def forall(fnc, lst):
  #     return reduce(lambda x, y: x and y, map(fnc, lst))
  #   if not forall(lambda x: x.startswith("./test/"), cmds):
  #     return
  status=(numProcs, True)
  procs=[]

  def doesStatusSatisfy(newStatus,requiredStatus):
    """True if newStatus matches requiredStatus (int or list of ints)."""
    if isinstance(requiredStatus[0], int):
      statRange = [requiredStatus[0]]
    elif isinstance(requiredStatus[0], list):
      statRange = requiredStatus[0]
    else:
      raise NotImplementedError
    return newStatus[0] in statRange and newStatus[1] == requiredStatus[1]

  def wfMsg(msg):
    #return function to generate error message
    return lambda: msg+", "+str(status[0])+ \
                   " expected, %d found, running=%d" % getStatus()

  def testKill():
    #kill all processes
    coordinatorCmd(b'k')
    try:
      WAITFOR(lambda: getStatus()==(0, False),
              lambda:"coordinator kill command failed")
    except CheckFailed:
      global coordinator
      coordinatorCmd(b'q')
      os.system("kill -9 %d" % coordinator.pid)
      print("Trying to kill old coordinator, and run new one on same port")
      coordinator = runCmd(BIN+"dmtcp_coordinator")
    # Iterate over a copy: removing entries from the list being iterated
    # would skip every other process and leave it uncleaned.
    for x in list(procs):
      #cleanup proc
      try:
        if isinstance(x.stdin,int):
          os.close(x.stdin)
        elif x.stdin:
          x.stdin.close()
        if isinstance(x.stdout,int):
          os.close(x.stdout)
        elif x.stdout:
          x.stdout.close()
        if isinstance(x.stderr,int):
          os.close(x.stderr)
        elif x.stderr:
          x.stderr.close()
      except Exception:
        # Closing the pipes is best effort only.
        pass
      try:
        os.waitpid(x.pid, os.WNOHANG)
      except OSError as e:
        if e.errno != errno.ECHILD:
          raise e
      procs.remove(x)

  def testCheckpoint():
    #start checkpoint
    coordinatorCmd(CKPT_CMD)

    #wait for files to appear and status to return to original
    WAITFOR(lambda: getNumCkptFiles(ckptDir)>0 and
                    (CKPT_CMD == b'xc' or
                     doesStatusSatisfy(getStatus(), status)),
            wfMsg("checkpoint error"))
    #we now know there was at least one checkpoint file, and the correct number
    #  of processes have restarted;  but they may fail quickly after restert

    if SLOW > 1:
      #wait and give the processes time to write all of the checkpoint files
      sleep(S*SLOW)

    #make sure the right files are there
    numFiles=getNumCkptFiles(ckptDir) # len(os.listdir(ckptDir))
    CHECK(doesStatusSatisfy((numFiles,True),status),
          "unexpected number of checkpoint files, %s procs, %d files"
          % (str(status[0]), numFiles))

    if SLOW > 1:
      #wait and see if some processes will die shortly after checkpointing
      sleep(S*SLOW)
      CHECK(doesStatusSatisfy(getStatus(), status),
            "error: processes checkpointed, but died upon resume")

  def testRestart():
    #build restart command
    cmd=BIN+"dmtcp_restart --quiet"
    for i in os.listdir(ckptDir):
      if i.endswith(".dmtcp"):
        cmd+= " "+ckptDir+"/"+i
    #run restart and test if it worked
    procs.append(runCmd(cmd))
    WAITFOR(lambda: doesStatusSatisfy(getStatus(), status),
            wfMsg("restart error"))
    if SLOW > 1:
      #wait and see if process will die shortly after restart
      sleep(S*SLOW)
      CHECK(doesStatusSatisfy(getStatus(), status),
            "error: processes restarted and then died")
    if HBICT_DELTACOMP == "no":
      clearCkptDir()

  try:
    printFixed(name,15)

    if not shouldRunTest(name):
      print("SKIPPED")
      return

    stats[1]+=1
    CHECK(getStatus()==(0, False), "coordinator initial state")

    #start user programs
    for cmd in cmds:
      procs.append(runCmd(BIN+"dmtcp_launch "+cmd))

    #TIMEOUT in WAITFOR has also been multiplied by SLOW
    WAITFOR(lambda: doesStatusSatisfy(getStatus(), status),
            wfMsg("user program startup error"))

    # Additional sleep to allow the test to boot.
    sleep(POST_LAUNCH_SLEEP)

    #Will sleep(S*SLOW) in the following for loop.

    for i in range(CYCLES):
      if i!=0 and i%2==0:
        printFixed("\n")
        printFixed("",15)
      printFixed("ckpt:")
      # NOTE:  If this fails, it will throw an exception to the CheckFailed
      #  handler of this function
      #wait for launched processes to settle down, before we try to checkpoint
      sleep(S*SLOW)
      testCheckpoint()
      printFixed("PASSED; ")
      testKill()

      printFixed("rstr:")
      for j in range(RETRIES):
        try:
          testRestart()
          printFixed("PASSED")
          break
        except CheckFailed as e:
          if j == RETRIES-1:
            # Save checkpoint images for later diagnosis.
            if os.path.isdir(dmtcp_tmpdir()) and os.path.isdir(ckptDir):
              if subprocess.call( ("cp -pr " + ckptDir + ' '
                                   + dmtcp_tmpdir()).split() ) == 0:
                print("\n***** Copied checkpoint images to " + dmtcp_tmpdir()
                      + "/" + ckptDir)
            raise e
          else:
            printFixed("FAILED ")
            (oldpid, oldstatus) = os.waitpid(procs[-1].pid, os.WNOHANG)
            if oldpid == procs[-1].pid:
              if os.WIFEXITED(oldstatus):
                printFixed("(first process exited: oldstatus "
                           + str(os.WEXITSTATUS(oldstatus)) + ")")
              if os.WIFSIGNALED(oldstatus):
                printFixed("(first process rec'd signal "
                           + str(os.WTERMSIG(oldstatus)) + ")")
              if os.WCOREDUMP(oldstatus):
                coredump = "core." + str(oldpid)
                if os.path.isdir(dmtcp_tmpdir()) and os.path.isfile(coredump):
                  if subprocess.call( ("cp -pr " + coredump + ' '
                                       + dmtcp_tmpdir()).split() ) == 0:
                    printFixed(" (" + coredump + " copied to DMTCP_TMPDIR:" +
                               dmtcp_tmpdir() + "/)")
            else:
              printFixed("(Either first process didn't die, or else this long" +
                         " delay has been observed due to a slow" +
                         " NFS-based filesystem.)")
            printFixed("; retry:")
            testKill()
      if i != CYCLES - 1:
        printFixed(" -> ")
        if i % 2 == 1:
          printFixed("(cont.)")

    testKill()
    printFixed("\n")
    stats[0]+=1

  except CheckFailed as e:
    print("FAILED")
    printFixed("",15)
    print("root-pids:", [x.pid for x in procs], "msg:", e.value)
    try:
      testKill()
    except CheckFailed as e:
      # Cleanup itself failed: give up on the whole run.
      print("CLEANUP ERROR:", e.value)
      SHUTDOWN()
      saveResultsNMI()
      sys.exit(1)
    if args.retry_once:
      clearCkptDir()
      raise e

  clearCkptDir()
# compatibility code is still in place. self.child_pid = self.terminal.fork_command(command=command[0], argv=command, **kws) while self.vte_fork_running: gtk.main_iteration() if self.quit: raise ExitRequestedException() self.child_pid = None if os.WIFEXITED(self.vte_child_exit_status): rc = os.WEXITSTATUS(self.vte_child_exit_status) elif os.WIFSIGNALED(self.vte_child_exit_status): raise CommandError( _('%(command)s died with signal %(rc)s') % { 'command': short_command, 'rc': os.WTERMSIG(self.vte_child_exit_status) }) if rc: raise CommandError( _('%(command)s returned with an error code (%(rc)s)') % { 'command': short_command, 'rc': rc }) def on_vte_child_exit_cb(self, terminal): self.vte_fork_running = False self.vte_child_exit_status = self.terminal.get_child_exit_status() class SelectModulesDialog(gtk.Dialog):
def start(self, trace=0):
    """Attempts to start the daemons.

    The return value is defined by the LSB:
    0  Success
    4  Insufficient privileges

    When a daemon is forked, the parent instead returns whatever integer
    the daemon wrote to the startup pipe, or 1 if it wrote nothing.
    """
    running_pid = self.cleanup_xend(False)

    if self.set_user():
        return 4
    os.chdir("/")

    # Trying to run an already-running service is a success.
    if running_pid > 0:
        return 0

    # Without daemonizing there is nothing to fork: just call the run
    # method right here.
    if not XEND_DAEMONIZE:
        self.tracing(trace)
        self.run(None)
        return 0

    # The pipe lets the parent learn when the child has actually
    # initialized itself, which avoids a race condition during startup.
    r, w = os.pipe()
    if os.fork():
        os.close(w)
        startup_pipe = os.fdopen(r, 'r')
        try:
            reported = startup_pipe.read()
        finally:
            startup_pipe.close()
        if len(reported) == 0:
            return 1
        return int(reported)

    # Child
    os.close(r)
    self.daemonize()
    self.tracing(trace)

    # If Xend proper segfaults, then we want to restart it. Thus,
    # we fork a child for running Xend itself, and if it segfaults
    # (or exits any way other than cleanly) then we run it again.
    # Only the first run gets the write end of the startup pipe; later
    # restarts are passed None (w is cleared in the parent of the first
    # fork), otherwise the pipe would eventually fill up. On some
    # operating systems restart is managed externally, so we do not fork
    # and just exit after running.
    while True:
        if not osdep.xend_autorestart:
            self.run(os.fdopen(w, 'w'))
            os._exit(0)

        worker_pid = self.fork_pid()
        if not worker_pid:
            self.run(w and os.fdopen(w, 'w') or None)
            # if we reach here, the child should quit.
            os._exit(0)

        if w is not None:
            os.close(w)
            w = None

        wait_status = os.waitpid(worker_pid, 0)[1]

        if os.WIFEXITED(wait_status):
            code = os.WEXITSTATUS(wait_status)
            log.info('Xend exited with status %d.', code)
            sys.exit(code)

        if os.WIFSIGNALED(wait_status):
            sig = os.WTERMSIG(wait_status)
            if sig in (signal.SIGINT, signal.SIGTERM):
                log.info('Xend stopped due to signal %d.', sig)
                sys.exit(0)
            log.fatal('Xend died due to signal %d! Restarting it.', sig)
def monitorp(self):
    """Poll the child process until it exits or has to be terminated.

    Every self.monitor_sleep seconds the child (self.pid) is checked with a
    non-blocking waitpid(). The child is terminated (SIGTERM, then SIGKILL
    after self.kill_sleep seconds) when it has run longer than
    self.expiration seconds, or when self.monitor_log_file has not been
    modified for more than self.monitor_log_expiration seconds.

    Returns the raw wait status if the child exited on its own, or -1 if
    it was terminated here or monitoring failed with an exception.
    """

    def terminate(reason):
        # Shared termination sequence for both expiration checks.
        os.kill(self.pid, signal.SIGTERM)
        os.waitpid(self.pid, os.WNOHANG)
        # wait a bit and then kill again with SIGKILL to ensure the kill
        time.sleep(self.kill_sleep)
        os.kill(self.pid, signal.SIGKILL)
        self.log.write_error("Process %s, pid %d, %s. Terminating."
                             % (self.args[0], self.pid, reason))
        return -1

    while True:
        try:
            pid, status = os.waitpid(self.pid, os.WNOHANG)
            if pid == self.pid:
                if os.WIFEXITED(status):
                    self.log.write_info("Child %d exited, status = %d."
                                        % (self.pid, os.WEXITSTATUS(status)))
                elif os.WIFSIGNALED(status):
                    self.log.write_info("Child %d exited due to signal %d."
                                        % (self.pid, os.WTERMSIG(status)))
                else:
                    # Logged through self.log like the other branches.
                    self.log.write_info("Child %d exited abnormally."
                                        % self.pid)
                return status

            if self.expiration > 0:
                # check whether child has been running too long
                if time.time() - self.start_time > self.expiration:
                    return terminate("ran past expiration time")

            if self.monitor_log_file != "" and self.monitor_log_expiration > 0:
                # check whether child has modified its log file recently
                timeval = time.time()
                mod_time = log.log_mod_time(self.monitor_log_file, timeval)
                if mod_time > 0:
                    self.monitor_log_time = mod_time
                if timeval - self.monitor_log_time > self.monitor_log_expiration:
                    return terminate("log file stale")
        except Exception:
            # sys.exc_info() works on both Python 2 and 3.
            exc_type, exc_value = sys.exc_info()[:2]
            self.log.write_error("Process %s, pid %d: failure in waitpid() section. Exception %s, %s. Terminating."
                                 % (self.args[0], self.pid, exc_type, exc_value))
            return -1

        time.sleep(self.monitor_sleep)
def wait_pid(pid, timeout=None, proc_name=None,
             _waitpid=os.waitpid,
             _timer=getattr(time, 'monotonic', time.time),
             _min=min,
             _sleep=time.sleep,
             _pid_exists=pid_exists):
    """Wait for the process with the given PID to terminate.

    Return value:
    - the (positive) exit code if the process terminated normally, i.e.
      via exit(3), _exit(2) or by returning from main();
    - the negated signal number (e.g. -SIGTERM) if it was killed by a
      signal;
    - None if PID is not a child of the current process (in which case we
      poll until it disappears) or does not exist at all.

    If *timeout* is not None and the process is still alive when it
    elapses, TimeoutExpired is raised. timeout=0 is allowed: either return
    immediately or raise.
    """
    if pid <= 0:
        raise ValueError("can't wait for PID 0")  # see "man waitpid"

    delay = 0.0001
    if timeout is None:
        flags = 0
    else:
        # Poll instead of blocking so the deadline can be enforced.
        flags = os.WNOHANG
        stop_at = _timer() + timeout

    def sleep(interval):
        # Sleep for some time and return a new increased interval; raise
        # if the deadline has already passed.
        if timeout is not None and _timer() >= stop_at:
            raise TimeoutExpired(timeout, pid=pid, name=proc_name)
        _sleep(interval)
        return _min(interval * 2, 0.04)

    # See: https://linux.die.net/man/2/waitpid
    while True:
        try:
            retpid, status = os.waitpid(pid, flags)
        except InterruptedError:
            delay = sleep(delay)
            continue
        except ChildProcessError:
            # Either PID is not our child (keep polling until it is gone)
            # or it never existed. In both cases the exit status cannot be
            # determined, so None is returned.
            while _pid_exists(pid):
                delay = sleep(delay)
            return None

        if retpid == 0:
            # WNOHANG flag was used and PID is still running.
            delay = sleep(delay)
            continue
        if os.WIFEXITED(status):
            # Normal termination: hand back the value passed to *exit().
            return os.WEXITSTATUS(status)
        if os.WIFSIGNALED(status):
            # Killed by a signal: return the negated signal number.
            return negsig_to_enum(-os.WTERMSIG(status))
        # Stopped (WUNTRACED) and continued (WCONTINUED) states are not
        # requested by the flags above, so they are not handled here.
        # Should never happen.
        raise ValueError("unknown process exit status %r" % status)
dmtcp_tmpdir()).split()) == 0: print "\n***** Copied checkpoint images to " + dmtcp_tmpdir() \ + "/" + ckptDir raise e else: printFixed("FAILED ") (oldpid, oldstatus) = os.waitpid(procs[-1].pid, os.WNOHANG) if oldpid == procs[-1].pid: if os.WIFEXITED(oldstatus): printFixed( "(first process exited: oldstatus " + str(os.WEXITSTATUS(oldstatus)) + ")") if os.WIFSIGNALED(oldstatus): printFixed("(first process rec'd signal " + str(os.WTERMSIG(oldstatus)) + ")") if os.WCOREDUMP(oldstatus): coredump = "core." + str(oldpid) if os.path.isdir(dmtcp_tmpdir( )) and os.path.isfile(coredump): if subprocess.call( ("cp -pr " + coredump + ' ' + dmtcp_tmpdir()).split()) == 0: printFixed(" (" + coredump + " copied to DMTCP_TMPDIR:" + dmtcp_tmpdir() + "/)") else: printFixed("(first process didn't die)") printFixed(" retry:") testKill() if i != CYCLES - 1:
def __init__(self, command, working_dir=None, capture_stderr=True, env=None):
  """Runs a command, optionally inside a different working directory.

  The previous working directory is restored afterwards.

  Args:
    command:        The command to run, in the form of sys.argv.
    working_dir:    The directory to change into.
    capture_stderr: Determines whether to capture stderr in the output
                    member or to discard it.
    env:            Dictionary with environment to pass to the subprocess.

  Returns:
    An object that represents outcome of the executed process. It has the
    following attributes:
      terminated_by_signal   True iff the child process has been terminated
                             by a signal.
      signal                 Signal that terminated the child process.
      exited                 True iff the child process exited normally.
      exit_code              The code with which the child process exited.
      output                 Child process's stdout and stderr output
                             combined in a string.
  """
  # subprocess is preferred because it is available and behaves
  # consistently on all platforms, including Windows, but it only exists
  # from Python 2.4 on. Earlier versions fall back to popen2, which is
  # available from Python 2.0 but lacks Popen4 under Windows. This keeps
  # Mac OS X 10.4 Tiger (Python 2.3) supported.
  if _SUBPROCESS_MODULE_AVAILABLE:
    stderr_target = subprocess.PIPE
    if capture_stderr:
      stderr_target = subprocess.STDOUT

    child = subprocess.Popen(command,
                             stdout=subprocess.PIPE, stderr=stderr_target,
                             cwd=working_dir, universal_newlines=True, env=env)
    # The first element of communicate()'s result is the child's stdout.
    self.output = child.communicate()[0]
    self._return_code = child.returncode
  else:
    def _OverwriteDict(target, replacement):
      # os.environ.clear() is not inherited by child processes before
      # Python 2.6, so the entries are removed one by one with del.
      for name in target.keys():
        del target[name]
      target.update(replacement)

    original_dir = os.getcwd()

    # With an explicit 'env', swap it in and remember the current
    # environment; with env=None the child simply sees os.environ, which
    # matches the subprocess.Popen semantics used above.
    if env is not None:
      saved_environ = os.environ.copy()
      _OverwriteDict(os.environ, env)

    try:
      if working_dir is not None:
        os.chdir(working_dir)
      if capture_stderr:
        child = popen2.Popen4(command)
      else:
        child = popen2.Popen3(command)
      child.tochild.close()
      self.output = child.fromchild.read()
      wait_status = child.wait()
    finally:
      os.chdir(original_dir)

      # Put the original environment back if it was replaced.
      if env is not None:
        _OverwriteDict(os.environ, saved_environ)

    # Translate the raw wait status into the semantics of
    # subprocess.Popen.returncode.
    if os.WIFSIGNALED(wait_status):
      self._return_code = -os.WTERMSIG(wait_status)
    else:
      # os.WIFEXITED(wait_status) should return True here.
      self._return_code = os.WEXITSTATUS(wait_status)

  # A negative return code means the child was killed by that signal.
  killed = self._return_code < 0
  self.terminated_by_signal = killed
  self.exited = not killed
  if killed:
    self.signal = -self._return_code
  else:
    self.exit_code = self._return_code
def test_shutdown_handler(self):
    # sys.exit is patched so the handler's call can be inspected: it must
    # have been called, and WTERMSIG of its first argument must equal the
    # signal number (15) handed to the handler.
    with patch('sys.exit') as mock_exit:
        _shutdown_cleanup(15, Mock())
        self.assertTrue(mock_exit.called)
        positional_args = mock_exit.call_args[0]
        exit_status = positional_args[0]
        self.assertEqual(os.WTERMSIG(exit_status), 15)