def _handleSignals(self):
    """Install the mixin's signal handlers, then register our wakeup fd.

    Calls ``_SignalReactorMixin._handleSignals`` directly and afterwards
    points ``signal.set_wakeup_fd`` at the descriptor returned by
    ``self._signal_fds.writer_fileno()``.  A ``ValueError`` raised while
    doing so is ignored on purpose.
    """
    # Bypass installing the child waker, for now
    _SignalReactorMixin._handleSignals(self)
    try:
        wakeup_fd = self._signal_fds.writer_fileno()
        signal.set_wakeup_fd(wakeup_fd)
    except ValueError:
        # NOTE(review): presumably the "not in the main thread" case --
        # confirm that silently skipping the wakeup fd is intended.
        pass
def add_signal_handler(self, sig, callback, *args):
    """Arrange for *callback* to be called with *args* when *sig* arrives.

    UNIX only.

    Raise TypeError if *callback* is a coroutine or a coroutine function.
    Raise ValueError if the signal number is invalid or uncatchable.
    Raise RuntimeError if there is a problem setting up the handler.
    """
    is_coroutine = (coroutines.iscoroutine(callback)
                    or coroutines.iscoroutinefunction(callback))
    if is_coroutine:
        raise TypeError("coroutines cannot be used "
                        "with add_signal_handler()")
    self._check_signal(sig)
    self._check_closed()

    try:
        # set_wakeup_fd() raises ValueError if this is not the
        # main thread.  By calling it early we ensure that an
        # event loop running in another thread cannot add a signal
        # handler.
        signal.set_wakeup_fd(self._csock.fileno())
    except (ValueError, OSError) as exc:
        raise RuntimeError(str(exc))

    self._signal_handlers[sig] = events.Handle(callback, args, self)

    try:
        # On Python 3.3 and newer, the C signal handler writes the
        # signal number into the wakeup file descriptor and then calls
        # Py_AddPendingCall() to schedule the Python signal handler, so
        # a dummy handler is enough: _process_self_data() reads the
        # signal numbers back from the wakeup file descriptor.
        #
        # On Python 3.2 and older, the C signal handler first calls
        # Py_AddPendingCall() to schedule the Python signal handler,
        # and then writes a null byte into the wakeup file descriptor,
        # so the real handler has to be registered.
        python_handler = (_sighandler_noop if compat.PY33
                          else self._handle_signal)
        signal.signal(sig, python_handler)
        # Set SA_RESTART to limit EINTR occurrences.
        signal.siginterrupt(sig, False)
    except (RuntimeError, OSError) as exc:
        # On Python 2, signal.signal(signal.SIGKILL, signal.SIG_IGN) raises
        # RuntimeError(22, 'Invalid argument').  On Python 3,
        # OSError(22, 'Invalid argument') is raised instead.
        exc_type, exc_value, tb = sys.exc_info()

        # Roll back the registration made above.
        del self._signal_handlers[sig]
        if not self._signal_handlers:
            try:
                signal.set_wakeup_fd(-1)
            except (ValueError, OSError) as nexc:
                logger.info('set_wakeup_fd(-1) failed: %s', nexc)

        uncatchable = (isinstance(exc, RuntimeError)
                       or exc.errno == errno.EINVAL)
        if not uncatchable:
            reraise(exc_type, exc_value, tb)
        else:
            raise RuntimeError('sig {0} cannot be caught'.format(sig))
def _setup_signals(self):
    """Install SIGINT/SIGTERM handlers and make sure Python gets to run them.

    Both signals are routed to ``self.interrupt``.

    On POSIX, when ``signal.set_wakeup_fd`` exists, a non-blocking pipe is
    created: its write end becomes the wakeup fd and its read end is watched
    by a QSocketNotifier connected to ``self._handle_signal_wakeup``.

    Otherwise a ``usertypes.Timer`` firing every 1000 ms with a no-op slot
    periodically hands control back to Python.
    """
    signal.signal(signal.SIGINT, self.interrupt)
    signal.signal(signal.SIGTERM, self.interrupt)

    can_use_wakeup_fd = (os.name == 'posix'
                         and hasattr(signal, 'set_wakeup_fd'))
    if not can_use_wakeup_fd:
        self._signal_timer = usertypes.Timer(self, 'python_hacks')
        self._signal_timer.start(1000)
        self._signal_timer.timeout.connect(lambda: None)
        return

    import fcntl
    read_fd, write_fd = os.pipe()
    for pipe_end in (read_fd, write_fd):
        current_flags = fcntl.fcntl(pipe_end, fcntl.F_GETFL)
        fcntl.fcntl(pipe_end, fcntl.F_SETFL, current_flags | os.O_NONBLOCK)

    self._signal_notifier = QSocketNotifier(
        read_fd, QSocketNotifier.Read, self)
    self._signal_notifier.activated.connect(self._handle_signal_wakeup)
    signal.set_wakeup_fd(write_fd)
def main():
    """Demonstrate waking an epoll loop through ``signal.set_wakeup_fd``.

    A pipe is created, its write end is made non-blocking and registered as
    the wakeup fd.  SIGCHLD and SIGALRM get no-op handlers and an interval
    timer delivers SIGALRM every 2 seconds.  The loop then polls the pipe's
    read end and stdin forever, reporting signals and echoing stdin one byte
    at a time.  This function never returns.
    """
    pipe_r, pipe_w = os.pipe()
    flags = fcntl.fcntl(pipe_w, fcntl.F_GETFL, 0)
    fcntl.fcntl(pipe_w, fcntl.F_SETFL, flags | os.O_NONBLOCK)

    signal.signal(signal.SIGCHLD, lambda x, y: None)
    signal.signal(signal.SIGALRM, lambda x, y: None)
    signal.siginterrupt(signal.SIGCHLD, False)  # makes no difference
    signal.siginterrupt(signal.SIGALRM, False)  # makes no difference
    signal.set_wakeup_fd(pipe_w)
    signal.setitimer(signal.ITIMER_REAL, 2, 2)

    stdin_fd = sys.stdin.fileno()
    poller = select.epoll()
    poller.register(pipe_r, select.EPOLLIN)
    poller.register(stdin_fd, select.EPOLLIN)

    # os.read() returns bytes; write them through the binary layer when
    # there is one so this also works where sys.stdout is a text stream.
    raw_stdout = getattr(sys.stdout, "buffer", sys.stdout)

    # Single-argument print(...) behaves the same as a statement on
    # Python 2 and as a function call on Python 3.
    print("Main screen turn on")
    while True:
        try:
            events = poller.poll()
        except (IOError, OSError) as e:
            print("exception poll" + str(e))
            continue

        try:
            for fd, flags in events:
                ch = os.read(fd, 1)
                if fd == pipe_r:
                    sys.stdout.write("We get Signal")
                if fd == stdin_fd:
                    if ch:
                        raw_stdout.write(ch)
                    else:
                        # EOF: a closed stdin stays readable forever, so
                        # stop polling it instead of spinning.
                        poller.unregister(stdin_fd)
                sys.stdout.flush()
        except (IOError, OSError) as e:
            # os.read() raises OSError, which is distinct from IOError on
            # Python 2, hence both are listed.
            print("exception loop" + str(e))
def _shutdown_resources(self):
    """Release everything the kernel holds and reset the matching attributes.

    In order: the notify/wait socket pair, the signal wakeup fd, the
    selector, the thread pool, the process pool and finally the monitor.
    Each resource is only touched when its attribute is truthy; apart from
    the monitor, every attribute is set to ``None`` after release.
    """
    log.debug("Kernel %r shutting down", self)

    # The wait socket is only closed together with the notify socket.
    if self._notify_sock:
        for sock_attr in ('_notify_sock', '_wait_sock'):
            getattr(self, sock_attr).close()
            setattr(self, sock_attr, None)

    if self._signal_sets:
        signal.set_wakeup_fd(-1)
        self._signal_sets = None
        self._default_signals = None

    selector = self._selector
    if selector:
        selector.close()
        self._selector = None

    for pool_attr in ('_thread_pool', '_process_pool'):
        pool = getattr(self, pool_attr)
        if pool:
            pool.shutdown()
            setattr(self, pool_attr, None)

    monitor = self._monitor
    if monitor:
        monitor.close()
def add_signal_handler(self, sig, callback, *args):
    """Add a handler for a signal.  UNIX only.

    *callback* will be invoked with *args* when *sig* is delivered.

    Raise ValueError if the signal number is invalid or uncatchable.
    Raise RuntimeError if there is a problem setting up the handler,
    including when the wakeup fd cannot be installed (wrong thread or an
    unusable file descriptor).
    """
    self._check_signal(sig)
    try:
        # set_wakeup_fd() raises ValueError if this is not the
        # main thread.  By calling it early we ensure that an
        # event loop running in another thread cannot add a signal
        # handler.  It raises OSError when the descriptor itself is
        # unusable, which must be reported the same way.
        signal.set_wakeup_fd(self._csock.fileno())
    except (ValueError, OSError) as exc:
        raise RuntimeError(str(exc))

    handle = events.make_handle(callback, args)
    self._signal_handlers[sig] = handle

    try:
        signal.signal(sig, self._handle_signal)
        # Set SA_RESTART to limit EINTR occurrences.
        signal.siginterrupt(sig, False)
    except OSError as exc:
        # Undo the registration made above.
        del self._signal_handlers[sig]
        if not self._signal_handlers:
            try:
                signal.set_wakeup_fd(-1)
            except (ValueError, OSError) as nexc:
                logger.info('set_wakeup_fd(-1) failed: %s', nexc)

        if exc.errno == errno.EINVAL:
            raise RuntimeError('sig {} cannot be caught'.format(sig))
        else:
            raise
def register():
    '''
    Create the module-wide signal poller.

    A pipe is created whose write end is installed with
    signal.set_wakeup_fd() and whose read end is registered on a
    select.poll object.  The poller and the read descriptor are stored in
    the module globals so the poller can be used in the same manner as
    signal.pause(): polling returns each time the process received a signal.

    This function has to be called from the main thread, and only once;
    a second call raises RuntimeError.
    '''
    global _signal_poller
    global _signal_read_fd

    if _signal_poller is not None:
        raise RuntimeError('register was already called')

    pipe_read, pipe_write = os.pipe()

    # Python c-level signal handler requires that the write end will be in
    # non blocking mode.  The read end is set to non-blocking too, just in
    # case.
    for pipe_end in (pipe_write, pipe_read):
        filecontrol.set_non_blocking(pipe_end)

    # Prevent subprocesses we execute from inheriting the pipes.
    for pipe_end in (pipe_write, pipe_read):
        filecontrol.set_close_on_exec(pipe_end)

    signal.set_wakeup_fd(pipe_write)

    new_poller = select.poll()
    new_poller.register(pipe_read, select.POLLIN)

    _signal_poller = new_poller
    _signal_read_fd = pipe_read
def _shutdown(): nonlocal njobs for task in sorted(tasks.values(), key=lambda t: t.id, reverse=True): if task.id == self._kernel_task_id: continue # If the task is daemonic, force it to non-daemon status and cancel it if task.daemon: njobs += 1 task.daemon = False assert _cancel_task(task) # Run all of the daemon tasks through cancellation if ready: self.run() # Cancel the kernel loopback task (if any) task = tasks.pop(self._kernel_task_id, None) if task: task.cancel_func() self._notify_sock.close() self._notify_sock = None self._wait_sock.close() self._wait_sock = None self._kernel_task_id = None # Remove the signal handling file descriptor (if any) if self._signal_sets: signal.set_wakeup_fd(-1) self._signal_sets = None self._default_signals = None
def remove_signal_handler(self, sig):
    """Unregister the handler for *sig*.  UNIX only.

    The signal's disposition is reset (``signal.default_int_handler`` for
    SIGINT, ``SIG_DFL`` for everything else) and, once no handler is left,
    the wakeup fd is cleared.

    Return True if a signal handler was removed, False if not.
    Raise RuntimeError if the signal cannot be caught.
    """
    self._check_signal(sig)

    registered = self._signal_handlers
    if sig not in registered:
        return False
    del registered[sig]

    default_handler = (signal.default_int_handler if sig == signal.SIGINT
                       else signal.SIG_DFL)
    try:
        signal.signal(sig, default_handler)
    except OSError as exc:
        if exc.errno != errno.EINVAL:
            raise
        raise RuntimeError('sig {} cannot be caught'.format(sig))

    if not self._signal_handlers:
        # Last handler gone: stop routing signals to our socket.
        try:
            signal.set_wakeup_fd(-1)
        except (ValueError, OSError) as exc:
            logger.info('set_wakeup_fd(-1) failed: %s', exc)

    return True
def _signal_pipe(self): # Set up a pipe for SIGCHLD notifications wakeup_r, wakeup_w = os.pipe() fcntl.fcntl(wakeup_w, fcntl.F_SETFL, # Make the pipe non-blocking fcntl.fcntl(wakeup_w, fcntl.F_GETFL, 0) | os.O_NONBLOCK) signal.set_wakeup_fd(wakeup_w) # Tell Python to send a byte to this pipe on signal signal.signal(signal.SIGCHLD, lambda x,y: None) # Stop ignoring SIGCHLD return wakeup_r, wakeup_w
def test_invalid_call(self):
    """set_wakeup_fd() must reject calls that break its signature."""
    # First parameter is positional-only
    self.assertRaises(TypeError, signal.set_wakeup_fd,
                      signum=signal.SIGINT)

    # warn_on_full_buffer is a keyword-only parameter
    self.assertRaises(TypeError, signal.set_wakeup_fd,
                      signal.SIGINT, False)
def init(cls):
    """
    Creates a pipe for waking up a select call when a signal has been received.

    Both ends of the pipe are switched to non-blocking mode and the write
    end is installed with signal.set_wakeup_fd().  The pipe is stored on the
    class as ``__wake_up_pipe`` (read end first).
    """
    cls.__wake_up_pipe = os.pipe()

    # signal.set_wakeup_fd() needs a non-blocking descriptor: the C-level
    # handler must never block on a full pipe, and Python 3.5+ rejects a
    # blocking fd outright.  OR the flag in rather than overwriting the
    # existing status flags.
    for fd in cls.__wake_up_pipe:
        flags = fcntl.fcntl(fd, fcntl.F_GETFL)
        fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)

    signal.set_wakeup_fd(EventQueueEmptyEventHandler.__wake_up_pipe[1])
def __init__(self):
    """Create the conductor's locks, I/O thread object, wakeup pipe and tables.

    The I/O thread is only created here, not started.  The write end of the
    wakeup pipe is also installed as the interpreter's signal wakeup fd, and
    the value returned by ``__start_pgrp()`` is stored in ``self.pgrp``.
    """
    # This lock and condition are used for:
    #
    # - mutual exclusion and synchronization between sections of
    #   code in _Conductor.__io_loop (conductor thread), and in
    #   Process.start() and Process.wait() (main thread)
    #
    # - mutual exclusion between sections of code in
    #   _Conductor.__io_loop() (conductor thread) and in
    #   _Conductor.__reaper_thread_func() (reaper thread)
    self.lock = threading.RLock()
    self.condition = threading.Condition(self.lock)

    self.__io_thread = threading.Thread(target=self.__io_thread_func,
                                        name="I/O")
    # Daemon thread: it will terminate automatically when the main thread
    # exits.  Once in a while this can trigger an exception, but this
    # seems to be safe and to be related to this issue:
    # http://bugs.python.org/issue1856
    # (The ``daemon`` attribute replaces the deprecated setDaemon().)
    self.__io_thread.daemon = True

    # Pipe used to wake up the conductor thread from the main thread
    # when needed.
    self.__rpipe, self.__wpipe = os.pipe()
    # The reading function _read_asmuch() relies on file descriptors
    # being non-blocking.
    _set_fd_nonblocking(self.__rpipe)
    # Because we call signal.set_wakeup_fd on this pipe.
    _set_fd_nonblocking(self.__wpipe)

    # Asynchronous I/O with all subprocesses' filehandles.
    self.__poller = poll()
    self.__poller.register(self.__rpipe, POLLIN | POLLERR)

    # The set of `Process` handled by this `_Conductor`.
    self.__processes = set()

    # keys: the file descriptors currently polled by this `_Conductor`
    # values: tuples (`Process`, `Process`'s function to handle activity
    #         for this descriptor)
    self.__fds = dict()

    # keys: the pids of the subprocesses launched by this `_Conductor`
    # values: their `Process`
    self.__pids = dict()

    # heapq of `Process` with a timeout date
    self.__timeline = []

    # Thread-safe FIFO used to send requests from the main thread to the
    # conductor thread: we enqueue tuples (function to call, tuple of
    # parameters to pass to this function).
    self.__process_actions = queue.Queue()

    # To keep track of whether the reaper thread is running.
    self.__reaper_thread_running = False

    signal.set_wakeup_fd(self.__wpipe)
    self.pgrp = self.__start_pgrp()
def close(self):
    """Release the IOCP handle and helper objects; safe to call repeatedly.

    On the first call the completion port handle is closed, the IOCP thread
    (if any) is joined, the main-thread waker is closed and, when running in
    the main thread, the previously saved signal wakeup fd is reinstated.
    Later calls do nothing.
    """
    if self._closed:
        return
    self._closed = True

    _check(kernel32.CloseHandle(self._iocp))

    iocp_thread = self._iocp_thread
    if iocp_thread is not None:
        iocp_thread.join()

    self._main_thread_waker.close()

    if is_main_thread():
        signal.set_wakeup_fd(self._old_signal_wakeup_fd)
def __init__(self):
    """Start with empty read/write maps and install the signal wakeup pipe.

    The read end of a fresh pipe is registered with ``self.wakeup_handler``
    via ``register_read``; the write end is switched to non-blocking mode
    and handed to ``signal.set_wakeup_fd``.
    """
    self.readmap, self.writemap = {}, {}

    # Setup the wakeup file descriptor to avoid hanging on lost signals.
    read_end, write_end = os.pipe()
    fcntl.fcntl(write_end, fcntl.F_SETFL, os.O_NONBLOCK)
    self.register_read(read_end, self.wakeup_handler)
    signal.set_wakeup_fd(write_end)
def dispose(self, exc=None):
    """Tear the queue down and fail every process still being waited on.

    If this instance is the current one (``self.current[0]``), the SIGCHLD
    handler is reset to the default, the wakeup fd is cleared and the
    current slot is emptied.  Every callback stored in ``self.pids`` is then
    invoked with an error result built from *exc*, or from a
    ``CanceledError`` when *exc* is not given.

    Args:
        exc: Optional exception to report to the pending callbacks.
    """
    if self.current[0] == self:
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)
        signal.set_wakeup_fd(-1)
        self.current[0] = None

    error = Result.from_exception(
        exc or CanceledError('process queue has been disposed'))

    # Detach the pending callbacks before calling them.  Iterate the
    # detached mapping: ``self.pids`` is the fresh, empty dict by now.
    pids, self.pids = self.pids, {}
    for ret in pids.values():
        ret(error)
def main():
    """Repeatedly race SIGINT delivery against a fake IO loop until it is lost.

    A non-blocking socketpair serves as the signal wakeup fd.  Each attempt
    starts a thread running ``raise_SIGINT_soon`` and then sleeps in
    ``select`` for up to 10 seconds, expecting a KeyboardInterrupt after
    roughly 1 second.  The loop stops at the first attempt that takes 2
    seconds or more.
    """
    writer, reader = socket.socketpair()
    for endpoint in (writer, reader):
        endpoint.setblocking(False)

    signal.set_wakeup_fd(writer.fileno())

    # Keep trying until we lose the race...
    for attempt in itertools.count():
        tag = f"Attempt {attempt}"
        print(f"{tag}: start")

        # Make sure the socket is empty
        drained = drain(reader)
        if drained:
            print(f"{tag}: ({drained} residual bytes discarded)")

        # Arrange for SIGINT to be delivered 1 second from now
        thread = threading.Thread(target=raise_SIGINT_soon)
        thread.start()

        # Fake an IO loop that's trying to sleep for 10 seconds (but will
        # hopefully get interrupted after just 1 second)
        start = time.monotonic()
        target = start + 10
        try:
            select_calls = 0
            drained = 0
            while True:
                now = time.monotonic()
                if now > target:
                    break
                select_calls += 1
                readable = select.select([reader], [], [], target - now)[0]
                if readable:
                    # In theory we should loop to fully drain the socket but
                    # honestly there's 1 byte in there at most and it'll be
                    # fine.
                    drained += drain(reader)
        except KeyboardInterrupt:
            pass
        else:
            print(f"{tag}: no KeyboardInterrupt?!")

        # We expect a successful run to take 1 second, and a failed run to
        # take 10 seconds, so 2 seconds is a reasonable cutoff to distinguish
        # them.
        duration = time.monotonic() - start
        stats = f"select_calls = {select_calls}, drained = {drained}"
        if duration >= 2:
            print(f"{tag}: FAILED, took {duration} seconds")
            print(stats)
            break
        print(f"{tag}: OK, trying again ({stats})")

        thread.join()
def __init__(self, zkservers, config_path):
    """Wire up signal notification, the ZooKeeper wrapper and config watching.

    Args:
        zkservers: Passed to ``zkwrapper.ZKWrapper``; the resulting wrapper
            becomes the default one in ``core``.
        config_path: Path handed to ``InotifyWatcher`` (as a one-element
            list) together with ``ConfigFile`` and ``self._is_config_file``.
    """
    self._signal_notifier = os.pipe()
    notify_write_fd = self._signal_notifier[1]

    # NOTE(review): the write end is left in blocking mode even though it is
    # used as the wakeup fd -- confirm this is intended, since the same fd is
    # also handed to core.set_default_ping_fd() below.
    signal.set_wakeup_fd(notify_write_fd)
    signal.signal(signal.SIGCHLD, self._sigchld)

    wrapper = zkwrapper.ZKWrapper(zkservers)
    core.set_default_zkwrapper(wrapper)
    core.set_default_ping_fd(self._signal_notifier[1])

    self._inotify_watcher = InotifyWatcher(
        [config_path], ConfigFile, self._is_config_file)
    self._sigchld_received = False
def start(self):
    """Run the loop until it is stopped.

    Raises RuntimeError if the loop is already running.  If ``stop()`` was
    requested before ``start()`` (``self._stopped`` is set), the flag is
    cleared and the call returns immediately.  While running, this loop is
    installed as the current ``IOLoop`` instance and, where possible, its
    waker is registered as the signal wakeup fd; both are restored
    afterwards.
    """
    if self._running:
        raise RuntimeError('IOLoop is already running')

    root_logger = logging.getLogger()
    if not root_logger.handlers:
        # The IOLoop catches and logs exceptions, so it's
        # important that log output be visible.  However, python's
        # default behavior for non-root loggers (prior to python
        # 3.2) is to print an unhelpful "no handlers could be
        # found" message rather than the actual log entry, so we
        # must explicitly configure logging if we've made it this
        # far without anything.
        logging.basicConfig()

    if self._stopped:
        self._stopped = False
        return

    previous_instance = getattr(IOLoop._current, "instance", None)
    IOLoop._current.instance = self
    self._thread_ident = thread.get_ident()

    # pyuv won't iterate the loop if the poll is interrupted by
    # a signal, so make sure we can wake it up to catch signals
    # registered with the signal module
    #
    # If someone has already set a wakeup fd, we don't want to
    # disturb it.  This is an issue for twisted, which does its
    # SIGCHILD processing in response to its own wakeup fd being
    # written to.  As long as the wakeup fd is registered on the IOLoop,
    # the loop will still wake up and everything should work.
    saved_wakeup_fd = None
    self._signal_checker.stop()
    if hasattr(signal, 'set_wakeup_fd') and os.name == 'posix':
        # requires python 2.6+, unix.  set_wakeup_fd exists but crashes
        # the python process on windows.
        try:
            saved_wakeup_fd = signal.set_wakeup_fd(
                self._fdwaker.writer.fileno())
            if saved_wakeup_fd == -1:
                self._signal_checker.start()
            else:
                # Already set, restore previous value.  This is a little
                # racy, but there's no clean get_wakeup_fd and in real use
                # the IOLoop is just started once at the beginning.
                signal.set_wakeup_fd(saved_wakeup_fd)
                saved_wakeup_fd = None
        except ValueError:  # non-main thread
            pass

    self._running = True
    self._loop.run(pyuv.UV_RUN_DEFAULT)

    # reset the stopped flag so another start/stop pair can be issued
    self._running = False
    self._stopped = False
    IOLoop._current.instance = previous_instance
    if saved_wakeup_fd is not None:
        signal.set_wakeup_fd(saved_wakeup_fd)
def add_signal_watch(signal_action, _sockets=[]):
    """Arrange for `signal_action` to run from the main loop on exit signals.

    Handles SIGINT, SIGTERM and (except on Windows) SIGHUP, for whichever of
    them the `signal` module defines.  The Python-level handlers schedule
    `signal_action` with `GLib.idle_add`, so a signal that arrives before
    the main loop starts is still acted upon once it runs.

    `_sockets` is a deliberately shared default list: it only exists to keep
    the wakeup sockets referenced so they are never garbage collected.
    """
    # See https://bugzilla.gnome.org/show_bug.cgi?id=622084 for details

    if os.name == "nt":
        sig_names = ["SIGINT", "SIGTERM"]
    else:
        sig_names = ["SIGINT", "SIGTERM", "SIGHUP"]

    signals = {}
    for name in sig_names:
        id_ = getattr(signal, name, None)
        if id_ is not None:
            signals[id_] = name

    for signum, name in signals.items():

        # Before the mainloop starts we catch signals in python
        # directly and idle_add the app.quit
        def idle_handler(signum, frame):
            print_d("Python signal handler activated: %s" % signals[signum])
            GLib.idle_add(signal_action, priority=GLib.PRIORITY_HIGH)

        print_d("Register Python signal handler: %r" % name)
        signal.signal(signum, idle_handler)

    read_socket, write_socket = socket.socketpair()
    for sock in (read_socket, write_socket):
        sock.setblocking(False)
        # prevent it from being GCed and leak it
        _sockets.append(sock)

    def signal_notify(source, condition):
        # Keep the watch alive only while data could actually be read.
        if not condition & GLib.IOCondition.IN:
            return False
        try:
            return bool(read_socket.recv(1))
        except EnvironmentError:
            return False

    if os.name == "nt":
        make_channel = GLib.IOChannel.win32_new_socket
    else:
        make_channel = GLib.IOChannel.unix_new
    channel = make_channel(read_socket.fileno())

    watch_conditions = (GLib.IOCondition.IN | GLib.IOCondition.HUP |
                        GLib.IOCondition.NVAL | GLib.IOCondition.ERR)
    io_add_watch(channel, GLib.PRIORITY_HIGH, watch_conditions,
                 signal_notify)

    signal.set_wakeup_fd(write_socket.fileno())
def _set_signal_handlers(self): """Set the signal handlers.""" def noopSignalHandler(*args): pass self._sigchld_handler = signal.signal(signal.SIGCHLD, noopSignalHandler) self._sigint_handler = signal.signal(signal.SIGINT, noopSignalHandler) self._sigterm_handler = signal.signal(signal.SIGTERM, noopSignalHandler) sig_r_fd, sig_w_fd = os.pipe2(os.O_NONBLOCK | os.O_CLOEXEC) signal.set_wakeup_fd(sig_w_fd) self._add_read_fd_handler(sig_r_fd, self._handle_sig_fd, None)
def _shutdown_resources(self): if self._notify_sock: self._notify_sock.close() self._notify_sock = None self._wait_sock.close() self._wait_sock = None if self._signal_sets: signal.set_wakeup_fd(-1) self._signal_sets = None self._default_signals = None
def __init__(self, proactor):
    """Bind the loop to *proactor* and create its self-pipe.

    Args:
        proactor: The proactor driving this loop.  It is also exposed as
            ``self._selector`` and is told about the loop via
            ``proactor.set_loop(self)``.

    The self-pipe's client socket is installed as the signal wakeup fd, but
    only when the loop is being created in the main thread.
    """
    import threading

    super().__init__()
    logger.debug('Using proactor: %s', proactor.__class__.__name__)
    self._proactor = proactor
    self._selector = proactor   # convenient alias
    self._self_reading_future = None
    self._accept_futures = {}   # socket file descriptor => Future
    proactor.set_loop(self)
    self._make_self_pipe()

    # signal.set_wakeup_fd() raises ValueError outside the main thread;
    # without this guard a loop could not be constructed in any other
    # thread.
    if threading.current_thread() is threading.main_thread():
        signal.set_wakeup_fd(self._csock.fileno())
def __init__(self, sockfile, args, executable=None, use_path=True,
             env=None, restart=False):
    """
    Start a new daemon.

    The signal wakeup pipe and handlers are installed, the Unix socket is
    bound and put into listening mode, and the child is started.
    Connections are not accepted yet: call the run method to start handling
    them and to hand program execution over to the SignalProxyDaemon.

    If any setup step fails, signals are restored and files are closed
    before the exception propagates.

    :param sockfile: Path of the Unix socket to listen on; an existing file
                     at that path is removed first
    :param Sequence[str] args: Args of the process to exec; must not be
                     empty
    :param str executable: Optional, if given this executable is used
                     instead of the zeroth argument
    :param bool use_path: Use the PATH variable to find the executable,
                     defaults to True
    :param dict[str,str] env: If given, the child process's environment,
                     otherwise the environment of the current process is
                     used
    :param bool restart: If True, restart the child process if it died,
                     otherwise the SignalProxyDaemon shuts itself down when
                     the child dies
    :raises ValueError: if ``args`` is empty
    """
    if not args:
        raise ValueError("Empty argument list")

    if executable is None:
        executable = args[0]

    self.sockfile = sockfile
    self.restart = restart
    self.args = args
    self.executable = executable
    self.use_path = use_path
    self.env = env
    self.pid = None
    self.last_forkexec = -1

    try:
        pipe_flags = os.O_CLOEXEC | os.O_NONBLOCK
        self.sig_read_fd, self.sig_write_fd = os.pipe2(pipe_flags)
        signal.set_wakeup_fd(self.sig_write_fd)

        for signo in self.sigset:
            signal.signal(signo, self._noop_handler)

        logger.info('Listening on %s', sockfile)
        # Remove a stale socket file left at this path, if any.
        if os.path.exists(sockfile):
            os.unlink(sockfile)

        self.server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        self.server.bind(sockfile)
        self.server.setblocking(False)
        self.server.listen(0)

        self.poll = select.poll()
        self.connections = {}

        self._forkexec()
        self.state = DaemonState.started
    except BaseException:
        # Undo whatever was set up, then let the error propagate.
        self._restore_signals()
        self._close_files()
        raise
def test_set_wakeup_fd_result(self):
    """set_wakeup_fd() returns the previously installed fd (-1 when unset)."""
    r1, w1 = os.pipe()
    self.addCleanup(os.close, r1)
    self.addCleanup(os.close, w1)
    r2, w2 = os.pipe()
    self.addCleanup(os.close, r2)
    self.addCleanup(os.close, w2)
    # Registered last so it runs first: never leave the wakeup fd pointing
    # at a descriptor that the cleanups above are about to close.
    self.addCleanup(signal.set_wakeup_fd, -1)

    # set_wakeup_fd() rejects blocking descriptors where it can check them.
    if hasattr(os, 'set_blocking'):
        os.set_blocking(w1, False)
        os.set_blocking(w2, False)

    signal.set_wakeup_fd(w1)
    self.assertEqual(signal.set_wakeup_fd(w2), w1)
    self.assertEqual(signal.set_wakeup_fd(-1), w2)
    self.assertEqual(signal.set_wakeup_fd(-1), -1)
def init(signals, purge=False):
    """
    Initialize the core threading/mainloop functionality by creating the
    thread notifier and signal wakeup pipes, and registering them with
    the notifier.

    :param signals: the main loop Signals object (passed by main.py)
    :param purge: if True, any pending callbacks queued for execution in
                  the mainloop will be removed.  This is useful when we have
                  forked and want to wipe the slate clean.

    This function also installs a SIGCHLD handler, mainly for lack of a
    better place.

    If this function is called multiple times, it recreates the pipes and
    stops monitoring the ones from the previous invocation.
    """
    log.debug('Creating thread notifier and signal wakeup pipes (purge=%s)', purge)

    previous_pipe = CoreThreading._pipe
    if previous_pipe:
        # There is an existing pipe already, so stop monitoring it.
        notifier.socket_remove(previous_pipe[0])
    CoreThreading._pipe = CoreThreading._create_nonblocking_pipe()
    notifier.socket_add(CoreThreading._pipe[0], CoreThreading.run_queue)

    if purge:
        with CoreThreading._queue_lock:
            del CoreThreading._queue[:]
    elif CoreThreading._queue:
        # A thread is already running and wanted to run something in the
        # mainloop before the mainloop is started.  In that case we need
        # to wakeup the loop ASAP to handle the requests.
        CoreThreading._wakeup()

    # Create wakeup fd pipe (Python 2.6) and install SIGCHLD handler.
    if not hasattr(signal, 'set_wakeup_fd'):
        # With Python 2.5-, we can't wakeup the main loop.  Use emit()
        # directly as the handler.
        signal.signal(signal.SIGCHLD, signals['unix-signal'].emit)
    else:
        # Python 2.6+, so setup the signal wake pipe.
        previous_wake_pipe = CoreThreading._signal_wake_pipe
        if previous_wake_pipe:
            # Stop monitoring old signal wake pipe.
            notifier.socket_remove(previous_wake_pipe[0])

        def drain_and_emit(fd):
            # Emit only when something was actually read from the pipe.
            return os.read(fd, 4096) and signals['unix-signal'].emit()

        pipe = CoreThreading._create_nonblocking_pipe()
        notifier.socket_add(pipe[0], drain_and_emit)
        CoreThreading._signal_wake_pipe = pipe
        signal.signal(signal.SIGCHLD, lambda sig, frame: None)
        signal.set_wakeup_fd(pipe[1])

    # Emit now to reap processes that may have terminated before we set the
    # handler.  process.py connects to this signal.
    signals['unix-signal'].emit()
def __init__(self):
    """Start with empty read/write maps and set up the signal wakeup pipe.

    The read end of a fresh pipe is registered with ``self.wakeup_handler``.
    When ``signal.set_wakeup_fd`` is available the write end is made
    non-blocking and installed as the wakeup fd, and ``self.wakeup_writefd``
    is ``None``.  Otherwise the write end is kept in
    ``self.wakeup_writefd``.
    """
    self.readmap = {}
    self.writemap = {}

    # Setup the wakeup file descriptor to avoid hanging on lost signals.
    wakeup_readfd, wakeup_writefd = os.pipe()
    self.register_read(wakeup_readfd, self.wakeup_handler)

    # TODO: remove test when we stop supporting Python <2.5
    if hasattr(signal, 'set_wakeup_fd'):
        import fcntl

        # The wakeup fd has to be non-blocking: the C-level signal handler
        # must never block on a full pipe, and newer interpreters reject a
        # blocking descriptor outright.
        flags = fcntl.fcntl(wakeup_writefd, fcntl.F_GETFL)
        fcntl.fcntl(wakeup_writefd, fcntl.F_SETFL, flags | os.O_NONBLOCK)
        signal.set_wakeup_fd(wakeup_writefd)
        self.wakeup_writefd = None
    else:
        self.wakeup_writefd = wakeup_writefd
def child_start(self, gc_old_one=False):
    """Make sure the poller exists and (re)start the child process if needed.

    On first use an epoll object and a signal wakeup pipe are created; the
    pipe's read end is wrapped in an unbuffered binary file object, stored
    as ``self.wakeup_fd`` and registered with the poller.

    Args:
        gc_old_one: If true and a child exists, wait for it and forget it
            before deciding whether to start a new one.

    Nothing is started when ``self.child_cmd`` is empty or a child is still
    set.  Otherwise the command is spawned with piped stdin/stdout and
    ``_child_readline(init_line=True)`` waits until it is ready.
    """
    if not self.poller:
        import fcntl

        self.poller, (r, w) = select.epoll(), os.pipe()
        # The wakeup fd must be non-blocking: the C-level signal handler
        # must never block on a full pipe, and Python 3.5+ raises
        # ValueError for a blocking descriptor.
        flags = fcntl.fcntl(w, fcntl.F_GETFL)
        fcntl.fcntl(w, fcntl.F_SETFL, flags | os.O_NONBLOCK)
        signal.set_wakeup_fd(w)
        self.wakeup_fd = os.fdopen(r, 'rb', 0)
        self.poller.register(self.wakeup_fd, select.EPOLLIN)

    if self._child and gc_old_one:
        self._child.wait()
        self._child = None

    if not self.child_cmd or self._child:
        return

    self._child = subprocess.Popen(
        self.child_cmd,
        stdout=subprocess.PIPE,
        stdin=subprocess.PIPE,
        close_fds=True,
    )
    self._child_readline(init_line=True)  # wait until it's ready
def test_set_wakeup_fd_socket_result(self):
    """set_wakeup_fd() returns the previous fd when given socket handles."""
    sock1 = socket.socket()
    self.addCleanup(sock1.close)
    # set_wakeup_fd() requires non-blocking descriptors.
    sock1.setblocking(False)
    fd1 = sock1.fileno()

    sock2 = socket.socket()
    self.addCleanup(sock2.close)
    sock2.setblocking(False)
    fd2 = sock2.fileno()

    # Registered last so it runs first: never leave the wakeup fd pointing
    # at a socket that the cleanups above are about to close.
    self.addCleanup(signal.set_wakeup_fd, -1)

    signal.set_wakeup_fd(fd1)
    self.assertEqual(signal.set_wakeup_fd(fd2), fd1)
    self.assertEqual(signal.set_wakeup_fd(-1), fd2)
    self.assertEqual(signal.set_wakeup_fd(-1), -1)
def _trap_sigwatch(self, current, sigset): # Initialize the signal handling part of the kernel if not done already # Note: This only works if running in the main thread if self._signals is None: self._signals = defaultdict(list) signal.set_wakeup_fd(self._notify_sock.fileno()) for signo in sigset.signos: if not self._signals[signo]: self._default_signals[signo] = signal.signal(signo, lambda signo, frame:None) self._signals[signo].append(sigset) self._reschedule_task(current)
def _init_signals(self): self._signal_sets = defaultdict(list) self._default_signals = { } old_fd = signal.set_wakeup_fd(self._notify_sock.fileno()) assert old_fd < 0, 'Signals already initialized %d' % old_fd
import sys import time timeout = 20 * 60 io_timeout = 30 parser = argparse.ArgumentParser() parser.add_argument('--io-timeout', action='store_true') args = parser.parse_args() # Some boilerplate to get SIGCHLD notification (via a file descriptor). (sig_poll_fd, sig_wake_fd) = os.pipe() os.set_blocking(sig_poll_fd, False) os.set_blocking(sig_wake_fd, False) old_wake_fd = signal.set_wakeup_fd(sig_wake_fd, warn_on_full_buffer=False) assert old_wake_fd == -1, 'Expected no wakeup FD to be present' # Need to register a handler, otherwise set_wakeup_fd() will not trigger. def noop_handler(signo, frame): pass old_handler = signal.signal(signal.SIGCHLD, noop_handler) assert not old_handler, 'Expected no SIGCHLD handler to be present' # Setup file descriptors to communicate with qemu. def try_unlink(path): try:
def block(self, blocking_event):
    """
    The default setting for the RunEngine's during_task parameter.

    This makes it possible for plots that use matplotlib's Qt backend to
    update live during data acquisition.

    It solves the problem that Qt must be run from the main thread.
    If matplotlib and a known Qt binding are already imported, run the
    matplotlib qApp until the task completes. If not, there is no need to
    handle qApp: just wait on the task.

    With an 'ipympl' or 'nbagg' backend, stale figures are redrawn roughly
    every 0.1 s until the event is set.

    Parameters
    ----------
    blocking_event : object with a ``wait([timeout])`` method
        Presumably a ``threading.Event``; ``wait()`` is expected to return
        once the task has finished, and ``wait(.1)`` to return a truthy
        value when it has.

    Raises
    ------
    Exception
        Whatever exception reached ``sys.excepthook`` while the Qt event
        loop was running is re-raised here after the loop exits.
    """
    global _qapp
    if 'matplotlib' not in sys.modules:
        # We are not using matplotlib + Qt. Just wait on the Event.
        blocking_event.wait()
    # Figure out if we are using matplotlib with which backend
    # without importing anything that is not already imported.
    else:
        import matplotlib
        backend = matplotlib.get_backend().lower()
        # if with a Qt backend, do the scary thing
        if 'qt' in backend:
            from matplotlib.backends.qt_compat import QtCore, QtWidgets
            app = QtWidgets.QApplication.instance()
            if app is None:
                # Keep a module-level reference to the application we create.
                _qapp = app = QtWidgets.QApplication([b'bluesky'])
            assert app is not None
            # A local event loop is run (and exited) below.
            event_loop = QtCore.QEventLoop()

            def start_killer_thread():
                # Invoked by the single-shot QTimer set up further down.
                def exit_loop():
                    blocking_event.wait()
                    # If the above wait ends quickly, we need to avoid the race
                    # condition where this thread might try to exit the qApp
                    # before it even starts.  Therefore, we use QTimer, below,
                    # which will not start running until the qApp event loop is
                    # running.
                    event_loop.exit()

                threading.Thread(target=exit_loop).start()

            # https://www.riverbankcomputing.com/pipermail/pyqt/2015-March/035674.html
            # adapted from code at
            # https://bitbucket.org/tortoisehg/thg/commits/550e1df5fbad

            if os.name == 'posix' and hasattr(signal, 'set_wakeup_fd'):
                # Wake up Python interpreter via pipe so that SIGINT
                # can be handled immediately.
                # (http://qt-project.org/doc/qt-4.8/unix-signals.html)
                # Updated docs:
                # https://doc.qt.io/qt-5/unix-signals.html
                import fcntl
                rfd, wfd = os.pipe()
                # Both pipe ends are switched to non-blocking mode.
                for fd in (rfd, wfd):
                    flags = fcntl.fcntl(fd, fcntl.F_GETFL)
                    fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)
                wakeupsn = QtCore.QSocketNotifier(
                    rfd, QtCore.QSocketNotifier.Read)
                # Remember the previous wakeup fd so cleanup() can restore it.
                origwakeupfd = signal.set_wakeup_fd(wfd)

                def cleanup():
                    # Restore the original wakeup fd, then close our pipe.
                    wakeupsn.setEnabled(False)
                    rfd = wakeupsn.socket()
                    wfd = signal.set_wakeup_fd(origwakeupfd)
                    os.close(int(rfd))
                    os.close(wfd)

                def handleWakeup(inp):
                    # here Python signal handler will be invoked
                    # this book-keeping is to drain the pipe
                    wakeupsn.setEnabled(False)
                    rfd = wakeupsn.socket()
                    try:
                        os.read(int(rfd), 4096)
                    except OSError as inst:
                        print('failed to read wakeup fd: %s\n' % inst)

                    wakeupsn.setEnabled(True)

                wakeupsn.activated.connect(handleWakeup)

            else:
                # On Windows, non-blocking anonymous pipe or socket is
                # not available.

                def null():
                    ...

                # we need to 'kick' the python interpreter so it sees
                # system signals
                # https://stackoverflow.com/a/4939113/380231
                kick_timer = QtCore.QTimer()
                kick_timer.timeout.connect(null)
                kick_timer.start(50)

                cleanup = kick_timer.stop

            # we also need to make sure that the qApp never sees
            # exceptions raised by python inside of a c++ callback (as
            # it will segfault itself because due to the way the
            # code is called there is no clear way to propagate that
            # back to the python code.
            vals = (None, None, None)

            old_sys_handler = sys.excepthook

            def my_exception_hook(exctype, value, traceback):
                # Record the exception, stop the loop, then defer to the
                # previously installed hook.
                nonlocal vals
                vals = (exctype, value, traceback)
                event_loop.exit()
                old_sys_handler(exctype, value, traceback)

            # this kills the Qt event loop when the plan is finished
            killer_timer = QtCore.QTimer()
            killer_timer.setSingleShot(True)
            killer_timer.timeout.connect(start_killer_thread)
            killer_timer.start(0)

            try:
                sys.excepthook = my_exception_hook
                event_loop.exec_()
                # make sure any pending signals are processed
                event_loop.processEvents()
                # Re-raise an exception captured by my_exception_hook.
                if vals[1] is not None:
                    raise vals[1]
            finally:
                # Always undo the wakeup fd / timer and the excepthook.
                try:
                    cleanup()
                finally:
                    sys.excepthook = old_sys_handler
        elif 'ipympl' in backend or 'nbagg' in backend:
            Gcf = matplotlib._pylab_helpers.Gcf
            while True:
                done = blocking_event.wait(.1)
                # Redraw every figure that is marked stale.
                for f_mgr in Gcf.get_all_fig_managers():
                    if f_mgr.canvas.figure.stale:
                        f_mgr.canvas.draw()
                if done:
                    return
        else:
            # We are not using matplotlib + Qt. Just wait on the Event.
            blocking_event.wait()
def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid,
          umask, cwd, pre_exec, close_fds, unshare_net, unshare_ipc,
          unshare_mount, unshare_pid, cgroup):
    """
    Execute a given binary with options

    Before the final os.execve() this resets the SIGINT, SIGTERM, SIGCHLD,
    SIGPIPE and SIGQUIT handlers to their defaults, clears (and closes) the
    inherited signal wakeup fd, sets up the file descriptors, optionally
    joins a cgroup and unshares namespaces, and finally applies the
    requested gid/groups/uid/umask/cwd and calls pre_exec.

    @param binary: Name of program to execute
    @type binary: String
    @param mycommand: Options for program; only the elements after the
        first one are passed on as arguments (NOTE(review): used as a
        sequence via mycommand[1:], not as a plain string -- confirm)
    @type mycommand: String
    @param opt_name: Name of process (defaults to binary)
    @type opt_name: String
    @param fd_pipes: Mapping pipes to destination; { 0:0, 1:1, 2:2 }
    @type fd_pipes: Dictionary
    @param env: Key,Value mapping for Environmental Variables
    @type env: Dictionary
    @param gid: Group ID to run the process under
    @type gid: Integer
    @param groups: Groups the Process should be in.
    @type groups: Integer
    @param uid: User ID to run the process under
    @type uid: Integer
    @param umask: an int representing a unix umask (see man chmod for umask details)
    @type umask: Integer
    @param cwd: Current working directory
    @type cwd: String
    @param pre_exec: A function to be called with no arguments just prior to the exec call.
    @type pre_exec: callable
    @param close_fds: Forwarded unchanged to _setup_pipes()
    @type close_fds: Boolean (presumably)
    @param unshare_net: If True, networking will be unshared from the spawned process
    @type unshare_net: Boolean
    @param unshare_ipc: If True, IPC will be unshared from the spawned process
    @type unshare_ipc: Boolean
    @param unshare_mount: If True, mount namespace will be unshared and mounts will
        be private to the namespace
    @type unshare_mount: Boolean
    @param unshare_pid: If True, PID ns will be unshared from the spawned process
    @type unshare_pid: Boolean
    @param cgroup: CGroup path to bind the process to
    @type cgroup: String
    @rtype: None
    @return: Never returns (calls os.execve)
    """

    # If the process we're creating hasn't been given a name
    # assign it the name of the executable.
    if not opt_name:
        if binary is portage._python_interpreter:
            # NOTE: PyPy 1.7 will die due to "libary path not found" if argv[0]
            # does not contain the full path of the binary.
            opt_name = binary
        else:
            opt_name = os.path.basename(binary)

    # Set up the command's argument list.
    myargs = [opt_name]
    myargs.extend(mycommand[1:])

    # Avoid a potential UnicodeEncodeError from os.execve().
    myargs = [
        _unicode_encode(x, encoding=_encodings['fs'], errors='strict')
        for x in myargs
    ]

    # Use default signal handlers in order to avoid problems
    # killing subprocesses as reported in bug #353239.
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    signal.signal(signal.SIGTERM, signal.SIG_DFL)

    # Unregister SIGCHLD handler and wakeup_fd for the parent
    # process's event loop (bug 655656).
    signal.signal(signal.SIGCHLD, signal.SIG_DFL)
    try:
        wakeup_fd = signal.set_wakeup_fd(-1)
        # Close the descriptor that had been installed, if there was one.
        if wakeup_fd > 0:
            os.close(wakeup_fd)
    except (ValueError, OSError):
        # Best effort only: failures here are ignored.
        pass

    # Quiet killing of subprocesses by SIGPIPE (see bug #309001).
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)

    # Avoid issues triggered by inheritance of SIGQUIT handler from
    # the parent process (see bug #289486).
    signal.signal(signal.SIGQUIT, signal.SIG_DFL)

    _setup_pipes(fd_pipes, close_fds=close_fds, inheritable=True)

    # Add to cgroup
    # it's better to do it from the child since we can guarantee
    # it is done before we start forking children
    if cgroup:
        with open(os.path.join(cgroup, 'cgroup.procs'), 'a') as f:
            f.write('%d\n' % os.getpid())

    # Unshare (while still uid==0)
    if unshare_net or unshare_ipc or unshare_mount or unshare_pid:
        filename = find_library("c")
        if filename is not None:
            libc = LoadLibrary(filename)
            if libc is not None:
                # from /usr/include/bits/sched.h
                CLONE_NEWNS = 0x00020000
                CLONE_NEWIPC = 0x08000000
                CLONE_NEWPID = 0x20000000
                CLONE_NEWNET = 0x40000000

                # Build the unshare() flag mask from the requested namespaces.
                flags = 0
                if unshare_net:
                    flags |= CLONE_NEWNET
                if unshare_ipc:
                    flags |= CLONE_NEWIPC
                if unshare_mount:
                    # NEWNS = mount namespace
                    flags |= CLONE_NEWNS
                if unshare_pid:
                    # we also need mount namespace for slave /proc
                    flags |= CLONE_NEWPID | CLONE_NEWNS

                try:
                    if libc.unshare(flags) != 0:
                        # Failure is reported but not fatal.
                        writemsg(
                            "Unable to unshare: %s\n" %
                            (errno.errorcode.get(ctypes.get_errno(), '?')),
                            noiselevel=-1)
                    else:
                        if unshare_pid:
                            # pid namespace requires us to become init
                            fork_ret = os.fork()
                            if fork_ret != 0:
                                # Parent of the fork: replace ourselves with
                                # the pid-ns-init helper, passing the child's
                                # pid.
                                os.execv(portage._python_interpreter, [
                                    portage._python_interpreter,
                                    os.path.join(portage._bin_path,
                                                 'pid-ns-init'),
                                    '%s' % fork_ret,
                                ])
                        if unshare_mount:
                            # mark the whole filesystem as slave to avoid
                            # mounts escaping the namespace
                            s = subprocess.Popen(
                                ['mount', '--make-rslave', '/'])
                            mount_ret = s.wait()
                            if mount_ret != 0:
                                # TODO: should it be fatal maybe?
                                writemsg("Unable to mark mounts slave: %d\n" %
                                         (mount_ret, ),
                                         noiselevel=-1)
                        if unshare_pid:
                            # we need at least /proc being slave
                            s = subprocess.Popen(
                                ['mount', '--make-slave', '/proc'])
                            mount_ret = s.wait()
                            if mount_ret != 0:
                                # can't proceed with shared /proc
                                writemsg("Unable to mark /proc slave: %d\n" %
                                         (mount_ret, ),
                                         noiselevel=-1)
                                os._exit(1)
                            # mount new /proc for our namespace
                            s = subprocess.Popen(
                                ['mount', '-t', 'proc', 'proc', '/proc'])
                            mount_ret = s.wait()
                            if mount_ret != 0:
                                writemsg("Unable to mount new /proc: %d\n" %
                                         (mount_ret, ),
                                         noiselevel=-1)
                                os._exit(1)
                        if unshare_net:
                            # 'up' the loopback
                            IFF_UP = 0x1
                            ifreq = struct.pack('16sh', b'lo', IFF_UP)
                            SIOCSIFFLAGS = 0x8914

                            sock = socket.socket(socket.AF_INET,
                                                 socket.SOCK_DGRAM, 0)
                            try:
                                fcntl.ioctl(sock, SIOCSIFFLAGS, ifreq)
                            except IOError as e:
                                # Reported but not fatal.
                                writemsg(
                                    "Unable to enable loopback interface: %s\n"
                                    % (errno.errorcode.get(e.errno, '?')),
                                    noiselevel=-1)
                            sock.close()
                except AttributeError:
                    # unshare() not supported by libc
                    pass

    # Set requested process permissions.
    if gid:
        # Cast proxies to int, in case it matters.
        os.setgid(int(gid))
    if groups:
        os.setgroups(groups)
    if uid:
        # Cast proxies to int, in case it matters.
        os.setuid(int(uid))
    if umask:
        os.umask(umask)
    if cwd is not None:
        os.chdir(cwd)
    if pre_exec:
        pre_exec()

    # And switch to the new process.
    os.execve(binary, myargs, env)
def _maybe_allow_interrupt(qapp):
    """
    Allow a running Qt event loop to be terminated by sending SIGINT.

    This is necessary because the running Qt backend prevents the Python
    interpreter from running and processing signals (i.e., from raising
    KeyboardInterrupt).  To solve this, something has to wake up the
    interpreter and make it close the plot window.  We do this with
    `signal.set_wakeup_fd`, which arranges for the signal number to be
    written into a socketpair whose read end is watched by a
    ``QSocketNotifier``; being part of the Qt backend, the notifier reacts to
    that write.  Its slot then empties the socketpair with ``recv()`` to
    re-arm it (needed when a signal other than SIGINT was caught and we must
    keep waiting).

    If SIGINT was indeed caught, then after the notifier slot returns the
    interpreter runs the temporary ``handle()`` handler installed with
    `signal.signal`; it records its arguments and calls ``qapp.quit()``.
    On exit, the previous wakeup fd and SIGINT handler are restored and the
    previous SIGINT handler is called with the recorded arguments.

    Nothing at all is installed when the current SIGINT handler is ``None``
    (a non-Python handler is installed, e.g. by Julia), ``SIG_IGN`` (the
    interrupts should be ignored) or ``SIG_DFL``.

    Parameters
    ----------
    qapp
        Object whose ``quit()`` method is called when SIGINT arrives.

    Yields
    ------
    None
        Exactly once; the caller runs the event loop at that point.

    Notes
    -----
    NOTE(review): this generator is presumably wrapped with
    ``contextlib.contextmanager`` where it is defined; the decorator line is
    not visible from here -- confirm.
    """
    old_sigint_handler = signal.getsignal(signal.SIGINT)
    handler_args = None
    skip = False
    if old_sigint_handler in (None, signal.SIG_IGN, signal.SIG_DFL):
        skip = True
    else:
        wsock, rsock = socket.socketpair()
        wsock.setblocking(False)
        old_wakeup_fd = signal.set_wakeup_fd(wsock.fileno())
        sn = QtCore.QSocketNotifier(
            rsock.fileno(), _enum('QtCore.QSocketNotifier.Type').Read)

        # We do not actually care about this value other than running some
        # Python code to ensure that the interpreter has a chance to handle
        # the signal in Python land.  We also need to drain the socket
        # because it will be written to as part of the wakeup!  There are
        # some cases where this may fire too soon / more than once on Windows
        # so we should be forgiving about reading an empty socket.
        rsock.setblocking(False)

        # Clear the socket to re-arm the notifier.
        @sn.activated.connect
        def _may_clear_sock(*args):
            try:
                rsock.recv(1)
            except BlockingIOError:
                pass

        def handle(*args):
            nonlocal handler_args
            handler_args = args
            qapp.quit()

        signal.signal(signal.SIGINT, handle)
    try:
        yield
    finally:
        if not skip:
            # Tear down in the reverse order of setup.  First stop Qt from
            # watching the read end, then point the interpreter's wakeup fd
            # away from our socket, and only then close the descriptors.
            # Closing first would leave a window in which a signal is
            # written to a closed (or already reused) descriptor.
            sn.setEnabled(False)
            try:
                signal.set_wakeup_fd(old_wakeup_fd)
            finally:
                # Release the sockets even if restoring the old fd failed.
                wsock.close()
                rsock.close()
            signal.signal(signal.SIGINT, old_sigint_handler)
            if handler_args is not None:
                old_sigint_handler(*handler_args)
def start(self):
    """Run the I/O loop until ``self._running`` becomes false.

    Each iteration runs the pending callbacks and due timeouts, polls
    ``self._impl`` for I/O events and dispatches them to the handlers in
    ``self._handlers``.

    Returns immediately (after clearing the flag) when ``self._stopped`` is
    already set on entry.

    Raises:
        RuntimeError: if the loop is already running.
    """
    # Tornado uses the _running/_stopped pair to encode three states:
    #   1. idle (initialised or finished): _running=False, _stopped=False
    #   2. running:                        _running=True,  _stopped=False
    #   3. stopping:                       _running=False, _stopped=True
    if self._running:
        raise RuntimeError("IOLoop is already running")
    self._setup_logging()
    if self._stopped:
        self._stopped = False
        return
    old_current = getattr(IOLoop._current, "instance", None)
    IOLoop._current.instance = self
    self._thread_ident = thread.get_ident()
    self._running = True

    # signal.set_wakeup_fd closes a race condition in event loops:
    # a signal may arrive at the beginning of select/poll/etc
    # before it goes into its interruptible sleep, so the signal
    # will be consumed without waking the select.  The solution is
    # for the (C, synchronous) signal handler to write to a pipe,
    # which will then be seen by select.
    #
    # In python's signal handling semantics, this only matters on the
    # main thread (fortunately, set_wakeup_fd only works on the main
    # thread and will raise a ValueError otherwise).
    #
    # If someone has already set a wakeup fd, we don't want to
    # disturb it.  This is an issue for twisted, which does its
    # SIGCHLD processing in response to its own wakeup fd being
    # written to.  As long as the wakeup fd is registered on the IOLoop,
    # the loop will still wake up and everything should work.
    #
    # Annotator's notes: set_wakeup_fd(fd) makes the interpreter write a
    # byte to fd whenever a signal is received, which wakes a process
    # blocked in poll/select so it can handle the signal.  The fd must be
    # non-blocking.  The call returns the previously configured fd, or -1
    # if there was none.
    #
    # NOTE(review): the annotator guesses that, when combined with twisted,
    # PosixReactorBase.installWaker and related waker methods may need to be
    # overridden -- unverified.
    #
    # NOTE(review): per the annotator, the waker's fd is registered for READ
    # events in initialize(); that code is not visible here -- confirm.
    old_wakeup_fd = None
    if hasattr(signal, 'set_wakeup_fd') and os.name == 'posix':
        # requires python 2.6+, unix.  set_wakeup_fd exists but crashes
        # the python process on windows.
        try:
            old_wakeup_fd = signal.set_wakeup_fd(self._waker.write_fileno())
            if old_wakeup_fd != -1:
                # Already set, restore previous value.  This is a little racy,
                # but there's no clean get_wakeup_fd and in real use the
                # IOLoop is just started once at the beginning.
                signal.set_wakeup_fd(old_wakeup_fd)
                old_wakeup_fd = None
        except ValueError:
            # Non-main thread, or the previous value of wakeup_fd is no
            # longer valid.  Forget it in both cases; otherwise the
            # ``finally`` clause below would try to restore the stale fd and
            # raise ValueError a second time on the way out.
            old_wakeup_fd = None

    try:
        while True:
            # Prevent IO event starvation by delaying new callbacks
            # to the next iteration of the event loop.
            with self._callback_lock:
                callbacks = self._callbacks
                self._callbacks = []

            # Add any timeouts that have come due to the callback list.
            # Do not run anything until we have determined which ones
            # are ready, so timeouts that call add_timeout cannot
            # schedule anything in this iteration.
            #
            # self._timeouts is a heapq-managed priority queue.
            # NOTE(review): per the annotator it holds _Timeout instances
            # ordered by deadline and then insertion order (_Timeout.__lt__
            # / __le__, not visible here) -- confirm.
            due_timeouts = []
            if self._timeouts:
                now = self.time()
                while self._timeouts:
                    if self._timeouts[0].callback is None:
                        # The timeout was cancelled.  Note that the
                        # cancellation check is repeated below for timeouts
                        # that are cancelled by another timeout or callback.
                        heapq.heappop(self._timeouts)
                        self._cancellations -= 1
                    elif self._timeouts[0].deadline <= now:
                        due_timeouts.append(heapq.heappop(self._timeouts))
                    else:
                        break
                # Cancelled timeouts stay in the heap with callback=None
                # (removing from the middle of a heap is awkward), so they
                # are purged in bulk once they pile up.
                if (self._cancellations > 512 and
                        self._cancellations > (len(self._timeouts) >> 1)):
                    # Clean up the timeout queue when it gets large and it's
                    # more than half cancellations.
                    self._cancellations = 0
                    self._timeouts = [x for x in self._timeouts
                                      if x.callback is not None]
                    heapq.heapify(self._timeouts)

            for callback in callbacks:
                self._run_callback(callback)
            for timeout in due_timeouts:
                if timeout.callback is not None:
                    self._run_callback(timeout.callback)
            # Closures may be holding on to a lot of memory, so allow
            # them to be freed before we go into our poll wait.
            callbacks = callback = due_timeouts = timeout = None

            # Choose how long poll() may block:
            #   1. callbacks are pending -> do not block at all;
            #   2. timeouts are pending  -> block until the earliest deadline,
            #      clamped to the range [0, _POLL_TIMEOUT];
            #   3. otherwise             -> block for the default timeout.
            if self._callbacks:
                # If any callbacks or timeouts called add_callback,
                # we don't want to wait in poll() before we run them.
                poll_timeout = 0.0
            elif self._timeouts:
                # If there are any timeouts, schedule the first one.
                # Use self.time() instead of 'now' to account for time
                # spent running callbacks.
                poll_timeout = self._timeouts[0].deadline - self.time()
                poll_timeout = max(0, min(poll_timeout, _POLL_TIMEOUT))
            else:
                # No timeouts and no callbacks, so use the default.
                poll_timeout = _POLL_TIMEOUT

            if not self._running:
                break

            # Blocking watchdog: when _blocking_signal_threshold is set, an
            # ITIMER_REAL timer (which delivers SIGALRM) is armed for that
            # many seconds after poll() returns and disarmed before poll()
            # is entered, so time spent waiting in poll() is not counted.
            # NOTE(review): the SIGALRM handler is presumably installed by
            # set_blocking_signal_threshold(), not visible here -- confirm.
            if self._blocking_signal_threshold is not None:
                # clear alarm so it doesn't fire while poll is waiting for
                # events.
                signal.setitimer(signal.ITIMER_REAL, 0, 0)

            try:
                event_pairs = self._impl.poll(poll_timeout)
            except Exception as e:
                # Depending on python version and IOLoop implementation,
                # different exception types may be thrown and there are
                # two ways EINTR might be signaled:
                # * e.errno == errno.EINTR
                # * e.args is like (errno.EINTR, 'Interrupted system call')
                #
                # poll() is a blocking ("slow") system call; a signal that
                # arrives while it sleeps interrupts it with EINTR, in which
                # case we simply go around the loop again.
                if errno_from_exception(e) == errno.EINTR:
                    continue
                else:
                    raise

            # Re-arm the watchdog timer now that poll() has returned.
            if self._blocking_signal_threshold is not None:
                signal.setitimer(signal.ITIMER_REAL,
                                 self._blocking_signal_threshold, 0)

            # Pop one fd at a time from the set of pending fds and run
            # its handler.  Since that handler may perform actions on
            # other file descriptors, there may be reentrant calls to
            # this IOLoop that update self._events
            #
            # That is why this is a ``while``/``popitem`` loop and not a
            # ``for`` loop: the dict may change while it is being drained.
            self._events.update(event_pairs)
            while self._events:
                fd, events = self._events.popitem()
                try:
                    fd_obj, handler_func = self._handlers[fd]
                    handler_func(fd_obj, events)
                except (OSError, IOError) as e:
                    if errno_from_exception(e) == errno.EPIPE:
                        # Happens when the client closes the connection
                        pass
                    else:
                        self.handle_callback_exception(self._handlers.get(fd))
                except Exception:
                    self.handle_callback_exception(self._handlers.get(fd))
            fd_obj = handler_func = None

    finally:
        # reset the stopped flag so another start/stop pair can be issued
        #
        # Also disarm the watchdog timer, put back the previous current
        # IOLoop and restore the wakeup fd we replaced (if any).
        self._stopped = False
        if self._blocking_signal_threshold is not None:
            signal.setitimer(signal.ITIMER_REAL, 0, 0)
        IOLoop._current.instance = old_current
        if old_wakeup_fd is not None:
            signal.set_wakeup_fd(old_wakeup_fd)
def start(self):
    """Run the I/O loop until ``self._running`` becomes false.

    Each iteration runs the pending callbacks and due timeouts, polls
    ``self._impl`` for I/O events and dispatches them to the handlers in
    ``self._handlers``.

    Returns immediately (after clearing the flag) when ``self._stopped`` is
    already set on entry.

    Raises:
        RuntimeError: if the loop is already running.
    """
    if self._running:
        raise RuntimeError("IOLoop is already running")
    self._setup_logging()
    if self._stopped:
        self._stopped = False
        return
    old_current = getattr(IOLoop._current, "instance", None)
    IOLoop._current.instance = self
    # TODO(annotator): read up on Python thread identifiers.
    self._thread_ident = thread.get_ident()
    self._running = True

    # signal.set_wakeup_fd closes a race condition in event loops:
    # a signal may arrive at the beginning of select/poll/etc
    # before it goes into its interruptible sleep, so the signal
    # will be consumed without waking the select.  The solution is
    # for the (C, synchronous) signal handler to write to a pipe,
    # which will then be seen by select.
    #
    # In python's signal handling semantics, this only matters on the
    # main thread (fortunately, set_wakeup_fd only works on the main
    # thread and will raise a ValueError otherwise).
    #
    # If someone has already set a wakeup fd, we don't want to
    # disturb it.  This is an issue for twisted, which does its
    # SIGCHLD processing in response to its own wakeup fd being
    # written to.  As long as the wakeup fd is registered on the IOLoop,
    # the loop will still wake up and everything should work.
    old_wakeup_fd = None
    if hasattr(signal, 'set_wakeup_fd') and os.name == 'posix':
        # requires python 2.6+, unix.  set_wakeup_fd exists but crashes
        # the python process on windows.
        try:
            # TODO(annotator): read up on Python's signal module.
            old_wakeup_fd = signal.set_wakeup_fd(
                self._waker.write_fileno())
            if old_wakeup_fd != -1:
                # Already set, restore previous value.  This is a little
                # racy, but there's no clean get_wakeup_fd and in real use
                # the IOLoop is just started once at the beginning.
                signal.set_wakeup_fd(old_wakeup_fd)
                old_wakeup_fd = None
        except ValueError:
            # Non-main thread, or the previous value of wakeup_fd is no
            # longer valid.  Either way there is nothing to restore later.
            old_wakeup_fd = None

    try:
        while True:
            # Prevent IO event starvation by delaying new callbacks
            # to the next iteration of the event loop.
            with self._callback_lock:
                callbacks = self._callbacks
                self._callbacks = []

            # Add any timeouts that have come due to the callback list.
            # Do not run anything until we have determined which ones
            # are ready, so timeouts that call add_timeout cannot
            # schedule anything in this iteration.
            due_timeouts = []
            if self._timeouts:
                now = self.time()
                while self._timeouts:
                    if self._timeouts[0].callback is None:
                        # The timeout was cancelled.  Note that the
                        # cancellation check is repeated below for timeouts
                        # that are cancelled by another timeout or callback.
                        heapq.heappop(self._timeouts)
                        self._cancellations -= 1
                    elif self._timeouts[0].deadline <= now:
                        due_timeouts.append(heapq.heappop(self._timeouts))
                    else:
                        break
                if (self._cancellations > 512 and
                        self._cancellations > (len(self._timeouts) >> 1)):
                    # Clean up the timeout queue when it gets large and it's
                    # more than half cancellations.
                    self._cancellations = 0
                    self._timeouts = [
                        x for x in self._timeouts if x.callback is not None
                    ]
                    heapq.heapify(self._timeouts)

            for callback in callbacks:
                self._run_callback(callback)
            for timeout in due_timeouts:
                if timeout.callback is not None:
                    self._run_callback(timeout.callback)
            # Closures may be holding on to a lot of memory, so allow
            # them to be freed before we go into our poll wait.
            callbacks = callback = due_timeouts = timeout = None

            if self._callbacks:
                # If any callbacks or timeouts called add_callback,
                # we don't want to wait in poll() before we run them.
                poll_timeout = 0.0
            elif self._timeouts:
                # If there are any timeouts, schedule the first one.
                # Use self.time() instead of 'now' to account for time
                # spent running callbacks.
                poll_timeout = self._timeouts[0].deadline - self.time()
                poll_timeout = max(0, min(poll_timeout, _POLL_TIMEOUT))
            else:
                # No timeouts and no callbacks, so use the default.
                poll_timeout = _POLL_TIMEOUT

            if not self._running:
                break

            if self._blocking_signal_threshold is not None:
                # clear alarm so it doesn't fire while poll is waiting for
                # events.
                signal.setitimer(signal.ITIMER_REAL, 0, 0)

            try:
                event_pairs = self._impl.poll(poll_timeout)
            except Exception as e:
                # Depending on python version and IOLoop implementation,
                # different exception types may be thrown and there are
                # two ways EINTR might be signaled:
                # * e.errno == errno.EINTR
                # * e.args is like (errno.EINTR, 'Interrupted system call')
                if errno_from_exception(e) == errno.EINTR:
                    continue
                else:
                    raise

            # Re-arm the blocking-watchdog timer now that poll() returned.
            if self._blocking_signal_threshold is not None:
                signal.setitimer(signal.ITIMER_REAL,
                                 self._blocking_signal_threshold, 0)

            # Pop one fd at a time from the set of pending fds and run
            # its handler.  Since that handler may perform actions on
            # other file descriptors, there may be reentrant calls to
            # this IOLoop that update self._events
            self._events.update(event_pairs)
            while self._events:
                fd, events = self._events.popitem()
                try:
                    fd_obj, handler_func = self._handlers[fd]
                    handler_func(fd_obj, events)
                except (OSError, IOError) as e:
                    if errno_from_exception(e) == errno.EPIPE:
                        # Happens when the client closes the connection
                        pass
                    else:
                        self.handle_callback_exception(
                            self._handlers.get(fd))
                except Exception:
                    self.handle_callback_exception(self._handlers.get(fd))
            fd_obj = handler_func = None

    finally:
        # reset the stopped flag so another start/stop pair can be issued
        self._stopped = False
        if self._blocking_signal_threshold is not None:
            signal.setitimer(signal.ITIMER_REAL, 0, 0)
        IOLoop._current.instance = old_current
        # Only set when this call installed the waker as the wakeup fd.
        if old_wakeup_fd is not None:
            signal.set_wakeup_fd(old_wakeup_fd)
def _exec(binary, mycommand, opt_name, fd_pipes,
          env, gid, groups, uid, umask, cwd,
          pre_exec, close_fds, unshare_net, unshare_ipc, unshare_mount,
          unshare_pid, unshare_flags, cgroup):
    """
    Execute a given binary with options.

    Resets signal dispositions, sets up file descriptors, optionally joins a
    cgroup and unshares namespaces, applies the requested credentials and
    finally replaces the current process image via os.execve().

    @param binary: Name of program to execute
    @type binary: String
    @param mycommand: Command line of the program; element 0 is replaced by
        opt_name and the remaining elements are passed through as arguments
    @type mycommand: Sequence of strings
    @param opt_name: Name of process (defaults to binary)
    @type opt_name: String
    @param fd_pipes: Mapping pipes to destination; { 0:0, 1:1, 2:2 }
    @type fd_pipes: Dictionary
    @param env: Key,Value mapping for Environmental Variables
    @type env: Dictionary
    @param gid: Group ID to run the process under
    @type gid: Integer
    @param groups: Groups the Process should be in.
    @type groups: List
    @param uid: User ID to run the process under
    @type uid: Integer
    @param umask: an int representing a unix umask (see man chmod for umask details)
    @type umask: Integer
    @param cwd: Current working directory
    @type cwd: String
    @param pre_exec: A function to be called with no arguments just prior to the exec call.
    @type pre_exec: callable
    @param close_fds: Passed through unchanged to _setup_pipes
    @param unshare_net: If True, networking will be unshared from the spawned process
    @type unshare_net: Boolean
    @param unshare_ipc: If True, IPC will be unshared from the spawned process
    @type unshare_ipc: Boolean
    @param unshare_mount: If True, mount namespace will be unshared and mounts will
        be private to the namespace
    @type unshare_mount: Boolean
    @param unshare_pid: If True, PID ns will be unshared from the spawned process
    @type unshare_pid: Boolean
    @param unshare_flags: Flags for the unshare(2) function
    @type unshare_flags: Integer
    @param cgroup: CGroup path to bind the process to
    @type cgroup: String
    @rtype: None
    @return: Never returns (calls os.execve)
    """

    # If the process we're creating hasn't been given a name
    # assign it the name of the executable.
    if not opt_name:
        if binary is portage._python_interpreter:
            # NOTE: PyPy 1.7 will die due to "libary path not found" if argv[0]
            # does not contain the full path of the binary.
            opt_name = binary
        else:
            opt_name = os.path.basename(binary)

    # Set up the command's argument list.
    myargs = [opt_name]
    myargs.extend(mycommand[1:])

    # Avoid a potential UnicodeEncodeError from os.execve().
    myargs = [_unicode_encode(x, encoding=_encodings['fs'],
        errors='strict') for x in myargs]

    # Use default signal handlers in order to avoid problems
    # killing subprocesses as reported in bug #353239.
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    signal.signal(signal.SIGTERM, signal.SIG_DFL)

    # Unregister SIGCHLD handler and wakeup_fd for the parent
    # process's event loop (bug 655656).
    signal.signal(signal.SIGCHLD, signal.SIG_DFL)
    try:
        # set_wakeup_fd(-1) disables the wakeup fd and returns the one that
        # was installed before; close that inherited descriptor here.
        wakeup_fd = signal.set_wakeup_fd(-1)
        if wakeup_fd > 0:
            os.close(wakeup_fd)
    except (ValueError, OSError):
        # Best effort only: failure to reset or close is not fatal.
        pass

    # Quiet killing of subprocesses by SIGPIPE (see bug #309001).
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)

    # Avoid issues triggered by inheritance of SIGQUIT handler from
    # the parent process (see bug #289486).
    signal.signal(signal.SIGQUIT, signal.SIG_DFL)

    _setup_pipes(fd_pipes, close_fds=close_fds, inheritable=True)

    # Add to cgroup
    # it's better to do it from the child since we can guarantee
    # it is done before we start forking children
    if cgroup:
        with open(os.path.join(cgroup, 'cgroup.procs'), 'a') as f:
            f.write('%d\n' % os.getpid())

    # Unshare (while still uid==0)
    if unshare_net or unshare_ipc or unshare_mount or unshare_pid:
        filename = find_library("c")
        if filename is not None:
            libc = LoadLibrary(filename)
            if libc is not None:
                try:
                    # Since a failed unshare call could corrupt process
                    # state, first validate that the call can succeed.
                    # The parent process should call _unshare_validate
                    # before it forks, so that all child processes can
                    # reuse _unshare_validate results that have been
                    # cached by the parent process.
                    errno_value = _unshare_validate(unshare_flags)
                    if errno_value == 0 and libc.unshare(unshare_flags) != 0:
                        errno_value = ctypes.get_errno()
                    if errno_value != 0:
                        # Report which FEATURES asked for the namespaces
                        # that could not be unshared, then carry on without.
                        involved_features = []
                        if unshare_ipc:
                            involved_features.append('ipc-sandbox')
                        if unshare_mount:
                            involved_features.append('mount-sandbox')
                        if unshare_net:
                            involved_features.append('network-sandbox')
                        if unshare_pid:
                            involved_features.append('pid-sandbox')

                        writemsg("Unable to unshare: %s (for FEATURES=\"%s\")\n" % (
                            errno.errorcode.get(errno_value, '?'), ' '.join(involved_features)),
                            noiselevel=-1)
                    else:
                        if unshare_pid:
                            main_child_pid = os.fork()
                            if main_child_pid == 0:
                                # pid namespace requires us to become init
                                #
                                # The real command line is appended to the
                                # pid-ns-init invocation, together with the
                                # credentials, umask and fd list.
                                binary, myargs = portage._python_interpreter, [
                                    portage._python_interpreter,
                                    os.path.join(portage._bin_path,
                                        'pid-ns-init'),
                                    _unicode_encode('' if uid is None else str(uid)),
                                    _unicode_encode('' if gid is None else str(gid)),
                                    _unicode_encode('' if groups is None else ','.join(str(group) for group in groups)),
                                    _unicode_encode('' if umask is None else str(umask)),
                                    _unicode_encode(','.join(str(fd) for fd in fd_pipes)),
                                    binary] + myargs
                                # These were handed to pid-ns-init above, so
                                # clear them to skip the permission changes
                                # at the end of this function.
                                uid = None
                                gid = None
                                groups = None
                                umask = None
                            else:
                                # Execute a supervisor process which will forward
                                # signals to init and forward exit status to the
                                # parent process. The supervisor process runs in
                                # the global pid namespace, so skip /proc remount
                                # and other setup that's intended only for the
                                # init process.
                                binary, myargs = portage._python_interpreter, [
                                    portage._python_interpreter,
                                    os.path.join(portage._bin_path,
                                        'pid-ns-init'), str(main_child_pid)]

                                os.execve(binary, myargs, env)

                        if unshare_mount:
                            # mark the whole filesystem as slave to avoid
                            # mounts escaping the namespace
                            s = subprocess.Popen(['mount',
                                '--make-rslave', '/'])
                            mount_ret = s.wait()
                            if mount_ret != 0:
                                # TODO: should it be fatal maybe?
                                writemsg("Unable to mark mounts slave: %d\n" % (mount_ret,),
                                    noiselevel=-1)
                        if unshare_pid:
                            # we need at least /proc being slave
                            s = subprocess.Popen(['mount',
                                '--make-slave', '/proc'])
                            mount_ret = s.wait()
                            if mount_ret != 0:
                                # can't proceed with shared /proc
                                writemsg("Unable to mark /proc slave: %d\n" % (mount_ret,),
                                    noiselevel=-1)
                                os._exit(1)
                            # mount new /proc for our namespace
                            s = subprocess.Popen(['mount',
                                '-n', '-t', 'proc', 'proc', '/proc'])
                            mount_ret = s.wait()
                            if mount_ret != 0:
                                writemsg("Unable to mount new /proc: %d\n" % (mount_ret,),
                                    noiselevel=-1)
                                os._exit(1)
                        if unshare_net:
                            # use 'localhost' to avoid hostname resolution problems
                            try:
                                # pypy3 does not implement socket.sethostname()
                                new_hostname = b'localhost'
                                if hasattr(socket, 'sethostname'):
                                    socket.sethostname(new_hostname)
                                else:
                                    if libc.sethostname(new_hostname, len(new_hostname)) != 0:
                                        errno_value = ctypes.get_errno()
                                        raise OSError(errno_value, os.strerror(errno_value))
                            except Exception as e:
                                # A hostname failure is reported, not fatal.
                                writemsg("Unable to set hostname: %s (for FEATURES=\"network-sandbox\")\n" % (
                                    e,),
                                    noiselevel=-1)
                            _configure_loopback_interface()
                except AttributeError:
                    # unshare() not supported by libc
                    pass

    # Set requested process permissions.
    # Group changes come before setuid(); presumably because they can no
    # longer be made once the uid has been dropped -- confirm.
    if gid:
        # Cast proxies to int, in case it matters.
        os.setgid(int(gid))
    if groups:
        os.setgroups(groups)
    if uid:
        # Cast proxies to int, in case it matters.
        os.setuid(int(uid))
    if umask:
        os.umask(umask)
    if cwd is not None:
        os.chdir(cwd)
    if pre_exec:
        pre_exec()

    # And switch to the new process.
    os.execve(binary, myargs, env)
def __del__(self):
    """Restore the previously installed wakeup fd when the object goes away.

    Does nothing when no previous fd was recorded on the instance.
    """
    # __del__ also runs for instances whose __init__ failed before old_fd
    # was assigned, so the attribute is read defensively rather than
    # raising AttributeError from a finalizer.
    old_fd = getattr(self, 'old_fd', None)
    # The truthiness checks on the module and the function are kept from the
    # original; presumably they guard against interpreter shutdown, when
    # module globals may already have been cleared -- confirm.
    if old_fd is not None and signal and signal.set_wakeup_fd:
        signal.set_wakeup_fd(old_fd)
def test_basic(self):
    """No wakeup fd is installed before or after ``wakeup_on_signal()``."""
    # set_wakeup_fd() returns the fd it replaces, so getting -1 back means
    # nothing was installed at that point.
    fd_before = signal.set_wakeup_fd(-1)
    self.assertEqual(fd_before, -1)

    with wakeup_on_signal():
        pass

    fd_after = signal.set_wakeup_fd(-1)
    self.assertEqual(fd_after, -1)
def cleanup():
    """Disable the wakeup notifier, restore the wakeup fd, close both fds."""
    wakeupsn.setEnabled(False)
    read_end = wakeupsn.socket()
    # set_wakeup_fd() hands back the descriptor it replaces; that returned
    # descriptor is the second one closed below.
    write_end = signal.set_wakeup_fd(origwakeupfd)
    for descriptor in (int(read_end), write_end):
        os.close(descriptor)
def start(self):
    """Run the I/O loop until ``self._running`` becomes false.

    Each iteration runs the pending callbacks and due timeouts, polls
    ``self._impl`` for I/O events and dispatches them to the handlers in
    ``self._handlers``.

    Returns immediately (after clearing the flag) when ``self._stopped`` is
    already set on entry.

    Raises:
        RuntimeError: if the loop is already running.
    """
    if self._running:
        raise RuntimeError("IOLoop is already running")
    self._setup_logging()
    if self._stopped:
        self._stopped = False
        return
    old_current = getattr(IOLoop._current, "instance", None)
    IOLoop._current.instance = self
    self._thread_ident = thread.get_ident()  # remember the current thread id
    self._running = True

    # signal.set_wakeup_fd closes a race condition in event loops:
    # a signal may arrive at the beginning of select/poll/etc
    # before it goes into its interruptible sleep, so the signal
    # will be consumed without waking the select.  The solution is
    # for the (C, synchronous) signal handler to write to a pipe,
    # which will then be seen by select.
    #
    # In python's signal handling semantics, this only matters on the
    # main thread (fortunately, set_wakeup_fd only works on the main
    # thread and will raise a ValueError otherwise).
    #
    # If someone has already set a wakeup fd, we don't want to
    # disturb it.  This is an issue for twisted, which does its
    # SIGCHLD processing in response to its own wakeup fd being
    # written to.  As long as the wakeup fd is registered on the IOLoop,
    # the loop will still wake up and everything should work.
    old_wakeup_fd = None
    if hasattr(signal, 'set_wakeup_fd') and os.name == 'posix':
        # requires python 2.6+, unix.  set_wakeup_fd exists but crashes
        # the python process on windows.
        try:
            old_wakeup_fd = signal.set_wakeup_fd(
                self._waker.write_fileno())
            # A wakeup fd was already installed: put it back.
            if old_wakeup_fd != -1:
                # Already set, restore previous value.  This is a little racy,
                # but there's no clean get_wakeup_fd and in real use the
                # IOLoop is just started once at the beginning.
                signal.set_wakeup_fd(old_wakeup_fd)
                old_wakeup_fd = None
        except ValueError:
            # Non-main thread, or the previous value of wakeup_fd
            # is no longer valid.
            old_wakeup_fd = None

    try:
        while True:
            # Prevent IO event starvation by delaying new callbacks
            # to the next iteration of the event loop.
            #
            # self._callbacks holds the immediate callbacks: each iteration
            # takes the whole list, runs every entry once, and leaves a
            # fresh empty list behind.
            with self._callback_lock:
                callbacks = self._callbacks
                self._callbacks = []

            # Add any timeouts that have come due to the callback list.
            # Do not run anything until we have determined which ones
            # are ready, so timeouts that call add_timeout cannot
            # schedule anything in this iteration.
            due_timeouts = []
            # self._timeouts is a binary heap used to manage the timers.
            if self._timeouts:
                now = self.time()
                while self._timeouts:
                    # A timer's callback may be None: a cancelled timer is
                    # not removed from the heap right away; its callback is
                    # merely cleared, because removing from the middle of a
                    # heap would cost more than it is worth.
                    # (Annotator's aside, unverified: Shadowsocks' TCPRelay
                    # in tcprelay.py borrows this idea in _sweep_timeout,
                    # using a sorted list plus a map from callback to list
                    # index.)
                    if self._timeouts[0].callback is None:
                        # The timeout was cancelled.  Note that the
                        # cancellation check is repeated below for timeouts
                        # that are cancelled by another timeout or callback.
                        heapq.heappop(self._timeouts)
                        self._cancellations -= 1
                    elif self._timeouts[0].deadline <= now:
                        # Timers that are already due are parked in
                        # due_timeouts for now.
                        due_timeouts.append(heapq.heappop(self._timeouts))
                    else:
                        break
                # When more than 512 timers have been cancelled AND they
                # make up more than half of the heap, drop them all and
                # rebuild the heap.
                if (self._cancellations > 512 and
                        self._cancellations > (len(self._timeouts) >> 1)):
                    # Clean up the timeout queue when it gets large and it's
                    # more than half cancellations.
                    self._cancellations = 0
                    self._timeouts = [
                        x for x in self._timeouts if x.callback is not None
                    ]
                    # Rebuild the heap.
                    heapq.heapify(self._timeouts)

            # Run the immediate callbacks first...
            for callback in callbacks:
                self._run_callback(callback)
            # ...then the timeouts that have come due.
            for timeout in due_timeouts:
                if timeout.callback is not None:
                    self._run_callback(timeout.callback)
            # Closures may be holding on to a lot of memory, so allow
            # them to be freed before we go into our poll wait.
            callbacks = callback = due_timeouts = timeout = None

            # Work out how long the coming poll() may block:
            #   1. immediate callbacks exist (possibly added while running
            #      the ones above) -> 0;
            #   2. timers exist -> time until the earliest deadline at the
            #      top of the heap, clamped to [0, _POLL_TIMEOUT];
            #   3. nothing is pending -> the default _POLL_TIMEOUT.
            if self._callbacks:
                # If any callbacks or timeouts called add_callback,
                # we don't want to wait in poll() before we run them.
                poll_timeout = 0.0
            elif self._timeouts:
                # If there are any timeouts, schedule the first one.
                # Use self.time() instead of 'now' to account for time
                # spent running callbacks.
                poll_timeout = self._timeouts[0].deadline - self.time()
                poll_timeout = max(0, min(poll_timeout, _POLL_TIMEOUT))
            else:
                # No timeouts and no callbacks, so use the default.
                poll_timeout = _POLL_TIMEOUT

            if not self._running:
                break

            if self._blocking_signal_threshold is not None:
                # clear alarm so it doesn't fire while poll is waiting for
                # events.
                signal.setitimer(signal.ITIMER_REAL, 0, 0)

            try:
                # self._impl.poll() yields the (fd, events) pairs that are
                # merged into self._events below.
                # NOTE(review): self._impl presumably wraps the operating
                # system's I/O multiplexing API and is supplied from
                # outside this method -- confirm.
                event_pairs = self._impl.poll(poll_timeout)
            except Exception as e:
                # Depending on python version and IOLoop implementation,
                # different exception types may be thrown and there are
                # two ways EINTR might be signaled:
                # * e.errno == errno.EINTR
                # * e.args is like (errno.EINTR, 'Interrupted system call')
                if errno_from_exception(e) == errno.EINTR:
                    continue
                else:
                    raise

            # Re-arm the blocking-watchdog timer now that poll() returned.
            if self._blocking_signal_threshold is not None:
                signal.setitimer(signal.ITIMER_REAL,
                                 self._blocking_signal_threshold, 0)

            # Pop one fd at a time from the set of pending fds and run
            # its handler.  Since that handler may perform actions on
            # other file descriptors, there may be reentrant calls to
            # this IOLoop that modify self._events
            self._events.update(event_pairs)
            while self._events:
                fd, events = self._events.popitem()
                try:
                    fd_obj, handler_func = self._handlers[fd]
                    # Invoke the handler registered for this descriptor.
                    handler_func(fd_obj, events)
                except (OSError, IOError) as e:
                    if errno_from_exception(e) == errno.EPIPE:
                        # Happens when the client closes the connection
                        pass
                    else:
                        self.handle_callback_exception(
                            self._handlers.get(fd))
                except Exception:
                    self.handle_callback_exception(self._handlers.get(fd))
            fd_obj = handler_func = None

    finally:
        # reset the stopped flag so another start/stop pair can be issued
        self._stopped = False
        if self._blocking_signal_threshold is not None:
            signal.setitimer(signal.ITIMER_REAL, 0, 0)
        IOLoop._current.instance = old_current
        # Only set when this call installed the waker as the wakeup fd.
        if old_wakeup_fd is not None:
            signal.set_wakeup_fd(old_wakeup_fd)
def add_signal_watch(signal_action):
    """Call `signal_action` when a signal that should end the program arrives.

    Python-level handlers are installed first so that a signal delivered
    before the main loop runs is still turned into an idle callback. Where
    GLib offers unix signal sources, those are registered as well.
    """

    if os.name == "nt":
        wanted = ["SIGINT", "SIGTERM"]
    else:
        wanted = ["SIGINT", "SIGTERM", "SIGHUP"]

    # signal number -> signal name, leaving out names this platform lacks
    candidates = (
        (getattr(signal, sig_name, None), sig_name) for sig_name in wanted)
    watched = {
        number: sig_name
        for number, sig_name in candidates
        if number is not None
    }

    # If Python catches a signal, wake up the main loop through a pipe.
    # This makes signal handling work with older pygobject/glib
    # (Ubuntu 12.04); the reason is unknown.
    read_end, write_end = os.pipe()

    def wakeup_notify(source, condition):
        # Drain one byte and keep the watch installed.
        if condition != GLib.IO_IN:
            return False
        try:
            os.read(read_end, 1)
        except EnvironmentError:
            pass
        return True

    try:
        import fcntl
    except ImportError:
        pass
    else:
        fcntl.fcntl(write_end, fcntl.F_SETFL, os.O_NONBLOCK)
        signal.set_wakeup_fd(write_end)
        watch_mask = GLib.IO_IN | GLib.IO_ERR | GLib.IO_HUP
        io_add_watch(read_end, GLib.PRIORITY_HIGH, watch_mask, wakeup_notify)

    def idle_handler(signum, frame):
        # Used before the main loop starts: defer the action to an idle
        # callback instead of running it inside the signal handler.
        print_d("Python signal handler activated: %s" % watched[signum])
        GLib.idle_add(signal_action, priority=GLib.PRIORITY_HIGH)

    for number, sig_name in watched.items():
        print_d("Register Python signal handler: %r" % sig_name)
        signal.signal(number, idle_handler)

    if os.name == "nt":
        return

    # Also use the official GLib handling when it is available.
    for attr in ("unix_signal_add", "unix_signal_add_full"):
        if hasattr(GLib, attr):
            glib_signal_add = getattr(GLib, attr)
            break
    else:
        print_d("Can't install GLib signal handler, too old gi or wrong OS")
        return

    def handler(signum):
        print_d("GLib signal handler activated: %s" % watched[signum])
        signal_action()

    for number, sig_name in watched.items():
        print_d("Register GLib signal handler: %r" % sig_name)
        glib_signal_add(GLib.PRIORITY_HIGH, number, handler, number)
def handle(self, *args, **options):
    """Run the dispatcher-master main loop until a signal is received.

    Sets up file logging, binds a ZMQ PULL socket (job logs) and a ZMQ
    ROUTER socket (dispatcher commands), then polls both together with a
    signal wakeup pipe. Each iteration may: store a received log message,
    garbage-collect idle log file handlers, process one dispatcher command
    (HELLO, HELLO_RETRY, PING, END, START_OK), flag silent dispatchers as
    offline and, at most once per DB_LIMIT seconds, send START and CANCEL
    messages for jobs found in the database.

    Keys read from ``options``: 'level', 'log_socket', 'master_socket',
    'output_dir', 'env' and 'env_dut'.
    """
    # FIXME: this function is getting much too long and complex.
    del logging.root.handlers[:]
    del logging.root.filters[:]
    # Create the logger
    FORMAT = '%(asctime)-15s %(levelname)s %(message)s'  # pylint: disable=invalid-name
    logging.basicConfig(format=FORMAT, filename='/var/log/lava-server/lava-master.log')
    self.logger = logging.getLogger('dispatcher-master')

    # Any level other than ERROR/WARN/INFO falls back to DEBUG.
    if options['level'] == 'ERROR':
        self.logger.setLevel(logging.ERROR)
    elif options['level'] == 'WARN':
        self.logger.setLevel(logging.WARN)
    elif options['level'] == 'INFO':
        self.logger.setLevel(logging.INFO)
    else:
        self.logger.setLevel(logging.DEBUG)

    # Create the sockets
    context = zmq.Context()
    pull_socket = context.socket(zmq.PULL)
    pull_socket.bind(options['log_socket'])
    controler = context.socket(zmq.ROUTER)
    controler.bind(options['master_socket'])

    # Log file handlers, keyed by job id
    logs = {}
    # Known dispatchers, keyed by hostname. At startup this is not loaded
    # from the database, which helps to tell whether a slave has restarted.
    dispatchers = {}
    # Last access to the database for new jobs and cancellations
    last_db_access = 0

    # Poll on the sockets. This gives a timeout along with the polling.
    poller = zmq.Poller()
    poller.register(pull_socket, zmq.POLLIN)
    poller.register(controler, zmq.POLLIN)

    # Mask signals and create a pipe that will receive a byte for each
    # signal received. Poll the pipe along with the zmq sockets so that we
    # can only be interrupted while reading data.
    (pipe_r, pipe_w) = os.pipe()
    flags = fcntl.fcntl(pipe_w, fcntl.F_GETFL, 0)
    fcntl.fcntl(pipe_w, fcntl.F_SETFL, flags | os.O_NONBLOCK)
    signal.set_wakeup_fd(pipe_w)
    # No-op Python handlers: the signals are noticed through the pipe.
    signal.signal(signal.SIGINT, lambda x, y: None)
    signal.signal(signal.SIGTERM, lambda x, y: None)
    signal.signal(signal.SIGQUIT, lambda x, y: None)
    poller.register(pipe_r, zmq.POLLIN)
    self.logger.info("[INIT] LAVA dispatcher-master has started.")

    while True:
        try:
            # TODO: Fix the timeout computation
            # Wait for data or a timeout
            sockets = dict(poller.poll(TIMEOUT * 1000))
        except zmq.error.ZMQError:
            continue

        # Data on the wakeup pipe means a signal arrived: leave the loop.
        if sockets.get(pipe_r) == zmq.POLLIN:
            self.logger.info("[POLL] Received a signal, leaving")
            break

        # Logging socket
        if sockets.get(pull_socket) == zmq.POLLIN:
            msg = pull_socket.recv_multipart()
            try:
                (job_id, level, name, message) = msg
            except ValueError:
                # do not let a bad message stop the master.
                self.logger.error(
                    "Failed to parse log message, skipping: %s", msg)
                continue

            # NOTE(review): yaml.load() is applied to data received from the
            # socket; confirm the sender is trusted or whether safe_load
            # would be sufficient here.
            try:
                scanned = yaml.load(message)
            except yaml.YAMLError:
                # failure to scan is not an error here, it just means the message is not a result
                scanned = None
            # the results logger wraps the OrderedDict in a dict called results, for identification,
            # YAML then puts that into a list of one item for each call to log.results.
            if type(scanned) is list and len(scanned) == 1:
                if type(scanned[0]) is dict and 'results' in scanned[0]:
                    job = TestJob.objects.get(id=job_id)
                    ret = map_scanned_results(scanned_dict=scanned[0], job=job)
                    if not ret:
                        self.logger.warning(
                            "[%s] Unable to map scanned results: %s" %
                            (job_id, yaml.dump(scanned[0])))

            # Clear filename: level and name become path components, so a
            # '/' in either one is rejected.
            if '/' in level or '/' in name:
                self.logger.error(
                    "[%s] Wrong level or name received, dropping the message",
                    job_id)
                continue
            filename = "%s/job-%s/pipeline/%s/%s-%s.log" % (
                options['output_dir'], job_id, level.split('.')[0], level, name)

            # Find the handler (if available)
            f_handler = None
            if job_id in logs:
                if filename != logs[job_id].filename:
                    # Close the old file handler
                    logs[job_id].close()
                    path = os.path.join('/tmp', 'lava-dispatcher', 'jobs',
                                        job_id, filename)
                    mkdir(os.path.dirname(path))
                    logs[job_id] = FileHandler(filename, path)
            else:
                self.logger.info("[%s] Receiving logs from a new job", job_id)
                path = os.path.join('/tmp', 'lava-dispatcher', 'jobs',
                                    job_id, filename)
                mkdir(os.path.dirname(path))
                logs[job_id] = FileHandler(filename, path)

            # Mark the file handler as used
            # TODO: try to use a more pythonic way
            logs[job_id].last_usage = time.time()

            # n.b. logging here would produce a log entry for every message in every job.

            # Write data
            f_handler = logs[job_id].fd
            f_handler.write(message)
            f_handler.write('\n')
            f_handler.flush()

            # FIXME: to be removed when the web UI knows how to deal with
            # pipeline logs
            filename = os.path.join(options['output_dir'],
                                    "job-%s" % job_id,
                                    'output.txt')
            with open(filename, 'a+') as f_out:
                f_out.write(message)
                f_out.write('\n')

        # Garbage collect file handlers
        now = time.time()
        # NOTE(review): entries are deleted while iterating logs.keys();
        # this relies on keys() returning a list copy - confirm the
        # targeted Python version.
        for job_id in logs.keys():
            if now - logs[job_id].last_usage > FD_TIMEOUT:
                self.logger.info("[%s] Collecting file handler '%s'",
                                 job_id, logs[job_id].filename)
                logs[job_id].close()
                del logs[job_id]

        # Command socket
        if sockets.get(controler) == zmq.POLLIN:
            msg = controler.recv_multipart()
            self.logger.debug("[CC] Receiving: %s", msg)

            # 1: the hostname (see ZMQ documentation)
            hostname = msg[0]
            # 2: the action
            action = msg[1]
            # Handle the actions
            if action == 'HELLO':
                self.logger.info("%s => HELLO", hostname)
                controler.send_multipart([hostname, 'HELLO_OK'])
                # If the dispatcher is known and sent an HELLO, means that
                # the slave has restarted
                if hostname in dispatchers:
                    self.logger.warning("Dispatcher <%s> has RESTARTED", hostname)
                else:
                    self.logger.warning("New dispatcher <%s>", hostname)
                    dispatchers[hostname] = SlaveDispatcher(hostname, online=True)

                self._cancel_slave_dispatcher_jobs(hostname)

                # Mark the dispatcher as alive
                dispatchers[hostname].alive()

            elif action == "HELLO_RETRY":
                self.logger.info("%s => HELLO_RETRY", hostname)
                controler.send_multipart([hostname, "HELLO_OK"])

                if hostname in dispatchers:
                    # Assume the HELLO command was received, and the
                    # action succeeded.
                    self.logger.warning(
                        "Dispatcher <%s> was not confirmed", hostname)
                else:
                    # No dispatcher, treat it as a normal HELLO message.
                    self.logger.warning("New dispatcher <%s>", hostname)
                    dispatchers[hostname] = SlaveDispatcher(hostname, online=True)

                    self._cancel_slave_dispatcher_jobs(hostname)

                # Mark the dispatcher as alive
                dispatchers[hostname].alive()

            elif action == 'PING':
                self.logger.debug("%s => PING", hostname)
                # Send back a signal
                controler.send_multipart([hostname, 'PONG'])

                if hostname not in dispatchers:
                    # The server crashed: send a STATUS message
                    self.logger.warning(
                        "Unknown dispatcher <%s> (server crashed)", hostname)
                    dispatchers[hostname] = SlaveDispatcher(hostname, online=True)
                    send_status(hostname, controler, self.logger)

                # Mark the dispatcher as alive
                dispatchers[hostname].alive()

            elif action == 'END':
                status = TestJob.COMPLETE
                try:
                    job_id = int(msg[2])
                    job_status = int(msg[3])
                except (IndexError, ValueError):
                    self.logger.error("Invalid message from <%s> '%s'",
                                      hostname, msg)
                    continue
                # A non-zero job_status is recorded as INCOMPLETE.
                if job_status:
                    self.logger.info("[%d] %s => END with error %d",
                                     job_id, hostname, job_status)
                    status = TestJob.INCOMPLETE
                else:
                    self.logger.info("[%d] %s => END", job_id, hostname)
                try:
                    with transaction.atomic():
                        job = TestJob.objects.select_for_update() \
                                             .get(id=job_id)
                        if job.status == TestJob.CANCELING:
                            cancel_job(job)
                        else:
                            end_job(job, job_status=status)
                except TestJob.DoesNotExist:
                    self.logger.error("[%d] Unknown job", job_id)
                # ACK even if the job is unknown to let the dispatcher
                # forget about it
                controler.send_multipart([hostname, 'END_OK', str(job_id)])

                if hostname not in dispatchers:
                    # The server crashed: send a STATUS message
                    self.logger.warning(
                        "Unknown dispatcher <%s> (server crashed)", hostname)
                    dispatchers[hostname] = SlaveDispatcher(hostname, online=True)
                    send_status(hostname, controler, self.logger)

                # Mark the dispatcher as alive
                dispatchers[hostname].alive()

            elif action == 'START_OK':
                try:
                    job_id = int(msg[2])
                except (IndexError, ValueError):
                    self.logger.error("Invalid message from <%s> '%s'",
                                      hostname, msg)
                    continue
                self.logger.info("[%d] %s => START_OK", job_id, hostname)
                try:
                    with transaction.atomic():
                        job = TestJob.objects.select_for_update() \
                                             .get(id=job_id)
                        start_job(job)
                except TestJob.DoesNotExist:
                    self.logger.error("[%d] Unknown job", job_id)

                if hostname not in dispatchers:
                    # The server crashed: send a STATUS message
                    self.logger.warning(
                        "Unknown dispatcher <%s> (server crashed)", hostname)
                    dispatchers[hostname] = SlaveDispatcher(hostname, online=True)
                    send_status(hostname, controler, self.logger)

                # Mark the dispatcher as alive
                dispatchers[hostname].alive()

            else:
                self.logger.error("<%s> sent unknown action=%s, args=(%s)",
                                  hostname, action, msg[1:])

        # Check dispatchers status
        now = time.time()
        for hostname in dispatchers.keys():
            dispatcher = dispatchers[hostname]
            if dispatcher.online and now - dispatcher.last_msg > DISPATCHER_TIMEOUT:
                self.logger.error("Dispatcher <%s> goes OFFLINE", hostname)
                dispatchers[hostname].online = False
                # TODO: DB: mark the dispatcher as offline and attached
                # devices

        # Limit accesses to the database. This will also limit the rate of
        # CANCEL and START messages
        if now - last_db_access > DB_LIMIT:
            last_db_access = now
            # Dispatch jobs
            # TODO: make this atomic
            # NOTE(review): not_allocated is never incremented in this
            # function, so the summary log below cannot fire as written.
            not_allocated = 0
            # only pick up pipeline jobs with devices in Reserved state
            for job in TestJob.objects.filter(
                    status=TestJob.SUBMITTED,
                    is_pipeline=True,
                    actual_device__isnull=False).order_by(
                        '-health_check', '-priority', 'submit_time', 'target_group', 'id'):
                if job.dynamic_connection:
                    # A secondary connection must be made from a dispatcher local to the host device
                    # to allow for local firewalls etc. So the secondary connection is started on the
                    # remote worker of the "nominated" host.
                    # FIXME:
                    worker_host = job.lookup_worker
                    self.logger.info("[%d] START => %s (connection)", job.id,
                                     worker_host.hostname)
                else:
                    device = select_device(job)
                    if not device:
                        continue
                    # selecting device can change the job
                    job = TestJob.objects.get(id=job.id)
                    self.logger.info("[%d] Assigning %s device", job.id, device)
                    if job.actual_device is None:
                        device = job.requested_device

                        # Launch the job
                        create_job(job, device)
                        self.logger.info("[%d] START => %s (%s)", job.id,
                                         device.worker_host.hostname,
                                         device.hostname)
                        worker_host = device.worker_host
                    else:
                        device = job.actual_device
                        self.logger.info(
                            "[%d] START => %s (%s) (retrying)", job.id,
                            device.worker_host.hostname, device.hostname)
                        worker_host = device.worker_host
                try:
                    # Load job definition to get the variables for template
                    # rendering
                    job_def = yaml.load(job.definition)
                    job_ctx = job_def.get('context', {})

                    # Load device configuration
                    device_configuration = None \
                        if job.dynamic_connection else device.load_device_configuration(job_ctx)

                    if job.is_multinode:
                        for group_job in job.sub_jobs_list:
                            if group_job.dynamic_connection:
                                # to get this far, the rest of the multinode group must also be ready
                                # so start the dynamic connections
                                # FIXME: rationalise and streamline
                                # NOTE(review): the file opened for
                                # options['env'] is not explicitly closed.
                                controler.send_multipart([
                                    str(worker_host.hostname),
                                    'START', str(group_job.id),
                                    str(group_job.definition),
                                    str(device_configuration),
                                    str(open(options['env'], 'r').read())
                                ])

                    controler.send_multipart([
                        str(worker_host.hostname),
                        'START', str(job.id),
                        str(job.definition),
                        str(device_configuration),
                        get_env_string(options['env']),
                        get_env_string(options['env_dut'])
                    ])

                except (jinja2.TemplateError, IOError, yaml.YAMLError) as exc:
                    # Build a failure comment matching the kind of error,
                    # then mark the job INCOMPLETE.
                    if isinstance(exc, jinja2.TemplateNotFound):
                        self.logger.error("Template not found: '%s'", exc.message)
                        msg = "Infrastructure error: Template not found: '%s'" % \
                              exc.message
                    elif isinstance(exc, jinja2.TemplateSyntaxError):
                        self.logger.error(
                            "Template syntax error in '%s', line %d: %s",
                            exc.name, exc.lineno, exc.message)
                        msg = "Infrastructure error: Template syntax error in '%s', line %d: %s" % \
                              (exc.name, exc.lineno, exc.message)
                    elif isinstance(exc, IOError):
                        self.logger.error("Unable to read '%s': %s",
                                          options['env'], exc.strerror)
                        msg = "Infrastructure error: cannot open '%s': %s" % \
                              (options['env'], exc.strerror)
                    elif isinstance(exc, yaml.YAMLError):
                        self.logger.error(
                            "Unable to parse job definition: %s", exc)
                        msg = "Infrastructure error: cannot parse job definition: %s" % \
                              exc
                    else:
                        self.logger.exception(exc)
                        msg = "Infrastructure error: %s" % exc.message

                    self.logger.error("[%d] INCOMPLETE job", job.id)
                    job.status = TestJob.INCOMPLETE
                    if job.dynamic_connection:
                        job.failure_comment = msg
                        job.save()
                    else:
                        # Release the device back to IDLE along with the job.
                        new_status = Device.IDLE
                        device.state_transition_to(
                            new_status,
                            message=msg,
                            job=job)
                        device.status = new_status
                        device.current_job = None
                        job.failure_comment = msg
                        job.save()
                        device.save()

            if not_allocated > 0:
                self.logger.info("%d jobs not allocated yet", not_allocated)

            # Handle canceling jobs
            for job in TestJob.objects.filter(status=TestJob.CANCELING, is_pipeline=True):
                worker_host = job.lookup_worker if job.dynamic_connection else job.actual_device.worker_host
                if not worker_host:
                    self.logger.warning("[%d] Invalid worker information" % job.id)
                    # shouldn't happen
                    fail_job(job, 'invalid worker information', TestJob.CANCELED)
                    continue
                self.logger.info("[%d] CANCEL => %s", job.id,
                                 worker_host.hostname)
                controler.send_multipart(
                    [str(worker_host.hostname), 'CANCEL', str(job.id)])

    # Closing sockets and dropping messages.
    self.logger.info("Closing the socket and dropping messages")
    controler.close(linger=0)
    pull_socket.close(linger=0)
    context.term()
def start_loop( self, sigStor ):
    '''Bind the server socket, register the selectors and enter the loop.

    When a unix socket file already exists, the process table is searched
    for another running instance first; a stale socket file is unlinked.

    sigStor is registered with the selector for read events and handled
    by self.stop.

    Returns 1 if another server instance appears to be running, -1 if the
    instance check or the socket setup fails, and otherwise whatever falls
    out after self.loop() returns (None).
    '''
    # Check for the previous instance
    if not self.host and os.path.exists( self.sockfile ):
        log.info( "Socket file for a server with name already exists. Checking for the server process." )
        # Socket file may remain after an unclean exit. Check if another
        # server is running.
        try:
            # If the server is restarted, give the previous instance time to exit cleanly.
            time.sleep( 2 )
            # NOTE(review): the server name is interpolated into a shell
            # command line; confirm it is validated before it gets here.
            if self.servername and self.servername != "regd":
                # -E is required: without it grep treats "(/cli.py)?" as
                # literal text and a running named server is never found.
                s = "ps -ef | grep -E '{0}(/cli.py)? start .*{1} {2}' | grep -v grep".format( app.APPNAME,
                                        app.clp( defs.SERVER_NAME ), self.servername )
            else:
                s = "ps -ef | grep -E '{0}(/cli.py)? start' | grep -v '{1}' | grep -v grep".format( 
                                        app.APPNAME, app.clc( defs.SERVER_NAME ) )

            res = subprocess.check_output( s, shell = True ).decode( 'utf-8' )

        except subprocess.CalledProcessError as e:
            # Exit status 1 from the pipeline only means "no matching line".
            if e.returncode != 1:
                log.error( "Check for already running server instance failed: {0} ".format( e.output ) )
                return -1
            else:
                res = ""

        if len( res ):
            # TODO
            if res.count( "\n" ) > 2:
                # Server is already running.
                log.warning( "Server is already running:\n{0}".format( res ) )
                return 1

        log.info( "Server process is not found. Unlinking the existing socket file." )
        try:
            os.unlink( self.sockfile )
        except OSError:
            # Only a socket file that is still present is a real failure.
            if os.path.exists( self.sockfile ):
                raise

    self.useruid = os.getuid()
    if self.host:
        log.info( "Starting regd server. useruid: {0} ; host: {1} ; port: {2}.".format( 
                                            self.useruid, self.host, self.port ) )
    else:
        log.info( "Starting regd server. useruid: {0} ; sockfile: {1} ; servername: {2}.".format( 
                                            self.useruid, self.sockfile, self.servername ) )
    # Start time with the fractional seconds cut off
    self.info["time_started"] = str( datetime.datetime.now() ).rpartition( "." )[0]

    # Set up sockets
    try:
        if self.host:
            self.sock = socket.socket( socket.AF_INET, socket.SOCK_STREAM )
            self.sock.setsockopt( socket.SOL_SOCKET, socket.SO_REUSEADDR, 1 )
            self.sock.bind( ( self.host, int( self.port ) ) )
            # An empty file is written at the sockfile path for TCP servers.
            with open( self.sockfile, "w" ) as f:
                f.write( '' )

        else:
            self.sock = socket.socket( socket.AF_UNIX, socket.SOCK_STREAM )
            self.sock.bind( self.sockfile )
            os.chmod( self.sockfile, mode = 0o777 )

    except OSError as e:
        log.error( "Cannot create or bind socket: %s" % ( e ) )
        return -1

    self.sock.listen( 1 )
    self.sock.settimeout( 30 )

    self.sel = selectors.DefaultSelector()
    # Socket pair used as the signal wakeup channel: the write end is the
    # wakeup fd, the read end is watched by the selector.
    self.sigsock_r, self.sigsock_w = socket.socketpair()
    self.sigsock_r.setblocking( False )
    self.sigsock_w.setblocking( False )
    os.set_inheritable( self.sigsock_w.fileno(), True )
    self.sock.setblocking( False )
    signal.set_wakeup_fd( self.sigsock_w.fileno() )
    self.sel.register( self.sock, selectors.EVENT_READ, self.accept )
    self.sel.register( self.sigsock_r, selectors.EVENT_READ, self.stop )
    self.sel.register( sigStor, selectors.EVENT_READ, self.stop )
    self.loop( self.sock, )
def record_command(self, command, output, env=os.environ):
    """Run `command` in a pseudoterminal and copy its output to `output`.

    Data read from the child is echoed to our stdout and passed to
    ``output.write``; data read from our stdin is forwarded to the child.
    Signals are received through a wakeup pipe: SIGWINCH resizes the child
    pty, and SIGCHLD, SIGHUP, SIGTERM or SIGQUIT end the copy loop.

    command -- argv list; command[0] is executed via os.execvpe
    output  -- object providing write(data) and close()
    env     -- environment mapping for the child process
    """
    master_fd = None

    def _set_pty_size():
        '''
        Sets the window size of the child pty based on the window size
        of our own controlling terminal.
        '''

        # Get the terminal size of the real terminal, set it on the pseudoterminal.
        if os.isatty(pty.STDOUT_FILENO):
            buf = array.array('h', [0, 0, 0, 0])
            fcntl.ioctl(pty.STDOUT_FILENO, termios.TIOCGWINSZ, buf, True)
            fcntl.ioctl(master_fd, termios.TIOCSWINSZ, buf)
        else:
            # No terminal to copy from: fall back to 24 rows x 80 columns.
            buf = array.array('h', [24, 80, 0, 0])
            fcntl.ioctl(master_fd, termios.TIOCSWINSZ, buf)

    def _write_stdout(data):
        '''Writes to stdout as if the child process had written the data.'''

        os.write(pty.STDOUT_FILENO, data)

    def _handle_master_read(data):
        '''Handles new data on child process stdout.'''

        _write_stdout(data)
        output.write(data)

    def _write_master(data):
        '''Writes to the child process from its controlling terminal.'''

        # os.write may be partial; loop until everything is written.
        while data:
            n = os.write(master_fd, data)
            data = data[n:]

    def _handle_stdin_read(data):
        '''Handles new data on child process stdin.'''

        _write_master(data)

    def _signals(signal_list):
        '''Installs (signal, handler) pairs and returns the previous ones.'''

        old_handlers = []
        for sig, handler in signal_list:
            old_handlers.append((sig, signal.signal(sig, handler)))
        return old_handlers

    def _noop_handler(signum, frame):
        '''Does nothing; the signal is seen through the wakeup pipe.'''

    def _copy(signal_fd):
        '''Main select loop.

        Passes control to _handle_master_read() or _handle_stdin_read()
        when new data arrives, and reacts to signal numbers read from
        signal_fd.
        '''

        fds = [master_fd, pty.STDIN_FILENO, signal_fd]

        while True:
            try:
                rfds, wfds, xfds = select.select(fds, [], [])
            except (OSError, select.error) as e:
                # OSError carries errno (Python >= 3.3); the older
                # select.error only exposes it as args[0].
                code = getattr(e, 'errno', None)
                if code is None and e.args:
                    code = e.args[0]
                if code == errno.EINTR:
                    continue
                # Anything else must not fall through with rfds unset.
                raise

            if master_fd in rfds:
                data = os.read(master_fd, 1024)
                if not data:  # Reached EOF.
                    fds.remove(master_fd)
                else:
                    _handle_master_read(data)

            if pty.STDIN_FILENO in rfds:
                data = os.read(pty.STDIN_FILENO, 1024)
                if not data:
                    fds.remove(pty.STDIN_FILENO)
                else:
                    _handle_stdin_read(data)

            if signal_fd in rfds:
                data = os.read(signal_fd, 1024)
                if data:
                    # One byte per received signal number.
                    signals = struct.unpack('%uB' % len(data), data)
                    for sig in signals:
                        if sig in [
                                signal.SIGCHLD, signal.SIGHUP, signal.SIGTERM,
                                signal.SIGQUIT
                        ]:
                            os.close(master_fd)
                            return
                        elif sig == signal.SIGWINCH:
                            _set_pty_size()

    pid, master_fd = pty.fork()

    if pid == pty.CHILD:
        os.execvpe(command[0], command, env)

    # Non-blocking pipe that receives the number of every signal delivered.
    pipe_r, pipe_w = os.pipe()
    flags = fcntl.fcntl(pipe_w, fcntl.F_GETFL, 0)
    flags = flags | os.O_NONBLOCK
    fcntl.fcntl(pipe_w, fcntl.F_SETFL, flags)

    old_wakeup_fd = signal.set_wakeup_fd(pipe_w)

    old_handlers = _signals([
        (sig, _noop_handler)
        for sig in (signal.SIGWINCH, signal.SIGCHLD, signal.SIGHUP,
                    signal.SIGTERM, signal.SIGQUIT)
    ])

    try:
        mode = tty.tcgetattr(pty.STDIN_FILENO)
        tty.setraw(pty.STDIN_FILENO)
        restore = 1
    except tty.error:  # This is the same as termios.error
        restore = 0

    _set_pty_size()

    try:
        _copy(pipe_r)
    except (IOError, OSError, select.error):
        pass
    finally:
        if restore:
            tty.tcsetattr(pty.STDIN_FILENO, tty.TCSAFLUSH, mode)

        # Undo the signal plumbing so neither the handlers nor the wakeup
        # pipe outlive this call.
        _signals(old_handlers)
        signal.set_wakeup_fd(old_wakeup_fd)
        os.close(pipe_r)
        os.close(pipe_w)

    os.waitpid(pid, 0)
    output.close()
async def runner() -> None:
    """Serve connections until a signal arrives, then shut down cleanly.

    Starts the queued task coroutines, binds the listening socket, polls
    it together with a signal wakeup pipe and spawns a handler task per
    accepted client. After a signal, the sockets are closed, the server's
    own tasks are cancelled and in-progress handlers get up to 5 seconds
    to finish before they are cancelled as well.
    """
    log(f'=== Starting up {self.name} ===', Ansi.LMAGENTA)
    loop = asyncio.get_running_loop()

    # Call our before_serving coroutine,
    # if there is one specified.
    if self.before_serving:
        await self.before_serving()

    # Start pending coroutine tasks.
    if self.debug:
        log(f'-> Starting {len(self._task_coros)} tasks.', Ansi.LMAGENTA)

    for coro in self._task_coros:
        task = loop.create_task(coro)
        task.add_done_callback(self._default_cb) # XXX: never removed?
        self.tasks.add(task)

    self._task_coros.clear()

    # Setup socket & begin listening

    # Remove a socket file left at this path.
    if self.using_unix_socket:
        if os.path.exists(addr):
            os.remove(addr)

    # read/write signal listening socks
    sig_rsock, sig_wsock = os.pipe()
    os.set_blocking(sig_wsock, False)
    signal.set_wakeup_fd(sig_wsock)

    # connection listening sock
    lsock = socket.socket(self.sock_family)
    lsock.setblocking(False)

    lsock.bind(addr)

    if self.using_unix_socket:
        os.chmod(addr, 0o777)

    lsock.listen(self.max_conns)
    log(f'-> Listening @ {addr}', AnsiRGB(0x00ff7f))

    # TODO: terminal input support (tty, termios)
    # though, tbh this should be moved into gulag as it's
    # mostly a gulag-specific thing, and it'll be easier
    # to manage all the printing stuff that way.

    should_close = False
    # NOTE(review): should_restart is recorded on SIGUSR1 but not read
    # anywhere in this function.
    should_restart = False

    while True:
        await asyncio.sleep(0.01) # skip loop iteration
        # Zero timeout: just check readiness, never block the event loop.
        rlist, _, _ = select.select([lsock, sig_rsock], [], [], 0)

        for reader in rlist:
            if reader is lsock:
                # new connection received for server
                client, _ = await loop.sock_accept(lsock)
                task = loop.create_task(self.handle(client))
                task.add_done_callback(self._default_cb)
            elif reader is sig_rsock:
                # received a blocked signal, shutdown
                sig_received = signal.Signals(os.read(sig_rsock, 1)[0])
                if sig_received is signal.SIGINT:
                    print('\x1b[2K', end='\r') # clear ^C from console
                elif sig_received is signal.SIGUSR1:
                    should_restart = True
                log(f'Received {signal.strsignal(sig_received)}', Ansi.LRED)
                should_close = True
            else:
                raise RuntimeError(f'Unknown reader {reader}')

        if should_close:
            break

    # server closed, clean things up.

    # Unregister the wakeup fd before closing it, so a late signal cannot
    # be written to a closed (or reused) descriptor.
    signal.set_wakeup_fd(-1)
    # Close through the socket object so it does not later close an fd
    # number that has been reused for something else.
    lsock.close()
    for pipe_fd in (sig_rsock, sig_wsock):
        os.close(pipe_fd)

    if self.using_unix_socket:
        os.remove(addr)

    log('-> Cancelling tasks', Ansi.LMAGENTA)
    for task in self.tasks:
        task.cancel()

    await asyncio.gather(*self.tasks, return_exceptions=True)

    if in_progress := [t for t in asyncio.all_tasks()
                       if t is not asyncio.current_task()]:
        # allow up to 5 seconds for in-progress handlers
        # to finish their execution, just in case they're
        # in a half-complete state. we wouldn't want to
        # get any sql tables into a weird state, or alike.
        log(f'-> Awaiting {len(in_progress)} '
            'in-progress handler(s).', Ansi.LMAGENTA)
        # asyncio.wait() does not raise on timeout; it reports the
        # unfinished tasks in its second return value.
        _, pending = await asyncio.wait(in_progress, timeout=5.0)

        if pending:
            log('-> Timed out awaiting handlers, cancelling them.', Ansi.LMAGENTA)
            for task in pending:
                task.cancel()
            await asyncio.gather(*pending, return_exceptions=True)
import atexit
import functools
import os
import signal
import sys
import traceback

import pyuv

from .util import set_nonblocking, close_fd, SharedPoll


# Module-level signal wakeup pipe.
#
# On POSIX, when signal.set_wakeup_fd is available, a non-blocking pipe is
# created and its write end installed as the wakeup fd. If another wakeup fd
# was already installed it is put back and the pipe is discarded. The result
# is published as _signal_check_rfd / _signal_check_wfd, both None when no
# pipe is in use.
if hasattr(signal, 'set_wakeup_fd') and os.name == 'posix':
    rfd, wfd = os.pipe()
    set_nonblocking(rfd)
    set_nonblocking(wfd)
    try:
        old_wakeup_fd = signal.set_wakeup_fd(wfd)
        if old_wakeup_fd != -1:
            # Somebody else owns the wakeup fd: restore it and back off.
            # NOTE(review): _signal_check_rfd/_signal_check_wfd are not
            # assigned on this path - confirm no reader relies on them.
            signal.set_wakeup_fd(old_wakeup_fd)
            close_fd(rfd)
            close_fd(wfd)
        else:
            _signal_check_rfd, _signal_check_wfd = rfd, wfd
            # Close both pipe ends at interpreter exit.
            atexit.register(close_fd, rfd)
            atexit.register(close_fd, wfd)
    except ValueError:
        # set_wakeup_fd raised ValueError: run without a wakeup pipe.
        _signal_check_rfd, _signal_check_wfd = None, None
        close_fd(rfd)
        close_fd(wfd)
else:
    _signal_check_rfd, _signal_check_wfd = None, None
def close(self):
    """Restore the previous signal wakeup fd and close both sockets.

    The wakeup fd is restored first so that a signal arriving during the
    teardown is never written to a descriptor that has just been closed.
    The sockets are closed even if restoring the wakeup fd raises.
    """
    try:
        if self.old_wakeup_fd is not None:
            signal.set_wakeup_fd(self.old_wakeup_fd)
    finally:
        self.wakeup_sock.close()
        self.write_sock.close()
""" import errno import fcntl import os import select import signal # create a non blocking pipe pipe_r, pipe_w = os.pipe() flags = fcntl.fcntl(pipe_w, fcntl.F_GETFL, 0) flags = flags | os.O_NONBLOCK flags = fcntl.fcntl(pipe_w, fcntl.F_SETFL, flags) # set the write end of the pipe as the target of signals signal.set_wakeup_fd(pipe_w) # write a signal handler for our signal, otherwise we will # exit every time the signal is received... signal.signal(signal.SIGUSR1, lambda x, y: None) ''' We need this functions since python, unlike glibc, does not restart system calls. This means that when a signal arrives the poll system call will be broken, signal handler called by the system call not restarted. This means that the signal handler will work, will write the single byte to the pipe, but we will not be woken up by poll and instead, as is in python, an IOError will fly out with errno=EINTR. To overcome this we need a restartable poll '''
def start(self):
    """Run the I/O loop until it is stopped.

    Each iteration runs the queued callbacks, fires the timeouts that are
    due, polls the underlying implementation and dispatches the returned
    events to the registered handlers. If the loop was already marked as
    stopped, the flag is cleared and the call returns immediately.

    The current-loop reference, the interval timer and the signal wakeup
    fd are restored on the way out, including when an exception escapes
    the loop.
    """
    if not logging.getLogger().handlers:
        # The IOLoop catches and logs exceptions, so it's
        # important that log output be visible.  However, python's
        # default behavior for non-root loggers (prior to python
        # 3.2) is to print an unhelpful "no handlers could be
        # found" message rather than the actual log entry, so we
        # must explicitly configure logging if we've made it this
        # far without anything.
        logging.basicConfig()
    if self._stopped:
        self._stopped = False
        return
    old_current = getattr(IOLoop._current, "instance", None)
    IOLoop._current.instance = self
    self._thread_ident = thread.get_ident()
    self._running = True

    # signal.set_wakeup_fd closes a race condition in event loops:
    # a signal may arrive at the beginning of select/poll/etc
    # before it goes into its interruptible sleep, so the signal
    # will be consumed without waking the select.  The solution is
    # for the (C, synchronous) signal handler to write to a pipe,
    # which will then be seen by select.
    #
    # In python's signal handling semantics, this only matters on the
    # main thread (fortunately, set_wakeup_fd only works on the main
    # thread and will raise a ValueError otherwise).
    #
    # If someone has already set a wakeup fd, we don't want to
    # disturb it.  This is an issue for twisted, which does its
    # SIGCHILD processing in response to its own wakeup fd being
    # written to.  As long as the wakeup fd is registered on the IOLoop,
    # the loop will still wake up and everything should work.
    old_wakeup_fd = None
    if hasattr(signal, 'set_wakeup_fd') and os.name == 'posix':
        # requires python 2.6+, unix.  set_wakeup_fd exists but crashes
        # the python process on windows.
        try:
            old_wakeup_fd = signal.set_wakeup_fd(self._waker.write_fileno())
            if old_wakeup_fd != -1:
                # Already set, restore previous value.  This is a little racy,
                # but there's no clean get_wakeup_fd and in real use the
                # IOLoop is just started once at the beginning.
                signal.set_wakeup_fd(old_wakeup_fd)
                old_wakeup_fd = None
        except ValueError:  # non-main thread
            pass

    try:
        while True:
            poll_timeout = 3600.0

            # Prevent IO event starvation by delaying new callbacks
            # to the next iteration of the event loop.
            with self._callback_lock:
                callbacks = self._callbacks
                self._callbacks = []
            for callback in callbacks:
                self._run_callback(callback)

            if self._timeouts:
                now = self.time()
                while self._timeouts:
                    if self._timeouts[0].callback is None:
                        # the timeout was cancelled
                        heapq.heappop(self._timeouts)
                        self._cancellations -= 1
                    elif self._timeouts[0].deadline <= now:
                        timeout = heapq.heappop(self._timeouts)
                        self._run_callback(timeout.callback)
                    else:
                        # Next timeout is in the future: sleep no longer
                        # than until its deadline.
                        seconds = self._timeouts[0].deadline - now
                        poll_timeout = min(seconds, poll_timeout)
                        break
                if (self._cancellations > 512
                        and self._cancellations > (len(self._timeouts) >> 1)):
                    # Clean up the timeout queue when it gets large and it's
                    # more than half cancellations.
                    self._cancellations = 0
                    self._timeouts = [x for x in self._timeouts
                                      if x.callback is not None]
                    heapq.heapify(self._timeouts)

            if self._callbacks:
                # If any callbacks or timeouts called add_callback,
                # we don't want to wait in poll() before we run them.
                poll_timeout = 0.0

            if not self._running:
                break

            if self._blocking_signal_threshold is not None:
                # clear alarm so it doesn't fire while poll is waiting for
                # events.
                signal.setitimer(signal.ITIMER_REAL, 0, 0)

            try:
                event_pairs = self._impl.poll(poll_timeout)
            except Exception as e:
                # Depending on python version and IOLoop implementation,
                # different exception types may be thrown and there are
                # two ways EINTR might be signaled:
                # * e.errno == errno.EINTR
                # * e.args is like (errno.EINTR, 'Interrupted system call')
                if (getattr(e, 'errno', None) == errno.EINTR or
                    (isinstance(getattr(e, 'args', None), tuple) and
                     len(e.args) == 2 and e.args[0] == errno.EINTR)):
                    continue
                else:
                    raise

            if self._blocking_signal_threshold is not None:
                signal.setitimer(signal.ITIMER_REAL,
                                 self._blocking_signal_threshold, 0)

            # Pop one fd at a time from the set of pending fds and run
            # its handler. Since that handler may perform actions on
            # other file descriptors, there may be reentrant calls to
            # this IOLoop that update self._events
            self._events.update(event_pairs)
            while self._events:
                fd, events = self._events.popitem()
                try:
                    self._handlers[fd](fd, events)
                except (OSError, IOError) as e:
                    # Guard against exceptions raised without arguments,
                    # where e.args[0] would itself raise IndexError.
                    code = e.args[0] if e.args else getattr(e, 'errno', None)
                    if code == errno.EPIPE:
                        # Happens when the client closes the connection
                        pass
                    else:
                        app_log.error("Exception in I/O handler for fd %s",
                                      fd, exc_info=True)
                except Exception:
                    app_log.error("Exception in I/O handler for fd %s",
                                  fd, exc_info=True)
    finally:
        # reset the stopped flag so another start/stop pair can be issued
        self._stopped = False
        if self._blocking_signal_threshold is not None:
            signal.setitimer(signal.ITIMER_REAL, 0, 0)
        IOLoop._current.instance = old_current
        if old_wakeup_fd is not None:
            signal.set_wakeup_fd(old_wakeup_fd)
def run(self):
    """Reset signal wakeup and event-loop policy state, then run the parent.

    The order matters: both resets happen before the parent class's
    ``run()`` is invoked.
    """
    # Unregister any signal wakeup fd currently installed in this process.
    signal.set_wakeup_fd(-1)
    # presumably asyncio's set_event_loop_policy; passing None is expected
    # to restore the default policy - TODO confirm against the import.
    set_event_loop_policy(None)
    super().run()
def test_warn_set_wakeup_fd_overwrite():
    """Check when a guest run warns about a pre-existing signal wakeup fd.

    A RuntimeWarning is expected when a wakeup fd is already installed,
    unless the host declares that it uses set_wakeup_fd itself; in that
    case the fd must be left untouched. No RuntimeWarning is expected
    when no wakeup fd is installed.
    """
    # Imported locally so this test does not depend on a module-level import.
    import warnings

    assert signal.set_wakeup_fd(-1) == -1

    async def trio_main(in_host):
        return "ok"

    a, b = socket.socketpair()
    with a, b:
        a.setblocking(False)

        # Warn if there's already a wakeup fd
        signal.set_wakeup_fd(a.fileno())
        try:
            with pytest.warns(RuntimeWarning, match="signal handling code.*collided"):
                assert trivial_guest_run(trio_main) == "ok"
        finally:
            assert signal.set_wakeup_fd(-1) == a.fileno()

        signal.set_wakeup_fd(a.fileno())
        try:
            with pytest.warns(RuntimeWarning, match="signal handling code.*collided"):
                assert (
                    trivial_guest_run(
                        trio_main, host_uses_signal_set_wakeup_fd=False
                    )
                    == "ok"
                )
        finally:
            assert signal.set_wakeup_fd(-1) == a.fileno()

        # Don't warn if there isn't already a wakeup fd. A RuntimeWarning
        # is turned into an error so that it fails the test; this replaces
        # the deprecated pytest.warns(None) idiom.
        with warnings.catch_warnings():
            warnings.simplefilter("error", RuntimeWarning)
            assert trivial_guest_run(trio_main) == "ok"

        with warnings.catch_warnings():
            warnings.simplefilter("error", RuntimeWarning)
            assert (
                trivial_guest_run(trio_main, host_uses_signal_set_wakeup_fd=True)
                == "ok"
            )

        # If there's already a wakeup fd, but we've been told to trust it,
        # then it's left alone and there's no warning
        signal.set_wakeup_fd(a.fileno())
        try:

            async def trio_check_wakeup_fd_unaltered(in_host):
                fd = signal.set_wakeup_fd(-1)
                # Put the fd back before asserting, so a failure here does
                # not also trip the assertion in the finally clause below.
                signal.set_wakeup_fd(fd)
                assert fd == a.fileno()
                return "ok"

            with warnings.catch_warnings():
                warnings.simplefilter("error", RuntimeWarning)
                assert (
                    trivial_guest_run(
                        trio_check_wakeup_fd_unaltered,
                        host_uses_signal_set_wakeup_fd=True,
                    )
                    == "ok"
                )
        finally:
            assert signal.set_wakeup_fd(-1) == a.fileno()
async def trio_check_wakeup_fd_unaltered(in_host):
    """Assert that the installed wakeup fd is still ``a``'s, then reinstall it.

    The only way to read the current wakeup fd is to swap it out, so it is
    replaced with -1 for the duration of the check and put back afterwards.
    Returns the string "ok".
    """
    installed_fd = signal.set_wakeup_fd(-1)
    assert installed_fd == a.fileno()
    signal.set_wakeup_fd(installed_fd)
    return "ok"
def _execute(self, queue, tasks, log, locks, queue_lock, all_task_ids):
    """
    Executes the given tasks in a forked child process.

    The parent waits for the child to exit and, while waiting, periodically
    sends a heartbeat and renews the given locks. A signal wake-up pipe is
    used so that the wait is interrupted as soon as the child exits.

    Args:
        queue: Queue passed through to ``self._heartbeat``.
        tasks: Non-empty list of tasks; all must share one serialized func.
        log: Bound logger; rebound with the child's pid.
        locks: Locks to ``reacquire()`` on every wait iteration.
        queue_lock: Optional queue lock to ``renew()`` on every iteration.
        all_task_ids: Task ids passed through to ``self._heartbeat``.

    Returns:
        A boolean indicating whether the tasks were executed successfully
        (i.e. the child's wait status was 0).
    """
    # The tasks must use the same function.
    assert len(tasks)
    task_func = tasks[0].serialized_func
    assert all(task_func == task.serialized_func for task in tasks[1:])

    # Before executing periodic tasks, queue them for the next period.
    if task_func in self.tiger.periodic_task_funcs:
        tasks[0]._queue_for_next_period()

    with g_fork_lock:
        child_pid = os.fork()

    if child_pid == 0:
        # Child process
        log = log.bind(child_pid=os.getpid())

        # Disconnect the Redis connection inherited from the main process.
        # Note that this doesn't disconnect the socket in the main process.
        self.connection.connection_pool.disconnect()

        random.seed()

        # Ignore Ctrl+C in the child so we don't abort the job -- the main
        # process already takes care of a graceful shutdown.
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        # Run the tasks.
        success = self._execute_forked(tasks, log)

        # Wait for any threads that might be running in the child, just
        # like sys.exit() would. Note we don't call sys.exit() directly
        # because it would perform additional cleanup (e.g. calling atexit
        # handlers twice). See also: https://bugs.python.org/issue18966
        threading._shutdown()

        os._exit(int(not success))
    else:
        # Main process
        log = log.bind(child_pid=child_pid)
        for task in tasks:
            log.info(
                'processing',
                func=task_func,
                task_id=task.id,
                params={
                    'args': task.args,
                    'kwargs': task.kwargs
                },
            )

        # Attach a signal handler to SIGCHLD (sent when the child process
        # exits) so we can capture it.
        signal.signal(signal.SIGCHLD, sigchld_handler)

        # Since newer Python versions retry interrupted system calls we can't
        # rely on the fact that select() is interrupted with EINTR. Instead,
        # we'll set up a wake-up file descriptor below.

        # Create a new pipe and apply the non-blocking flag (required for
        # set_wakeup_fd).
        pipe_r, pipe_w = os.pipe()

        opened_fd = os.fdopen(pipe_r)
        flags = fcntl.fcntl(pipe_r, fcntl.F_GETFL, 0)
        flags = flags | os.O_NONBLOCK
        fcntl.fcntl(pipe_r, fcntl.F_SETFL, flags)
        flags = fcntl.fcntl(pipe_w, fcntl.F_GETFL, 0)
        flags = flags | os.O_NONBLOCK
        fcntl.fcntl(pipe_w, fcntl.F_SETFL, flags)

        # A byte will be written to pipe_w if a signal occurs (and can be
        # read from pipe_r).
        old_wakeup_fd = signal.set_wakeup_fd(pipe_w)

        def check_child_exit():
            """
            Do a non-blocking check to see if the child process exited.
            Returns None if the process is still running, or the exit code
            value of the child process.
            """
            while True:
                try:
                    pid, return_code = os.waitpid(child_pid, os.WNOHANG)
                except OSError as e:
                    # Of course EINTR can happen if the child process exits
                    # while we're checking whether it exited. In this case it
                    # should be safe to retry.
                    if e.errno == errno.EINTR:
                        continue
                    raise
                # pid == 0 means the child is still running.
                return return_code if pid != 0 else None

        try:
            # Wait for the child to exit and perform a periodic heartbeat.
            # We check for the child twice in this loop so that we avoid
            # unnecessary waiting if the child exited just before entering
            # the while loop or while renewing heartbeat/locks.
            while True:
                return_code = check_child_exit()
                if return_code is not None:
                    break

                # Wait until the timeout or a signal / child exit occurs.
                try:
                    # If observed the following behavior will be seen
                    # in the pipe when the parent process receives a
                    # SIGTERM while a task is running in a child process:
                    # Linux:
                    #   - 0 when parent receives SIGTERM
                    #   - select() exits with EINTR when child exit
                    #     triggers signal, so the signal in the
                    #     pipe is never seen since check_child_exit()
                    #     will see the child is gone
                    #
                    # macOS:
                    #   - 15 (SIGTERM) when parent receives SIGTERM
                    #   - 20 (SIGCHLD) when child exits
                    results = select.select(
                        [pipe_r],
                        [],
                        [],
                        self.config['ACTIVE_TASK_UPDATE_TIMER'],
                    )

                    if results[0]:
                        # Purge pipe so select will pause on next call
                        try:
                            # Behavior of a would be blocking read()
                            # Linux:
                            #   Python 2.7 Raises IOError
                            #   Python 3.x returns empty string
                            #
                            # macOS:
                            #   Returns empty string
                            opened_fd.read(1)
                        except IOError:
                            pass
                except select.error as e:
                    if e.args[0] != errno.EINTR:
                        raise

                return_code = check_child_exit()
                if return_code is not None:
                    break

                try:
                    self._heartbeat(queue, all_task_ids)
                    for lock in locks:
                        try:
                            lock.reacquire()
                        except LockError:
                            log.warning('could not reacquire lock',
                                        lock=lock.name)
                    if queue_lock:
                        acquired, _ = queue_lock.renew()
                        if not acquired:
                            log.debug('queue lock renew failure')
                except OSError as e:
                    # EINTR happens if the task completed. Since we're just
                    # renewing locks/heartbeat it's okay if we get interrupted.
                    if e.errno != errno.EINTR:
                        raise
        finally:
            # Restore signals / clean up. This runs on the error path too
            # (e.g. a failing heartbeat), so the pipe is not leaked and the
            # process-wide wakeup fd never keeps pointing at a dead pipe.
            signal.signal(signal.SIGCHLD, signal.SIG_DFL)
            signal.set_wakeup_fd(old_wakeup_fd)
            opened_fd.close()
            os.close(pipe_w)

        success = return_code == 0
        return success
def main(listener_fd, alive_r, preload, main_path=None, sys_path=None):
    '''Run forkserver.

    Serves fork requests arriving on a listening socket: for every request a
    child is forked, its pid is written back to the client, and once the
    child terminates its return code is written to the same client fd.

    listener_fd -- file descriptor of the AF_UNIX listening socket.
    alive_r     -- file descriptor that becomes readable (at EOF) when no
                   client processes are left; this ends the server.
    preload     -- module names to import before serving; if it contains
                   '__main__' and main_path is given, the main module is
                   imported from main_path first.
    main_path   -- path handed to spawn.import_main_path().
    sys_path    -- not used by this function.

    Raises SystemExit when alive_r becomes readable.
    '''
    if preload:
        if '__main__' in preload and main_path is not None:
            process.current_process()._inheriting = True
            try:
                spawn.import_main_path(main_path)
            finally:
                del process.current_process()._inheriting
        for modname in preload:
            # Preloading is best effort: a module that cannot be imported
            # is skipped.
            try:
                __import__(modname)
            except ImportError:
                pass

    util._close_stdin()

    # Pipe used as the signal wakeup fd; both ends are non-blocking.
    sig_r, sig_w = os.pipe()
    os.set_blocking(sig_r, False)
    os.set_blocking(sig_w, False)

    def sigchld_handler(*_unused):
        # Dummy signal handler, doesn't do anything
        pass

    handlers = {
        # unblocking SIGCHLD allows the wakeup fd to notify our event loop
        signal.SIGCHLD: sigchld_handler,
        # protect the process from ^C
        signal.SIGINT: signal.SIG_IGN,
    }
    # Install the handlers and remember the previous ones; they are handed
    # to _serve_one() in each forked child.
    old_handlers = {
        sig: signal.signal(sig, val)
        for (sig, val) in handlers.items()
    }

    # calling os.write() in the Python signal handler is racy
    signal.set_wakeup_fd(sig_w)

    # map child pids to client fds
    pid_to_fd = {}

    with socket.socket(socket.AF_UNIX, fileno=listener_fd) as listener, \
            selectors.DefaultSelector() as selector:
        _forkserver._forkserver_address = listener.getsockname()

        selector.register(listener, selectors.EVENT_READ)
        selector.register(alive_r, selectors.EVENT_READ)
        selector.register(sig_r, selectors.EVENT_READ)

        while True:
            try:
                # Block until at least one registered object is readable.
                while True:
                    rfds = [key.fileobj for (key, events) in selector.select()]
                    if rfds:
                        break

                if alive_r in rfds:
                    # EOF because no more client processes left
                    assert os.read(alive_r, 1) == b'', "Not at EOF?"
                    raise SystemExit

                if sig_r in rfds:
                    # Got SIGCHLD
                    os.read(sig_r, 65536)  # exhaust
                    while True:
                        # Scan for child processes
                        try:
                            pid, sts = os.waitpid(-1, os.WNOHANG)
                        except ChildProcessError:
                            break
                        if pid == 0:
                            break
                        child_w = pid_to_fd.pop(pid, None)
                        if child_w is not None:
                            # Negative value: the signal that killed the
                            # child; otherwise its exit status.
                            if os.WIFSIGNALED(sts):
                                returncode = -os.WTERMSIG(sts)
                            else:
                                if not os.WIFEXITED(sts):
                                    raise AssertionError(
                                        "Child {0:n} status is {1:n}".format(
                                            pid, sts))
                                returncode = os.WEXITSTATUS(sts)
                            # Send exit code to client process
                            try:
                                write_signed(child_w, returncode)
                            except BrokenPipeError:
                                # client vanished
                                pass
                            os.close(child_w)
                        else:
                            # This shouldn't happen really
                            warnings.warn('forkserver: waitpid returned '
                                          'unexpected pid %d' % pid)

                if listener in rfds:
                    # Incoming fork request
                    with listener.accept()[0] as s:
                        # Receive fds from client
                        fds = reduction.recvfds(s, MAXFDS_TO_SEND + 1)
                        if len(fds) > MAXFDS_TO_SEND:
                            raise RuntimeError(
                                "Too many ({0:n}) fds to send".format(
                                    len(fds)))
                        # First two fds are the child's pipe ends; the rest
                        # are passed on to _serve_one().
                        child_r, child_w, *fds = fds
                        s.close()
                        pid = os.fork()
                        if pid == 0:
                            # Child
                            code = 1
                            try:
                                listener.close()
                                selector.close()
                                unused_fds = [alive_r, child_w, sig_r, sig_w]
                                unused_fds.extend(pid_to_fd.values())
                                code = _serve_one(child_r, fds,
                                                  unused_fds,
                                                  old_handlers)
                            except Exception:
                                sys.excepthook(*sys.exc_info())
                                sys.stderr.flush()
                            finally:
                                # Always leave via os._exit() so the child
                                # never falls back into the server loop.
                                os._exit(code)
                        else:
                            # Send pid to client process
                            try:
                                write_signed(child_w, pid)
                            except BrokenPipeError:
                                # client vanished
                                pass
                            pid_to_fd[pid] = child_w
                            os.close(child_r)
                            for fd in fds:
                                os.close(fd)

            except OSError as e:
                # An aborted connection is ignored; anything else propagates.
                if e.errno != errno.ECONNABORTED:
                    raise
import signal
import socket

import pyuv


def prepare_cb(handle):
    """Prepare-handle callback: just announce that it ran."""
    print("Inside prepare_cb")


def excepthook(typ, val, tb):
    """Loop exception hook: on KeyboardInterrupt, stop both active handles."""
    print("Inside excepthook")
    if typ is not KeyboardInterrupt:
        return
    prepare.stop()
    signal_checker.stop()


# Socket pair carrying signal wake-ups: Python writes to ``writer`` when a
# signal arrives and the SignalChecker watches ``reader``.
reader, writer = socket.socketpair()
writer.setblocking(False)
reader.setblocking(False)

loop = pyuv.Loop.default_loop()
loop.excepthook = excepthook

prepare = pyuv.Prepare(loop)
prepare.start(prepare_cb)

signal.set_wakeup_fd(writer.fileno())
signal_checker = pyuv.util.SignalChecker(loop, reader.fileno())
signal_checker.start()

loop.run()
def start(self):
    """Run the I/O loop until ``self._running`` is cleared.

    Each iteration runs the callbacks that were queued when the iteration
    began, then the timeouts that have come due, polls ``self._impl`` for
    events and dispatches every ready fd to its registered handler.

    If ``self._stopped`` is already set on entry, the flag is cleared and
    the method returns immediately without running the loop.

    Raises:
        RuntimeError: if the loop is already running, or if it is started
            from a process other than the one recorded in ``self._pid``.
    """
    if self._running:
        raise RuntimeError("IOLoop is already running")
    if os.getpid() != self._pid:
        raise RuntimeError("Cannot share PollIOLoops across processes")
    self._setup_logging()
    if self._stopped:
        self._stopped = False
        return
    # Remember the previous current loop so it can be restored on exit.
    old_current = getattr(IOLoop._current, "instance", None)
    IOLoop._current.instance = self
    self._thread_ident = thread.get_ident()
    self._running = True

    # signal.set_wakeup_fd closes a race condition in event loops:
    # a signal may arrive at the beginning of select/poll/etc
    # before it goes into its interruptible sleep, so the signal
    # will be consumed without waking the select.  The solution is
    # for the (C, synchronous) signal handler to write to a pipe,
    # which will then be seen by select.
    #
    # In python's signal handling semantics, this only matters on the
    # main thread (fortunately, set_wakeup_fd only works on the main
    # thread and will raise a ValueError otherwise).
    #
    # If someone has already set a wakeup fd, we don't want to
    # disturb it.  This is an issue for twisted, which does its
    # SIGCHLD processing in response to its own wakeup fd being
    # written to.  As long as the wakeup fd is registered on the IOLoop,
    # the loop will still wake up and everything should work.
    old_wakeup_fd = None
    if hasattr(signal, 'set_wakeup_fd') and os.name == 'posix':
        # requires python 2.6+, unix.  set_wakeup_fd exists but crashes
        # the python process on windows.
        try:
            old_wakeup_fd = signal.set_wakeup_fd(self._waker.write_fileno())
            if old_wakeup_fd != -1:
                # Already set, restore previous value.  This is a little racy,
                # but there's no clean get_wakeup_fd and in real use the
                # IOLoop is just started once at the beginning.
                signal.set_wakeup_fd(old_wakeup_fd)
                old_wakeup_fd = None
        except ValueError:
            # Non-main thread, or the previous value of wakeup_fd
            # is no longer valid.
            old_wakeup_fd = None

    try:
        while True:
            # Prevent IO event starvation by delaying new callbacks
            # to the next iteration of the event loop.
            ncallbacks = len(self._callbacks)

            # Add any timeouts that have come due to the callback list.
            # Do not run anything until we have determined which ones
            # are ready, so timeouts that call add_timeout cannot
            # schedule anything in this iteration.
            due_timeouts = []
            if self._timeouts:
                now = self.time()
                while self._timeouts:
                    if self._timeouts[0].callback is None:
                        # The timeout was cancelled.  Note that the
                        # cancellation check is repeated below for timeouts
                        # that are cancelled by another timeout or callback.
                        heapq.heappop(self._timeouts)
                        self._cancellations -= 1
                    elif self._timeouts[0].deadline <= now:
                        due_timeouts.append(heapq.heappop(self._timeouts))
                    else:
                        break
                if (self._cancellations > 512 and
                        self._cancellations > (len(self._timeouts) >> 1)):
                    # Clean up the timeout queue when it gets large and it's
                    # more than half cancellations.
                    self._cancellations = 0
                    self._timeouts = [x for x in self._timeouts
                                      if x.callback is not None]
                    heapq.heapify(self._timeouts)

            # Only the callbacks counted above are run; ones added while
            # running them wait for the next iteration.
            for i in range(ncallbacks):
                self._run_callback(self._callbacks.popleft())
            for timeout in due_timeouts:
                if timeout.callback is not None:
                    self._run_callback(timeout.callback)
            # Closures may be holding on to a lot of memory, so allow
            # them to be freed before we go into our poll wait.
            due_timeouts = timeout = None

            if self._callbacks:
                # If any callbacks or timeouts called add_callback,
                # we don't want to wait in poll() before we run them.
                poll_timeout = 0.0
            elif self._timeouts:
                # If there are any timeouts, schedule the first one.
                # Use self.time() instead of 'now' to account for time
                # spent running callbacks.
                poll_timeout = self._timeouts[0].deadline - self.time()
                poll_timeout = max(0, min(poll_timeout, _POLL_TIMEOUT))
            else:
                # No timeouts and no callbacks, so use the default.
                poll_timeout = _POLL_TIMEOUT

            if not self._running:
                break

            if self._blocking_signal_threshold is not None:
                # clear alarm so it doesn't fire while poll is waiting for
                # events.
                signal.setitimer(signal.ITIMER_REAL, 0, 0)

            try:
                event_pairs = self._impl.poll(poll_timeout)
            except Exception as e:
                # Depending on python version and IOLoop implementation,
                # different exception types may be thrown and there are
                # two ways EINTR might be signaled:
                # * e.errno == errno.EINTR
                # * e.args is like (errno.EINTR, 'Interrupted system call')
                # NOTE(review): errno_from_exception presumably covers both
                # forms -- confirm against its definition.
                if errno_from_exception(e) == errno.EINTR:
                    continue
                else:
                    raise

            if self._blocking_signal_threshold is not None:
                # Re-arm the timer now that poll has returned.
                signal.setitimer(signal.ITIMER_REAL,
                                 self._blocking_signal_threshold, 0)

            # Pop one fd at a time from the set of pending fds and run
            # its handler. Since that handler may perform actions on
            # other file descriptors, there may be reentrant calls to
            # this IOLoop that modify self._events
            self._events.update(event_pairs)
            while self._events:
                fd, events = self._events.popitem()
                try:
                    fd_obj, handler_func = self._handlers[fd]
                    handler_func(fd_obj, events)
                except (OSError, IOError) as e:
                    if errno_from_exception(e) == errno.EPIPE:
                        # Happens when the client closes the connection
                        pass
                    else:
                        self.handle_callback_exception(self._handlers.get(fd))
                except Exception:
                    self.handle_callback_exception(self._handlers.get(fd))
            # Drop references to the last handler before the next iteration.
            fd_obj = handler_func = None

    finally:
        # reset the stopped flag so another start/stop pair can be issued
        self._stopped = False
        if self._blocking_signal_threshold is not None:
            signal.setitimer(signal.ITIMER_REAL, 0, 0)
        IOLoop._current.instance = old_current
        # Only undo the wakeup fd if this call installed it (see above).
        if old_wakeup_fd is not None:
            signal.set_wakeup_fd(old_wakeup_fd)
def tearDown(self):
    """Undo the signal fixture: wakeup fd, pipe ends, SIGALRM handler.

    Reinstalls ``self.old_wakeup`` as the wakeup fd, closes the ``self.read``
    and ``self.write`` descriptors, and reinstalls ``self.alrm`` as the
    SIGALRM handler, in that order.
    """
    signal.set_wakeup_fd(self.old_wakeup)
    for pipe_end in (self.read, self.write):
        os.close(pipe_end)
    signal.signal(signal.SIGALRM, self.alrm)
def main():
    """Main program.

    Discovers the temperature sensors, initializes the LCD, then runs an
    epoll loop driven by signals delivered through a wakeup pipe:

    * SIGALRM (from ITIMER_REAL) dispatches the periodic tasks: clock
      display, sensor read and sensor display;
    * SIGINT / SIGTERM run ``cleanup()`` and exit with status 0;
    * SIGHUP and any other signal are only reported.

    Everything that has to be undone on exit is recorded in the global
    ``cleanup_objects`` dict. The function does not return normally.
    """
    global dbg, cleanup_objects
    itimer_next = {}

    def next_fire_time(base, interval):
        # First multiple of `interval` that is not earlier than `base`.
        return (base - base % interval
                + interval * ceil((base % interval) / interval))

    def reschedule(key, interval, label):
        # Move the deadline past "now" so missed periods are skipped rather
        # than replayed, then report the new wake up time once.
        while itimer_next[key] <= time():
            itimer_next[key] += interval
        dbg.dbg(' Wake up time for {} set to {}'.format(
            label, itimer_conv(itimer_next[key])))

    # Initialize debugging
    dbg = Debug(level=DEBUG_LVL)
    cleanup_objects['debug'] = dbg

    # Initialize sensors
    sensors = [{
        'obj': sensor,
        'id_short': sensor.id[-4:],
        'value': None,
        'read_success': 0,
        'read_crc': 0,
        'read_nan': 0,
        'e_rate': 0
    } for sensor in W1ThermSensor.get_available_sensors()]
    if not sensors:
        sys.stderr.write('\nERROR: No sensors found\n')
        cleanup()
        # sys.exit() instead of the site-provided exit(), which is meant
        # for interactive use and is missing under `python -S`.
        # NOTE(review): status 0 is kept for compatibility even though this
        # is an error path -- confirm whether callers rely on it.
        sys.exit(0)
    else:
        sys.stderr.write('\nINFO: Found {} sensors\n'.format(len(sensors)))
    active_sensor_idx = 0

    # Initialize LCD
    lcd = disp_init()
    cleanup_objects['lcd'] = lcd

    # Very first run
    for sensor in sensors:
        read_sensor(sensor)
    disp_clock(lcd)
    disp_sensor(lcd, sensors[active_sensor_idx])

    # Initialize signal file descriptor
    # We must set write end of pipe to non blocking mode
    # Also we don't want to block while read signal numbers from read end
    pipe_r, pipe_w = os.pipe()
    cleanup_objects['pipe_r'] = pipe_r
    cleanup_objects['pipe_w'] = pipe_w
    flags = fcntl.fcntl(pipe_w, fcntl.F_GETFL, 0)
    fcntl.fcntl(pipe_w, fcntl.F_SETFL, flags | os.O_NONBLOCK)
    signal.set_wakeup_fd(pipe_w)
    flags = fcntl.fcntl(pipe_r, fcntl.F_GETFL, 0)
    fcntl.fcntl(pipe_r, fcntl.F_SETFL, flags | os.O_NONBLOCK)

    # Redefine signal handlers (previous handlers are kept for cleanup)
    cleanup_objects['sigalrm'] = signal.signal(signal.SIGALRM, signal_handler)
    cleanup_objects['sigint'] = signal.signal(signal.SIGINT, signal_handler)
    cleanup_objects['sighup'] = signal.signal(signal.SIGHUP, signal_handler)
    cleanup_objects['sigterm'] = signal.signal(signal.SIGTERM, signal_handler)

    # Create poller and register file descriptors
    poller = select.epoll()
    cleanup_objects['poller'] = poller
    poller.register(pipe_r, select.EPOLLIN)

    # Calculate interval timer value
    itimer_value = gcd(gcd(SENSOR_READ_INTERVAL, SENSOR_DISP_INTERVAL),
                       CLOCK_DISP_INTERVAL)
    dbg.dbg(
        'Calculated itimer interval value is {} seconds'.format(itimer_value))

    # Set interval timer
    # Initial value of timer bounded to measurement itimer_value
    t = time()
    t_rest = itimer_value - t % itimer_value
    t_start = t_rest + ITIMER_START_SHIFT
    signal.setitimer(signal.ITIMER_REAL, t_start, itimer_value)
    cleanup_objects['itimer'] = True
    dbg.dbg('ITIMER_REAL will fire at {} and each {} seconds'.format(
        itimer_conv(t + t_start), itimer_value))

    # Set fire times
    t_base = t + t_rest
    dbg.dbg(' Base time is {}'.format(itimer_conv(t_base)))
    itimer_next['sensor_read_interval'] = next_fire_time(
        t_base, SENSOR_READ_INTERVAL)
    dbg.dbg(' Wake up time for SENSOR_READ set to {}'.format(
        itimer_conv(itimer_next['sensor_read_interval'])))
    itimer_next['sensor_disp_interval'] = next_fire_time(
        t_base, SENSOR_DISP_INTERVAL)
    dbg.dbg(' Wake up time for SENSOR_DISP set to {}'.format(
        itimer_conv(itimer_next['sensor_disp_interval'])))
    itimer_next['clock_disp_interval'] = next_fire_time(
        t_base, CLOCK_DISP_INTERVAL)
    dbg.dbg(' Wake up time for CLOCK_DISP set to {}'.format(
        itimer_conv(itimer_next['clock_disp_interval'])))

    # Main loop
    sys.stderr.write('INFO: Entering main loop\n')
    while True:
        # Wait for events and process its
        try:
            events = poller.poll()
        except InterruptedError:
            continue

        for fd, flags in events:
            dbg.dbg('Start processing event, fd={}, flags={}'.format(
                fd, flags))

            # Unexpected event
            if not (fd == pipe_r and flags & select.EPOLLIN):
                dbg.dbg('Unexpected event on fd {}, flags {}'.format(
                    fd, flags))
                sys.stderr.write(
                    'ERROR: Unexpected event on fd {}, flags {}\n'.format(
                        fd, flags))
                continue

            # Signal received, extract signal numbers from wakeup fd
            dbg.dbg(
                'Signal received from wakeup fd, unpacking signal numbers')
            data = os.read(pipe_r, SIG_WAKEUP_FD_RLEN)
            signums = struct.unpack('{}B'.format(len(data)), data)
            dbg.dbg('Signal numbers unpacked: {}'.format(signums))

            # Make signal list have unique numbers only
            signums = set(signums)

            # Process signals
            for signum in signums:
                if signum == signal.SIGALRM:
                    t = time()
                    dbg.dbg('Got SIGALRM, dispatch itimer based tasks')

                    # Display clock
                    if itimer_next['clock_disp_interval'] <= time():
                        dbg.dbg('Start CLOCK_DISP task')
                        disp_clock(lcd)
                        reschedule('clock_disp_interval',
                                   CLOCK_DISP_INTERVAL, 'CLOCK_DISP')

                    # Read sensors
                    if itimer_next['sensor_read_interval'] <= t:
                        dbg.dbg('Start SENSOR_READ task')
                        for sensor in sensors:
                            read_sensor(sensor)
                        reschedule('sensor_read_interval',
                                   SENSOR_READ_INTERVAL, 'SENSOR_READ')

                    # Display sensor (sensors are shown round-robin)
                    if itimer_next['sensor_disp_interval'] <= t:
                        active_sensor = sensors[active_sensor_idx]
                        dbg.dbg(
                            'Start SENSOR_DISP task, sensor number {} id {}'
                            .format(active_sensor_idx,
                                    active_sensor['id_short']))
                        disp_sensor(lcd, active_sensor)
                        active_sensor_idx += 1
                        if active_sensor_idx >= len(sensors):
                            active_sensor_idx = 0
                        reschedule('sensor_disp_interval',
                                   SENSOR_DISP_INTERVAL, 'SENSOR_DISP')
                elif signum == signal.SIGINT:
                    dbg.dbg('Got SIGINT, terminating')
                    sys.stderr.write('\nINFO: SIGINT received\n')
                    cleanup()
                    sys.exit(0)
                elif signum == signal.SIGTERM:
                    dbg.dbg('Got SIGTERM, terminating')
                    sys.stderr.write('\nINFO: SIGTERM received\n')
                    cleanup()
                    sys.exit(0)
                elif signum == signal.SIGHUP:
                    dbg.dbg('Got SIGHUP, ignoring')
                    sys.stderr.write('INFO: SIGHUP received\n')
                else:
                    dbg.dbg(
                        'Got uncaught signal {}, ignoring'.format(signum))
                    sys.stderr.write(
                        'WARNING: Unexpected signal received: {}\n'.format(
                            signum))