def kill_process(self, process):
    """Stop one process: stop signal first, SIGKILL after the grace period.

    Coroutine-style generator. Finishes with ``gen.Return(False)`` when the
    process is already being stopped or no longer exists, and with
    ``gen.Return(True)`` once the process has been stopped.
    """
    if process.stopping:
        # Another kill is already in flight for this process.
        raise gen.Return(False)

    try:
        logger.debug("%s: kill process %s", self.name, process.pid)
        if not self.stop_children:
            self.send_signal(process.pid, self.stop_signal)
            self.notify_event("kill", {"process_pid": process.pid,
                                       "time": time.time()})
        else:
            self.send_signal_process(process, self.stop_signal)
    except NoSuchProcess:
        raise gen.Return(False)

    process.stopping = True

    # Poll every 0.1s until the process is gone or the grace period is over.
    elapsed = 0
    while elapsed < self.graceful_timeout:
        if not process.is_alive():
            break
        yield tornado_sleep(0.1)
        elapsed += 0.1

    # SIGKILL is unavailable on Windows; process.stop() below is relied on
    # to terminate the process in that case.
    if elapsed >= self.graceful_timeout and hasattr(signal, 'SIGKILL'):
        # Grace period exhausted: no more politeness.
        self.send_signal_process(process, signal.SIGKILL)

    if self.stream_redirector:
        self.stream_redirector.remove_redirections(process)

    process.stopping = False
    process.stop()
    raise gen.Return(True)
def bind_and_listen(self):
    """Bind the socket, make it non-blocking and start listening.

    For UNIX sockets the path must not already exist. When ``self.umask``
    is set, it is applied only for the duration of the ``bind()`` call and
    the previous process umask is always restored, even if binding fails.

    For non-UNIX sockets, ``self.host`` and ``self.port`` are refreshed
    from ``getsockname()`` after binding.

    Raises:
        OSError: the UNIX socket path already exists.
        socket.error: binding failed (logged, then re-raised).
    """
    try:
        if self.is_unix:
            if os.path.exists(self.path):
                raise OSError("%r already exists. You might want to "
                              "remove it. If it's a stalled socket "
                              "file, just restart Circus" % self.path)
            if self.umask is None:
                self.bind(self.path)
            else:
                old_mask = os.umask(self.umask)
                try:
                    self.bind(self.path)
                finally:
                    # The umask is process-wide: never leave it altered,
                    # even when bind() raises.
                    os.umask(old_mask)
        else:
            self.bind((self.host, self.port))
    except socket.error:
        logger.error('Could not bind %s' % self.location)
        raise

    self.setblocking(0)
    # listen() is only issued for connection-oriented socket types.
    if self.socktype in (socket.SOCK_STREAM, socket.SOCK_SEQPACKET):
        self.listen(self.backlog)

    if not self.is_unix:
        self.host, self.port = self.getsockname()

    logger.debug('Socket bound at %s - fd: %d' % (self.location,
                                                  self.fileno()))
def start(self):
    """Start the controller and every watcher.

    With an ioloop handed to __init__(), this behaves as a regular
    coroutine and yields on the watchers' start-up.

    Without a provided ioloop (the default), the watchers are scheduled on
    the loop and the loop is started here, so the call blocks. In that mode
    nothing may be yielded, because the method is invoked as a plain
    function.

    Finishes by raising ``gen.Return`` with the value of
    ``self._restarting``.
    """
    logger.info("Starting master on pid %s", self.pid)
    self.initialize()

    # Bring up the controller before any watcher.
    self.ctrl.start()
    self._restarting = False
    try:
        logger.debug('Initializing watchers')
        if not self._provided_loop:
            # Scheduled now, executed once start_io_loop() is running.
            self.loop.add_future(self.start_watchers(), lambda x: None)
        else:
            yield self.start_watchers()
        logger.info('Arbiter now waiting for commands')
        self._running = True
        if not self._provided_loop:
            # We own the loop: this call blocks until it stops.
            self.start_io_loop()
    finally:
        if not self._provided_loop:
            # We own the loop: tidy up the controller and the sockets.
            self.stop_controller_and_close_sockets()
    raise gen.Return(self._restarting)
def reap_processes(self):
    """Collect exited children and reap those owned by this watcher.

    Does nothing when the watcher is stopped. Loops on a non-blocking
    ``waitpid(-1)`` until no more children have exited, the watcher gets
    stopped, or there are no children left.
    """
    if self.stopped:
        logger.debug('do not reap processes as the watcher is stopped')
        return

    while True:
        try:
            # Any child of circus may be returned here; only the ones
            # belonging to this watcher are passed on to reap_process().
            pid, status = os.waitpid(-1, os.WNOHANG)
            if not pid:
                return

            if pid in self.processes:
                self.reap_process(pid, status)

            if self.stopped:
                logger.debug('watcher have been stopped, exit the loop')
                return
        except OSError as e:
            if e.errno == errno.ECHILD:
                # No child processes at all.
                return
            if e.errno != errno.EAGAIN:
                raise
            # EAGAIN: back off briefly, then try again.
            time.sleep(0.001)
def bind_and_listen(self):
    """Bind the socket, make it non-blocking and start listening.

    For UNIX sockets the path must not already exist. When ``self.umask``
    is set, it is applied only for the duration of the ``bind()`` call and
    the previous process umask is always restored, even if binding fails.

    For other sockets, when ``self.interface`` is set the socket is first
    bound to that device through ``SO_BINDTODEVICE``; ``self.host`` and
    ``self.port`` are refreshed from ``getsockname()`` after binding.

    Raises:
        OSError: the UNIX socket path already exists.
        socket.error: binding failed (logged, then re-raised).
    """
    try:
        if self.is_unix:
            if os.path.exists(self.path):
                raise OSError("%r already exists. You might want to "
                              "remove it. If it's a stalled socket "
                              "file, just restart Circus" % self.path)
            if self.umask is None:
                self.bind(self.path)
            else:
                old_mask = os.umask(self.umask)
                try:
                    self.bind(self.path)
                finally:
                    # The umask is process-wide: never leave it altered,
                    # even when bind() raises.
                    os.umask(old_mask)
        else:
            if self.interface is not None:
                # Bind to device if given, e.g. to limit which device to bind
                # when binding on IN_ADDR_ANY or IN_ADDR_BROADCAST.
                # NOTE(review): the ``IN`` module and the str argument look
                # Python 2 specific - confirm before running on Python 3.
                import IN
                self.setsockopt(socket.SOL_SOCKET, IN.SO_BINDTODEVICE,
                                self.interface + '\0')
                logger.debug('Binding to device: %s' % self.interface)
            self.bind((self.host, self.port))
    except socket.error:
        logger.error('Could not bind %s' % self.location)
        raise

    self.setblocking(0)
    # listen() is only issued for connection-oriented socket types.
    if self.socktype in (socket.SOCK_STREAM, socket.SOCK_SEQPACKET):
        self.listen(self.backlog)

    if not self.is_unix:
        self.host, self.port = self.getsockname()

    logger.debug('Socket bound at %s - fd: %d' % (self.location,
                                                  self.fileno()))
def manage_processes(self):
    """Expire old processes, spawn missing ones and trim extra ones.

    Does nothing when the watcher is stopped. Otherwise:

    1. when ``max_age`` is set, every process older than ``max_age`` plus
       a per-process random variance is reported as "expired" and killed;
    2. when ``respawn`` is set and fewer than ``numprocesses`` processes
       are tracked, new ones are spawned;
    3. while more than ``numprocesses`` processes are tracked, the first
       ones in sort order are dropped, and killed unless their status is
       already DEAD_OR_ZOMBIE.
    """
    if self.stopped:
        return

    if self.max_age:
        # Iterate over a snapshot: works on Python 2 and 3, and stays safe
        # if killing a process ends up modifying self.processes.
        for process in list(self.processes.values()):
            max_age = self.max_age + randint(0, self.max_age_variance)
            if process.age() > max_age:
                logger.debug('%s: expired, respawning', self.name)
                self.notify_event("expired", {"process_pid": process.pid,
                                              "time": time.time()})
                self.kill_process(process)

    if self.respawn and len(self.processes) < self.numprocesses:
        self.spawn_processes()

    # sorted() accepts both a Python 2 list and a Python 3 dict view.
    processes = sorted(self.processes.values())
    while len(processes) > self.numprocesses:
        process = processes.pop(0)
        already_dead = process.status == DEAD_OR_ZOMBIE
        self.processes.pop(process.pid)
        if not already_dead:
            self.kill_process(process)
def start(self):
    """Start the watcher.

    Returns None when the watcher is already running, or when it is
    on-demand and the arbiter has no socket event yet. Returns False when
    the 'before_start' or 'after_start' hook vetoes the start-up, and True
    once the watcher has been started.
    """
    if not self.stopped:
        return

    if self.on_demand and not self.arbiter.socket_event:
        return

    self.stopped = False
    before_ok = self.call_hook('before_start')
    if not before_ok:
        logger.debug('Aborting startup')
        # Roll the flag back: nothing has been started.
        self.stopped = True
        return False

    self._create_redirectors()
    self.reap_processes()
    self.spawn_processes()

    after_ok = self.call_hook('after_start')
    if not after_ok:
        logger.debug('Aborting startup')
        self.stop()
        return False

    if self.stdout_redirector is not None:
        self.stdout_redirector.start()

    if self.stderr_redirector is not None:
        self.stderr_redirector.start()

    logger.info('%s started' % self.name)
    self.notify_event("start", {"time": time.time()})
    return True
def _stop(self, close_output_streams=False, for_shutdown=False):
    """Stop the watcher (generator meant to be driven as a coroutine).

    Does nothing when the watcher is already stopped. When ``for_shutdown``
    is set and the watcher uses papa, the processes are left running and
    only the redirector (and optionally the output streams) are shut down.

    Args:
        close_output_streams: also close stdout/stderr streams that expose
            a ``close`` attribute.
        for_shutdown: the stop is part of a shutdown.
    """
    if self.is_stopped():
        return

    self._status = "stopping"
    skip = for_shutdown and self.use_papa
    if not skip:
        logger.debug('stopping the %s watcher' % self.name)
        logger.debug('gracefully stopping processes [%s] for %ss' % (
                     self.name, self.graceful_timeout))
        # The hook's verdict is deliberately ignored.
        self.call_hook('before_stop')
        yield self.kill_processes()
        self.reap_processes()

    # Shut the redirector down and forget about it.
    if self.stream_redirector:
        self.stream_redirector.stop()
        self.stream_redirector = None

    if close_output_streams:
        if self.stdout_stream and hasattr(self.stdout_stream, 'close'):
            self.stdout_stream.close()
        if self.stderr_stream and hasattr(self.stderr_stream, 'close'):
            self.stderr_stream.close()

    if skip:
        logger.info('%s left running in papa', self.name)
        return

    # Tell subscribers, flip the status, then run the closing hook.
    if self.evpub_socket is not None:
        self.notify_event("stop", {"time": time.time()})
    self._status = "stopped"
    # The hook's verdict is deliberately ignored.
    self.call_hook('after_stop')
    logger.info('%s stopped', self.name)
def start(self):
    """Start the watcher unless it is already running.

    Return values: None if nothing had to be done (already running, or
    on-demand without a socket event on the arbiter), False if one of the
    start hooks rejected the start-up, True on success.
    """
    already_running = not self.stopped
    if already_running:
        return

    if self.on_demand and not self.arbiter.socket_event:
        return

    self.stopped = False
    if not self.call_hook("before_start"):
        logger.debug("Aborting startup")
        self.stopped = True
        return False

    # Set up output redirection, clear dead children, then spawn.
    self._create_redirectors()
    self.reap_processes()
    self.spawn_processes()

    if not self.call_hook("after_start"):
        logger.debug("Aborting startup")
        self.stop()
        return False

    if self.stdout_redirector is not None:
        self.stdout_redirector.start()
    if self.stderr_redirector is not None:
        self.stderr_redirector.start()

    logger.info("%s started" % self.name)
    started_at = time.time()
    self.notify_event("start", {"time": started_at})
    return True
def handle_recv(self, data):
    """Process one message published by circusd.

    Only two actions matter here:
      - spawn: start monitoring the new child pid
      - reap: stop monitoring a child pid that has been reaped

    Messages for watchers that do not match, messages that cannot be
    decoded and messages without a "process_pid" are ignored.
    """
    watcher_name, action, msg = self.split_data(data)
    logger.debug("received data from circusd: watcher.%s.%s, %s",
                 watcher_name, action, msg)

    # Only watchers we monitor are of interest.
    if not self._match_watcher_name(watcher_name):
        return

    try:
        message = self.load_message(msg)
    except ValueError:
        logger.error("Error while decoding json for message: %s", msg)
        return

    if "process_pid" not in message:
        logger.warning('no process_pid in message')
        return

    pid = str(message.get("process_pid"))
    if action == "spawn":
        self.pid_status[pid] = {'watcher': watcher_name,
                                'last_activity': time.time()}
        logger.info("added new monitored pid for %s:%s",
                    watcher_name, pid)
    # Questionable Py3 fix kept from the original: a reap is only handled
    # for a pid that is currently tracked in self.pid_status.
    elif action == "reap" and pid in self.pid_status:
        forgotten = self.pid_status.pop(pid)
        logger.info("removed monitored pid for %s:%s",
                    forgotten['watcher'], pid)
def _stop(self, close_output_streams=False, for_shutdown=False):
    """Generator that stops the watcher.

    Returns immediately when ``is_stopped()`` is true. Otherwise the status
    becomes "stopping"; unless this is a shutdown of a papa-backed watcher,
    the 'before_stop' hook is run and the processes are killed and reaped.
    The stream redirector is then stopped, the output streams optionally
    closed, and - again unless papa keeps the processes - the "stop" event
    is published, the status set to "stopped" and 'after_stop' is run.
    """
    if self.is_stopped():
        return
    self._status = "stopping"

    skip = for_shutdown and self.use_papa
    if not skip:
        logger.debug('stopping the %s watcher' % self.name)
        grace_msg = 'gracefully stopping processes [%s] for %ss' % (
            self.name, self.graceful_timeout)
        logger.debug(grace_msg)
        # Hook result intentionally unused.
        self.call_hook('before_stop')
        yield self.kill_processes()
        self.reap_processes()

    # stop redirectors
    if self.stream_redirector:
        self.stream_redirector.stop()
        self.stream_redirector = None

    if close_output_streams:
        if self.stdout_stream and hasattr(self.stdout_stream, 'close'):
            self.stdout_stream.close()
        if self.stderr_stream and hasattr(self.stderr_stream, 'close'):
            self.stderr_stream.close()

    # notify about the stop
    if not skip:
        if self.evpub_socket is not None:
            self.notify_event("stop", {"time": time.time()})
        self._status = "stopped"
        # Hook result intentionally unused.
        self.call_hook('after_stop')
        logger.info('%s stopped', self.name)
    else:
        logger.info('%s left running in papa', self.name)
def spawn_process(self):
    """Spawn one process, retrying on OSError.

    Returns None when the watcher is stopped or after a successful spawn,
    and False when the 'before_spawn' hook vetoes it (the watcher is then
    flagged as stopped). When every attempt fails, the watcher is stopped.
    A ``max_retry`` of -1 means retry forever.
    """
    if self.stopped:
        return

    if not self.call_hook("before_spawn"):
        self.stopped = True
        return False

    cmd = util.replace_gnu_args(self.cmd, sockets=self._get_sockets_fds(),
                                env=self.env)
    self._process_counter += 1
    nb_tries = 0
    while nb_tries < self.max_retry or self.max_retry == -1:
        process = None
        pipe_stdout = self.stdout_redirector is not None
        pipe_stderr = self.stderr_redirector is not None
        try:
            process = Process(
                self._process_counter,
                cmd,
                args=self.args,
                working_dir=self.working_dir,
                shell=self.shell,
                uid=self.uid,
                gid=self.gid,
                env=self.env,
                rlimits=self.rlimits,
                executable=self.executable,
                use_fds=self.use_sockets,
                watcher=self,
                pipe_stdout=pipe_stdout,
                pipe_stderr=pipe_stderr,
                close_child_stdout=self.close_child_stdout,
                close_child_stderr=self.close_child_stderr,
            )

            # Hook the child's output into the redirectors, if any.
            if pipe_stdout:
                self.stdout_redirector.add_redirection(
                    "stdout", process, process.stdout)
            if pipe_stderr:
                self.stderr_redirector.add_redirection(
                    "stderr", process, process.stderr)

            self.processes[process.pid] = process
            logger.debug("running %s process [pid %d]", self.name,
                         process.pid)
        except OSError as e:
            logger.warning("error in %r: %s", self.name, str(e))

        if process is not None:
            self.notify_event("spawn", {"process_pid": process.pid,
                                        "time": time.time()})
            time.sleep(self.warmup_delay)
            return

        # Process creation failed: count the attempt and go again.
        nb_tries += 1

    # Every attempt failed.
    self.stop()
def kill_process(self, process, sig=signal.SIGTERM):
    """Publish a "kill" message for ``process`` and send it ``sig``.

    Args:
        process: the process to signal; its ``wid`` goes in the message.
        sig: signal to deliver (SIGTERM by default).
    """
    payload = {"process_id": process.wid, "time": time.time()}
    self.send_msg("kill", payload)
    logger.debug("%s: kill process %s", self.name, process.pid)
    process.send_signal(sig)
def bind_and_listen(self):
    """Bind to (host, port), go non-blocking and start listening.

    ``self.host`` and ``self.port`` are refreshed from ``getsockname()``
    once the socket is listening.
    """
    address = (self.host, self.port)
    self.bind(address)
    self.setblocking(0)
    self.listen(self.backlog)

    bound_host, bound_port = self.getsockname()
    self.host, self.port = bound_host, bound_port

    logger.debug('Socket bound at %s:%d - fd: %d' % (self.host, self.port,
                                                     self.fileno()))
def manage_processes(self):
    """Expire old processes, spawn missing ones and trim extra ones.

    Does nothing when the watcher is stopped. Otherwise:

    1. when ``max_age`` is set, every process older than ``max_age`` plus
       a per-process random variance is reported as "expired" and killed;
    2. when ``respawn`` is set and fewer than ``numprocesses`` processes
       are tracked, new ones are spawned;
    3. while more than ``numprocesses`` processes are tracked, the first
       ones in sort order are dropped, and killed unless their status is
       already DEAD_OR_ZOMBIE.
    """
    if self.stopped:
        return

    if self.max_age:
        # Iterate over a snapshot: works on Python 2 and 3, and stays safe
        # if killing a process ends up modifying self.processes.
        for process in list(self.processes.values()):
            max_age = self.max_age + randint(0, self.max_age_variance)
            if process.age() > max_age:
                logger.debug('%s: expired, respawning', self.name)
                self.notify_event("expired", {
                    "process_pid": process.pid,
                    "time": time.time()
                })
                self.kill_process(process)

    if self.respawn and len(self.processes) < self.numprocesses:
        self.spawn_processes()

    # sorted() accepts both a Python 2 list and a Python 3 dict view.
    processes = sorted(self.processes.values())
    while len(processes) > self.numprocesses:
        process = processes.pop(0)
        already_dead = process.status == DEAD_OR_ZOMBIE
        self.processes.pop(process.pid)
        if not already_dead:
            self.kill_process(process)
def manage_processes(self):
    """Expire old processes, spawn missing ones and trim extra ones.

    Does nothing when the watcher is stopped. Otherwise:

    1. when ``max_age`` is set, one age limit (``max_age`` plus a random
       variance) is drawn and every older process is reported as
       "expired", dropped from ``self.processes`` and killed unless its
       status is already DEAD_OR_ZOMBIE;
    2. when ``respawn`` is set and fewer than ``numprocesses`` processes
       are tracked, new ones are spawned;
    3. while more than ``numprocesses`` processes are tracked, the first
       ones in sort order are dropped, and killed unless already dead.
    """
    if self.stopped:
        return

    # removing old processes
    if self.max_age:
        max_age = self.max_age + randint(0, self.max_age_variance)
        # Snapshot of the values: portable across Python 2 and 3, and
        # required because entries are popped while looping.
        for process in list(self.processes.values()):
            if process.age() <= max_age:
                continue
            logger.debug('%s: expired, respawning', self.name)
            self.notify_event("expired", {"process_pid": process.pid,
                                          "time": time.time()})
            self.processes.pop(process.pid)
            if process.status != DEAD_OR_ZOMBIE:
                self.kill_process(process)

    # adding fresh processes
    if self.respawn and len(self.processes) < self.numprocesses:
        self.spawn_processes()

    # removing extra processes
    # sorted() accepts both a Python 2 list and a Python 3 dict view.
    processes = sorted(self.processes.values())
    while len(processes) > self.numprocesses:
        process = processes.pop(0)
        already_dead = process.status == DEAD_OR_ZOMBIE
        self.processes.pop(process.pid)
        if not already_dead:
            self.kill_process(process)
def kill_process(self, process, sig=signal.SIGTERM):
    """Signal a process and its children with ``sig``, then stop it.

    The process' stdout/stderr redirections are removed first. A "kill"
    event is published for every child and for the process itself. If the
    process has already vanished (NoSuchProcess), the method returns
    without calling ``process.stop()``.
    """
    # Detach the process' pipes from the redirectors.
    if self.stdout_redirector is not None:
        self.stdout_redirector.remove_redirection(process.stdout)

    if self.stderr_redirector is not None:
        self.stderr_redirector.remove_redirection(process.stderr)

    logger.debug("%s: kill process %s", self.name, process.pid)
    try:
        # Children first, each with the very same signal.
        for child_pid in process.children():
            process.send_signal_child(child_pid, sig)
            child_event = {"process_pid": child_pid, "time": time.time()}
            self.notify_event("kill", child_event)

        # Then the process itself.
        self.send_signal(process.pid, sig)
        own_event = {"process_pid": process.pid, "time": time.time()}
        self.notify_event("kill", own_event)
    except NoSuchProcess:
        # Already gone.
        return

    process.stop()
def send_signal(self, sig):
    """Sends a signal **sig** to the process.

    The Windows console events ``CTRL_BREAK_EVENT`` and ``CTRL_C_EVENT``
    are delivered with ``os.kill`` on the worker's pid; every other signal
    goes through the worker's own ``send_signal``. The console-event
    constants only exist in the ``signal`` module on Windows, so they are
    looked up defensively and the method also works on other platforms.

    Returns whatever the underlying call returns.
    """
    logger.debug("sending signal %s to %s" % (sig, self.pid))
    console_events = tuple(
        getattr(signal, name)
        for name in ('CTRL_BREAK_EVENT', 'CTRL_C_EVENT')
        if hasattr(signal, name)
    )
    if sig in console_events:
        return os.kill(self._worker.pid, sig)
    return self._worker.send_signal(sig)
def stop(self):
    """Stop the watcher and its processes.

    The redirectors are killed first. While active pids remain and the
    graceful timeout has not elapsed, SIGTERM is sent every 0.1s and dead
    children are reaped; a final SIGKILL round follows. A "stop" message
    is sent when an event socket is available.
    """
    logger.debug("stopping the %s watcher" % self.name)

    # Output redirection goes first.
    if self.stdout_redirector is not None:
        self.stdout_redirector.kill()

    if self.stderr_redirector is not None:
        self.stderr_redirector.kill()

    deadline = time.time() + self.graceful_timeout

    logger.debug("gracefully stopping processes [%s] for %ss" % (
        self.name, self.graceful_timeout))

    while self.get_active_pids() and time.time() < deadline:
        self.kill_processes(signal.SIGTERM)
        time.sleep(0.1)
        self.reap_processes()

    # Whatever is still around gets SIGKILL.
    self.kill_processes(signal.SIGKILL)

    if self.evpub_socket is not None:
        self.send_msg("stop", {"time": time.time()})

    self.stopped = True
    logger.info("%s stopped", self.name)
def reap_process(self, pid, status=None):
    """ensure that the process is killed (and not a zombie)

    Removes ``pid`` from ``self.processes``, decodes its wait status and
    publishes a "reap" event.

    Args:
        pid: pid of a process tracked in ``self.processes``.
        status: wait status as returned by ``os.waitpid``. When None, the
            status is collected here with a non-blocking ``waitpid``. A
            status of 0 (clean exit) is a valid status and is used as is.

    Raises:
        KeyError: ``pid`` is not tracked by this watcher.
        RuntimeError: the wait status is neither an exit nor a signal.
        OSError: unexpected ``waitpid`` failure.
    """
    process = self.processes.pop(pid)

    # Only ask the OS when the caller gave no status. Testing for None
    # (not falsiness) keeps a clean exit status of 0 from being discarded,
    # and the loop ends as soon as waitpid() succeeds.
    while status is None:
        try:
            _, status = os.waitpid(pid, os.WNOHANG)
        except OSError as e:
            if e.errno == errno.EAGAIN:
                time.sleep(0.001)
                continue
            elif e.errno == errno.ECHILD:
                # nothing to do here, we do not have any child
                # process running
                return
            else:
                raise

    # get return code
    if os.WIFSIGNALED(status):
        retcode = os.WTERMSIG(status)
    # process exited using exit(2) system call; return the
    # integer exit(2) system call has been called with
    elif os.WIFEXITED(status):
        retcode = os.WEXITSTATUS(status)
    else:
        # should never happen
        raise RuntimeError("Unknown process exit status")

    # if the process is dead or a zombie try to definitely stop it.
    if retcode in (STATUS_ZOMBIE, STATUS_DEAD):
        process.stop()

    logger.debug('reaping process %s [%s]' % (pid, self.name))
    self.notify_event("reap", {"process_pid": pid, "time": time.time()})
def start(self):
    """Run the plugin's loop until it ends, then release its resources.

    Raises ValueError when the plugin is not active. An interrupted loop
    (EINTR) is restarted; ETERM ends it quietly; any other ZMQError is
    logged and re-raised. After the loop ends normally, the sub stream,
    client, sub socket and context are closed.
    """
    if not self.active:
        raise ValueError('Will not start an inactive plugin')

    self.handle_init()
    self.initialize()
    self.running = True

    while True:
        try:
            self.loop.start()
        except zmq.ZMQError as e:
            logger.debug(str(e))

            if e.errno == errno.EINTR:
                # Interrupted: start the loop again.
                continue
            if e.errno != zmq.ETERM:
                logger.debug("got an unexpected error %s (%s)", str(e),
                             e.errno)
                raise
        # Loop returned normally, or the context was terminated.
        break

    self.substream.close()
    self.client.close()
    self.sub_socket.close()
    self.context.destroy()
def start(self):
    """Start the controller and all the watchers.

    If an ioloop was given to __init__(), the watchers are started the way
    a standard coroutine would, by yielding on ``start_watchers()``.

    If no ioloop was given (the default), the watchers are scheduled on
    the loop and the event loop is started from here, which blocks. In
    that mode the method MUST NOT yield anything, since it is called as a
    standard method.

    Ends by raising ``gen.Return`` carrying ``self._restarting``.
    """
    logger.info("Starting master on pid %s", self.pid)
    self.initialize()

    # The controller comes up first.
    self.ctrl.start()
    self._restarting = False
    try:
        logger.debug('Initializing watchers')
        if self._provided_loop:
            yield self.start_watchers()
        else:
            # Runs right after start_io_loop() has been entered.
            self.loop.add_future(self.start_watchers(), lambda x: None)

        logger.info('Arbiter now waiting for commands')

        if not self._provided_loop:
            # No external loop: block here running our own.
            self.start_io_loop()
    finally:
        if not self._provided_loop:
            # No external loop: clean up after ourselves.
            self.stop_controller_and_close_sockets()

    raise gen.Return(self._restarting)
def start(self):
    """Start the watcher.

    Returns None when the watcher is not stopped, False when either the
    'before_start' or the 'after_start' hook refuses the start-up, and
    True once everything has been started.
    """
    if not self.stopped:
        return

    self.stopped = False
    hook_passed = self.call_hook('before_start')
    if not hook_passed:
        logger.debug('Aborting startup')
        # Nothing was started: put the flag back.
        self.stopped = True
        return False

    self._create_redirectors()
    self.reap_processes()
    self.spawn_processes()

    hook_passed = self.call_hook('after_start')
    if not hook_passed:
        logger.debug('Aborting startup')
        self.stop()
        return False

    if self.stdout_redirector is not None:
        self.stdout_redirector.start()

    if self.stderr_redirector is not None:
        self.stderr_redirector.start()

    logger.info('%s started' % self.name)
    self.notify_event("start", {"time": time.time()})
    return True
def spawn_process(self):
    """Spawn process.

    Does nothing when the watcher is stopped. Otherwise the process
    counter is incremented and up to ``max_retry`` attempts are made to
    create the process; an OSError during creation is logged and counts as
    a failed attempt. On success the process is registered under the
    current counter value, a "spawn" message is sent and the call sleeps
    for ``warmup_delay``.

    Always returns None, including when every attempt failed.
    """
    if self.stopped:
        return

    self._process_counter += 1
    nb_tries = 0
    while nb_tries < self.max_retry:
        process = None
        try:
            process = Process(self._process_counter, self.cmd,
                              args=self.args,
                              working_dir=self.working_dir,
                              shell=self.shell, uid=self.uid,
                              gid=self.gid, env=self.env,
                              rlimits=self.rlimits,
                              executable=self.executable)
            self.processes[self._process_counter] = process
            logger.debug('running %s process [pid %d]', self.name,
                         process.pid)
        # "except X as e" is valid on Python 2.6+ and 3; the former
        # "except X, e" spelling is a syntax error on Python 3.
        except OSError as e:
            logger.warning('error in %r: %s', self.name, str(e))

        if process is None:
            nb_tries += 1
            continue
        else:
            self.send_msg("spawn", {"process_id": process.wid,
                                    "process_pid": process.pid,
                                    "time": time.time()})
            time.sleep(self.warmup_delay)
            return
def stop(self):
    """Stop the watcher and its processes.

    Kills the redirectors, then keeps sending SIGTERM (pausing 0.1s and
    reaping between rounds) while active processes remain and the graceful
    timeout has not elapsed. A KeyboardInterrupt raised during the pause is
    ignored. A final SIGKILL round follows, then the "stop" event.
    """
    logger.debug('stopping the %s watcher' % self.name)

    # Redirectors are taken down before the processes.
    if self.stdout_redirector is not None:
        self.stdout_redirector.kill()

    if self.stderr_redirector is not None:
        self.stderr_redirector.kill()

    deadline = time.time() + self.graceful_timeout

    logger.debug('gracefully stopping processes [%s] for %ss' % (
        self.name, self.graceful_timeout))

    while self.get_active_processes() and time.time() < deadline:
        self.kill_processes(signal.SIGTERM)
        try:
            time.sleep(0.1)
        except KeyboardInterrupt:
            # Keep going with the shutdown.
            pass
        self.reap_processes()

    # Last resort for anything still alive.
    self.kill_processes(signal.SIGKILL)

    if self.evpub_socket is not None:
        self.notify_event("stop", {"time": time.time()})

    self.stopped = True
    logger.info('%s stopped', self.name)
def remove_pid(self, watcher, pid):
    """Forget ``pid`` for ``watcher``.

    When the pid was the last one registered for that watcher, the
    watcher's periodic callback is stopped. Unknown pids are ignored.
    """
    if pid not in self._pids[watcher]:
        return

    logger.debug("Removing %d from %s" % (pid, watcher))
    self._pids[watcher].remove(pid)

    nothing_left = len(self._pids[watcher]) == 0
    if nothing_left:
        logger.debug("Stopping the periodic callback for {0}".format(watcher))
        self._callbacks[watcher].stop()
def dispatch(self, job):
    """Decode one client message, run the matching command and reply.

    Args:
        job: a ``(cid, msg)`` pair, ``msg`` being the raw JSON payload.

    An error reply is sent (and its result returned) when the JSON is
    invalid, the command is missing or unknown, the command raises, or the
    command returns something that is neither a dict nor a list. A list
    result is wrapped as ``{"results": [...]}``. After a successful "quit"
    command the stream is flushed (when there is a client id) and the
    arbiter is stopped.
    """
    cid, msg = job

    try:
        json_msg = json.loads(msg)
    except ValueError:
        return self.send_error(cid, msg, "json invalid",
                               errno=errors.INVALID_JSON)

    cmd_name = json_msg.get('command')
    properties = json_msg.get('properties', {})
    cast = json_msg.get('msg_type') == "cast"

    if cmd_name is None:
        error = "no cmd: %r" % cmd_name
        return self.send_error(cid, msg, error, cast=cast,
                               errno=errors.UNKNOWN_COMMAND)

    try:
        cmd = self.commands[cmd_name.lower()]
    except KeyError:
        error = "unknown command: %r" % cmd_name
        return self.send_error(cid, msg, error, cast=cast,
                               errno=errors.UNKNOWN_COMMAND)

    try:
        cmd.validate(properties)
        resp = cmd.execute(self.arbiter, properties)
    except MessageError as e:
        return self.send_error(cid, msg, str(e), cast=cast,
                               errno=errors.MESSAGE_ERROR)
    except OSError as e:
        return self.send_error(cid, msg, str(e), cast=cast,
                               errno=errors.OS_ERROR)
    except Exception as e:
        # Any other command failure is reported back to the client.
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        tb = traceback.format_exc()
        reason = "command %r: %s" % (msg, e)
        logger.debug("error: command %r: %s\n\n%s", msg, e, tb)
        return self.send_error(cid, msg, reason, tb, cast=cast,
                               errno=errors.COMMAND_ERROR)

    if resp is None:
        resp = ok()

    if not isinstance(resp, (dict, list)):
        # Keep ``msg`` intact: it is still the client's original payload
        # and is passed as such to send_error(), like everywhere else.
        logger.error("msg %r tried to send a non-dict: %s", msg, str(resp))
        return self.send_error(cid, msg, "server error", cast=cast,
                               errno=errors.BAD_MSG_DATA_ERROR)

    if isinstance(resp, list):
        resp = {"results": resp}

    self.send_ok(cid, msg, resp, cast=cast)

    if cmd_name.lower() == "quit":
        if cid is not None:
            # Make sure the reply is on the wire before stopping.
            self.stream.flush()

        self.arbiter.stop()
def kill_process(self, process):
    """Stop one process: stop signal, grace period, then SIGKILL.

    Coroutine-style generator. Finishes with ``gen.Return(False)`` when a
    kill is already in progress for the process, ``gen.Return(True)``
    otherwise.
    """
    if process.stopping:
        raise gen.Return(False)

    logger.debug("%s: kill process %s", self.name, process.pid)
    if not self.stop_children:
        self.send_signal(process.pid, self.stop_signal)
        self.notify_event("kill", {
            "process_pid": process.pid,
            "time": time.time()
        })
    else:
        self.send_signal_process(process, self.stop_signal)

    process.stopping = True

    # Check once per second whether the process has gone away.
    elapsed = 0
    while elapsed < self.graceful_timeout:
        yield tornado_sleep(1)
        elapsed += 1
        if not process.is_alive():
            break

    if elapsed >= self.graceful_timeout:
        # Grace period used up: force the issue.
        self.send_signal_process(process, signal.SIGKILL)

    self._process_remove_redirections(process)
    process.stopping = False
    process.stop()
    raise gen.Return(True)
def reap_process(self, pid, status=None):
    """ensure that the process is killed (and not a zombie)

    Removes ``pid`` from ``self.processes``, works out its exit code and
    publishes a "reap" event carrying it. Unknown pids are ignored.

    Args:
        pid: pid of the process to reap.
        status: wait status as returned by ``os.waitpid``; collected here
            with a non-blocking ``waitpid`` when None.

    Raises:
        RuntimeError: the wait status is neither an exit nor a signal.
        OSError: unexpected ``waitpid`` failure.
    """
    if pid not in self.processes:
        return
    process = self.processes.pop(pid)

    # Poll only until a status has been obtained. The previous
    # ``while True`` never left the loop after a successful waitpid(), so
    # the status decoding below could not be reached from this path.
    while status is None:
        try:
            _, status = os.waitpid(pid, os.WNOHANG)
        except OSError as e:
            if e.errno == errno.EAGAIN:
                time.sleep(0.001)
                continue
            elif e.errno == errno.ECHILD:
                # nothing to do here, we do not have any child
                # process running
                # but we still need to send the "reap" signal.
                #
                # This can happen if poll() or wait() were called on
                # the underlying process.
                logger.debug('reaping already dead process %s [%s]',
                             pid, self.name)
                self.notify_event(
                    "reap",
                    {"process_pid": pid,
                     "time": time.time(),
                     "exit_code": process.returncode()})
                process.stop()
                return
            else:
                raise

    # get return code
    if os.WIFSIGNALED(status):
        # The Python Popen object returns <-signal> in it's returncode
        # property if the process exited on a signal, so emulate that
        # behavior here so that pubsub clients watching for reap can
        # distinguish between an exit with a non-zero exit code and
        # a signal'd exit. This is also consistent with the notify_event
        # reap message above that uses the returncode function (that ends
        # up calling Popen.returncode)
        exit_code = -os.WTERMSIG(status)
    # process exited using exit(2) system call; return the
    # integer exit(2) system call has been called with
    elif os.WIFEXITED(status):
        exit_code = os.WEXITSTATUS(status)
    else:
        # should never happen
        raise RuntimeError("Unknown process exit status")

    # if the process is dead or a zombie try to definitely stop it.
    if process.status in (DEAD_OR_ZOMBIE, UNEXISTING):
        process.stop()

    logger.debug('reaping process %s [%s]', pid, self.name)
    self.notify_event("reap",
                      {"process_pid": pid,
                       "time": time.time(),
                       "exit_code": exit_code})
def kill_process(self, process, sig=signal.SIGTERM):
    """Send ``sig`` to a process and to its children, then stop it.

    The 'stdout' and 'stderr' redirections of the process are removed
    beforehand. Each signalled pid gets its own "kill" event. When the
    process no longer exists (NoSuchProcess) the method returns early,
    without calling ``process.stop()``.
    """
    # Drop the redirections attached to this process.
    if self.stdout_redirector is not None:
        self.stdout_redirector.remove_redirection('stdout', process)

    if self.stderr_redirector is not None:
        self.stderr_redirector.remove_redirection('stderr', process)

    logger.debug("%s: kill process %s", self.name, process.pid)
    try:
        # The children receive the same signal, before their parent.
        for child_pid in process.children():
            process.send_signal_child(child_pid, sig)
            self.notify_event("kill", {"process_pid": child_pid,
                                       "time": time.time()})

        # Finally the process itself.
        self.send_signal(process.pid, sig)
        self.notify_event("kill", {"process_pid": process.pid,
                                   "time": time.time()})
    except NoSuchProcess:
        # Nothing left to signal.
        return

    process.stop()
def stop(self):
    """Stop the watcher and its processes.

    Stops and discards both redirectors, runs the 'before_stop' hook, then
    sends SIGTERM and reaps in a loop while active processes remain and the
    graceful timeout has not elapsed. A SIGKILL round follows, the "stop"
    event is published and the 'after_stop' hook is run.
    """
    logger.debug('stopping the %s watcher' % self.name)

    # Stop the redirectors and drop the references to them.
    if self.stdout_redirector is not None:
        self.stdout_redirector.stop()
        self.stdout_redirector = None

    if self.stderr_redirector is not None:
        self.stderr_redirector.stop()
        self.stderr_redirector = None

    deadline = time.time() + self.graceful_timeout

    logger.debug('gracefully stopping processes [%s] for %ss' % (
        self.name, self.graceful_timeout))

    # The hook's outcome is not taken into account.
    self.call_hook('before_stop')

    while self.get_active_processes() and time.time() < deadline:
        self.kill_processes(signal.SIGTERM)
        self.reap_processes()

    self.kill_processes(signal.SIGKILL)

    if self.evpub_socket is not None:
        self.notify_event("stop", {"time": time.time()})

    self.stopped = True

    # The hook's outcome is not taken into account.
    self.call_hook('after_stop')
    logger.info('%s stopped', self.name)
def handle_recv(self, data):
    """Handle received message from circusd

    We need to handle two messages:
      - spawn: add a new monitored child pid
      - reap: remove a killed child pid from monitoring

    Args:
        data: a ``(topic, msg)`` pair; the topic is split on "." and read
            as ``watcher.<name>.<action>``, ``msg`` is a JSON payload.

    Topics that are not about a monitored watcher, payloads that are not
    valid JSON, payloads without "process_pid" and reaps for pids that are
    not being monitored are all ignored.
    """
    topic, msg = data
    topic_parts = topic.split(".")
    logger.debug("received data from circusd: %s, %s", topic_parts, msg)

    # A usable topic has at least a 'watcher' prefix and a watcher name;
    # anything shorter is skipped instead of raising IndexError.
    if len(topic_parts) < 2 or topic_parts[0] != 'watcher':
        return

    watcher_name = topic_parts[1]
    action = topic_parts[2] if len(topic_parts) > 2 else None

    # check if monitored watchers:
    if not self._match_watcher_name(watcher_name):
        return

    try:
        message = json.loads(msg)
    except ValueError:
        logger.error("Error while decoding json for message: %s", msg)
        return

    if "process_pid" not in message:
        logger.warning('no process_pid in message')
        return

    pid = str(message.get("process_pid"))
    if action == "spawn":
        self.pid_status[pid] = dict(watcher=watcher_name,
                                    last_activity=time.time())
        logger.info("added new monitored pid for %s:%s",
                    watcher_name, pid)
    elif action == "reap":
        # The pid may never have been registered (e.g. it was spawned
        # before monitoring started): that must not raise KeyError.
        old_pid = self.pid_status.pop(pid, None)
        if old_pid is not None:
            logger.info("removed monitored pid for %s:%s",
                        old_pid['watcher'], pid)
def read_config(config_path):
    """Parse the main config file and every file it includes.

    Included files come from the ``include`` option (whitespace separated
    glob patterns) and the ``include_dir`` option (directories scanned for
    ``*.ini``) of the ``circus`` section. Patterns that are not already
    absolute paths are resolved against the main config file's directory.

    Args:
        config_path: path of the main configuration file.

    Returns:
        A ``(cfg, paths)`` tuple: the parser, and a list made of
        ``config_path`` followed by what ``cfg.read()`` returned for the
        included files.
    """
    cfg = DefaultConfigParser()
    with open(config_path) as f:
        # read_file() replaces the older readfp() on recent Pythons.
        if hasattr(cfg, 'read_file'):
            cfg.read_file(f)
        else:
            cfg.readfp(f)

    current_dir = os.path.dirname(config_path)

    # load included config files
    includes = []

    def _scan(filename, includes):
        """Append the files matching ``filename`` to ``includes``."""
        if os.path.abspath(filename) != filename:
            filename = os.path.join(current_dir, filename)

        paths = glob.glob(filename)
        if not paths:
            # Logger.warn() is deprecated and gone in Python 3.13.
            logger.warning('%r does not lead to any config. Make sure '
                           'include paths are relative to the main config '
                           'file' % filename)
        includes.extend(paths)

    for include_file in cfg.dget('circus', 'include', '').split():
        _scan(include_file, includes)

    for include_dir in cfg.dget('circus', 'include_dir', '').split():
        _scan(os.path.join(include_dir, '*.ini'), includes)

    logger.debug('Reading config files: %s' % includes)
    return cfg, [config_path] + cfg.read(includes)
def spawn_process(self):
    """Spawn one process, retrying on OSError.

    Returns True when the watcher can carry on (process spawned, or the
    watcher is stopped) and False when the watcher must be stopped: a
    spawn hook refused, or every attempt failed. A ``max_retry`` of -1
    means retry forever.
    """
    if self.is_stopped():
        return True

    if not self.call_hook('before_spawn'):
        return False

    cmd = util.replace_gnu_args(self.cmd, env=self.env)
    nb_tries = 0

    while nb_tries < self.max_retry or self.max_retry == -1:
        process = None
        pipe_stdout = self.stdout_redirector is not None
        pipe_stderr = self.stderr_redirector is not None

        try:
            process = Process(self._nextwid, cmd,
                              args=self.args,
                              working_dir=self.working_dir,
                              shell=self.shell,
                              uid=self.uid,
                              gid=self.gid,
                              env=self.env,
                              rlimits=self.rlimits,
                              executable=self.executable,
                              use_fds=self.use_sockets,
                              watcher=self,
                              pipe_stdout=pipe_stdout,
                              pipe_stderr=pipe_stderr,
                              close_child_stdout=self.close_child_stdout,
                              close_child_stderr=self.close_child_stderr)

            # Wire the child's output into the redirectors, if configured.
            if pipe_stdout and self.stdout_redirector is not None:
                self.stdout_redirector.add_redirection('stdout',
                                                       process,
                                                       process.stdout)

            if pipe_stderr and self.stderr_redirector is not None:
                self.stderr_redirector.add_redirection('stderr',
                                                       process,
                                                       process.stderr)

            self.processes[process.pid] = process
            logger.debug('running %s process [pid %d]', self.name,
                         process.pid)

            if not self.call_hook('after_spawn', pid=process.pid):
                # The hook rejected the new process: undo the spawn.
                self.kill_process(process)
                del self.processes[process.pid]
                return False
        except OSError as e:
            logger.warning('error in %r: %s', self.name, str(e))

        if process is not None:
            self.notify_event("spawn", {"process_pid": process.pid,
                                        "time": time.time()})
            return True

        # Creation failed: count the attempt and try again.
        nb_tries += 1

    return False
def start(self):
    """Start the controller and the watchers, then run the loop.

    The loop waits for client commands; it is restarted whenever it is
    interrupted (EINTR) and any other ZMQError is propagated. Once the
    loop has returned, the controller is stopped and the event publisher
    socket is closed.
    """
    logger.info("Starting master on pid %s", self.pid)

    self.initialize()

    # The controller has to be up before the watchers.
    self.ctrl.start()

    logger.debug("Initializing watchers")
    for watcher in self.iter_watchers():
        watcher.start()

    logger.info("Arbiter now waiting for commands")
    while True:
        try:
            self.loop.start()
        except zmq.ZMQError as e:
            if e.errno != errno.EINTR:
                raise
            # Interrupted system call: go round and restart the loop.
        else:
            break

    self.ctrl.stop()
    self.evpub_socket.close()
def kill_process(self, process):
    """Ask a process to stop, wait for it, and SIGKILL it if it lingers.

    Coroutine-style generator: ends with ``gen.Return(False)`` if the
    process is already being stopped, else ``gen.Return(True)`` after the
    process has been stopped.
    """
    if process.stopping:
        # Somebody else is already taking care of this process.
        raise gen.Return(False)

    logger.debug("%s: kill process %s", self.name, process.pid)
    if self.stop_children:
        self.send_signal_process(process, self.stop_signal)
    else:
        self.send_signal(process.pid, self.stop_signal)
        kill_event = {"process_pid": process.pid, "time": time.time()}
        self.notify_event("kill", kill_event)

    process.stopping = True

    seconds_waited = 0
    while seconds_waited < self.graceful_timeout:
        yield tornado_sleep(1)
        seconds_waited += 1
        if not process.is_alive():
            break

    timed_out = seconds_waited >= self.graceful_timeout
    if timed_out:
        # It had its chance.
        self.send_signal_process(process, signal.SIGKILL)

    self._process_remove_redirections(process)
    process.stopping = False
    process.stop()
    raise gen.Return(True)
def stop(self):
    """Stop the watcher and its processes.

    The redirectors are killed, then SIGTERM is sent repeatedly (with a
    0.1s pause and a reap after each round) for as long as processes are
    active and the graceful timeout allows. A KeyboardInterrupt during the
    pause is swallowed. SIGKILL is sent last and "stop" is published.
    """
    logger.debug('stopping the %s watcher' % self.name)

    # stop redirectors
    if self.stdout_redirector is not None:
        self.stdout_redirector.kill()

    if self.stderr_redirector is not None:
        self.stderr_redirector.kill()

    limit = time.time() + self.graceful_timeout

    grace_msg = 'gracefully stopping processes [%s] for %ss' % (
        self.name, self.graceful_timeout)
    logger.debug(grace_msg)

    while self.get_active_processes() and time.time() < limit:
        self.kill_processes(signal.SIGTERM)
        try:
            time.sleep(0.1)
        except KeyboardInterrupt:
            # An interrupt must not abort the shutdown sequence.
            pass
        self.reap_processes()

    self.kill_processes(signal.SIGKILL)

    if self.evpub_socket is not None:
        stopped_at = time.time()
        self.notify_event("stop", {"time": stopped_at})

    self.stopped = True
    logger.info('%s stopped', self.name)
def _start(self):
    """Start the watcher (generator meant to be driven as a coroutine).

    Nothing happens when the watcher is not stopped, when it is on-demand
    and the arbiter has no socket event, or when the 'before_start' hook
    refuses. If the 'after_start' hook refuses, the watcher is stopped
    again. On success the status ends up as "active" and a "start" event
    is published.
    """
    if not self.is_stopped():
        return

    if self.on_demand and not self.arbiter.socket_event:
        return

    before_ok = self.call_hook('before_start')
    if not before_ok:
        logger.debug('Aborting startup')
        return

    self._status = "starting"

    self._create_redirectors()
    self.reap_processes()
    yield self.spawn_processes()

    after_ok = self.call_hook('after_start')
    if not after_ok:
        logger.debug('Aborting startup')
        yield self._stop()
        return

    if self.stdout_redirector is not None:
        self.stdout_redirector.start()

    if self.stderr_redirector is not None:
        self.stderr_redirector.start()

    self._status = "active"
    logger.info('%s started' % self.name)
    self.notify_event("start", {"time": time.time()})
def stop(self):
    """Stop the watcher and its processes.

    Kills the redirectors, runs the 'before_stop' hook, then alternates
    SIGTERM and reaping while active processes remain and the graceful
    timeout has not elapsed. SIGKILL is sent afterwards, "stop" is
    published and the 'after_stop' hook is run.
    """
    logger.debug('stopping the %s watcher' % self.name)

    # Redirectors first.
    if self.stdout_redirector is not None:
        self.stdout_redirector.kill()

    if self.stderr_redirector is not None:
        self.stderr_redirector.kill()

    give_up_at = time.time() + self.graceful_timeout

    logger.debug('gracefully stopping processes [%s] for %ss' % (
        self.name, self.graceful_timeout))

    # Whatever the hook returns is disregarded.
    self.call_hook('before_stop')

    while self.get_active_processes() and time.time() < give_up_at:
        self.kill_processes(signal.SIGTERM)
        self.reap_processes()

    # Survivors get SIGKILL.
    self.kill_processes(signal.SIGKILL)

    if self.evpub_socket is not None:
        self.notify_event("stop", {"time": time.time()})

    self.stopped = True

    # Whatever the hook returns is disregarded.
    self.call_hook('after_stop')
    logger.info('%s stopped', self.name)
def start(self):
    """Starts all the watchers.

    The start command is an infinite loop that waits for any command
    from a client and that watches all the processes and restarts them
    if needed.

    The controller is stopped and the event publisher socket is closed
    when the loop ends, including when a watcher fails to start or the
    loop raises: the exception still propagates, but the resources are
    released first.
    """
    logger.info("Starting master on pid %s", self.pid)

    self.initialize()

    # start controller
    self.ctrl.start()

    try:
        # initialize processes
        logger.debug('Initializing watchers')
        for watcher in self.iter_watchers():
            watcher.start()

        logger.info('Arbiter now waiting for commands')
        while True:
            try:
                self.loop.start()
            except zmq.ZMQError as e:
                if e.errno != errno.EINTR:
                    raise
                # interrupted system call: just restart the loop
            else:
                break
    finally:
        # Always release the controller and the publisher socket, even
        # on error; close the socket even if stopping the controller
        # fails.
        try:
            self.ctrl.stop()
        finally:
            self.evpub_socket.close()
def remove_pid(self, watcher, pid):
    """Drop `pid` from the pids recorded for `watcher`.

    Nothing happens when the pid is not recorded.  When the removal
    leaves the watcher without any pid, its entry in ``_callbacks`` is
    stopped.
    """
    pids = self._pids[watcher]
    if pid not in pids:
        return

    logger.debug('Removing %d from %s' % (pid, watcher))
    pids.remove(pid)

    if pids:
        # other pids are still tracked for this watcher
        return

    logger.debug(
        'Stopping the periodic callback for {0}'.format(watcher))
    self._callbacks[watcher].stop()
def spawn_process(self):
    """Spawn one process for this watcher.

    Returns True when the watcher is stopped (nothing to do) or when a
    process was created, and False when the ``before_spawn`` hook fails
    or no process could be created within ``max_retry`` attempts
    (-1 means retry forever), i.e. when the watcher must be stopped.
    """
    if self.is_stopped():
        return True

    if not self.call_hook('before_spawn'):
        return False

    cmd = util.replace_gnu_args(self.cmd, sockets=self._get_sockets_fds(),
                                env=self.env)

    attempts = 0
    while self.max_retry == -1 or attempts < self.max_retry:
        process = None
        want_stdout = self.stdout_redirector is not None
        want_stderr = self.stderr_redirector is not None

        try:
            process = Process(self._nextwid, cmd,
                              args=self.args,
                              working_dir=self.working_dir,
                              shell=self.shell,
                              uid=self.uid,
                              gid=self.gid,
                              env=self.env,
                              rlimits=self.rlimits,
                              executable=self.executable,
                              use_fds=self.use_sockets,
                              watcher=self,
                              pipe_stdout=want_stdout,
                              pipe_stderr=want_stderr,
                              close_child_stdout=self.close_child_stdout,
                              close_child_stderr=self.close_child_stderr)

            # stream stderr/stdout if configured
            if want_stdout and self.stdout_redirector is not None:
                self.stdout_redirector.add_redirection(
                    'stdout', process, process.stdout)

            if want_stderr and self.stderr_redirector is not None:
                self.stderr_redirector.add_redirection(
                    'stderr', process, process.stderr)

            self.processes[process.pid] = process
            logger.debug('running %s process [pid %d]', self.name,
                         process.pid)
        except OSError as e:
            logger.warning('error in %r: %s', self.name, str(e))

        if process is not None:
            self.notify_event("spawn", {"process_pid": process.pid,
                                        "time": time.time()})
            return True

        # the process could not be created: count the failure and retry
        attempts += 1

    return False
def dispatch(self, job):
    """Decode one client message, run the matching command and reply.

    `job` is a ``(cid, msg)`` pair where `msg` is the raw JSON text.  An
    error response is sent (and its result returned) when the message is
    not a JSON object, names no known command, or the command fails.
    Otherwise the command result is sent with ``send_ok``; a ``None``
    result is replaced by ``ok()`` and a list is wrapped as
    ``{"results": [...]}``.  After a "quit" command the arbiter is
    stopped.
    """
    cid, msg = job

    try:
        json_msg = json.loads(msg)
    except ValueError:
        return self.send_error(cid, msg, "json invalid",
                               errno=errors.INVALID_JSON)

    if not isinstance(json_msg, dict):
        # Well-formed JSON that is not an object (list, number, ...)
        # carries no "command" key; reject it instead of crashing on
        # the .get() calls below.
        return self.send_error(cid, msg, "json invalid",
                               errno=errors.INVALID_JSON)

    cmd_name = json_msg.get('command')
    properties = json_msg.get('properties', {})
    cast = json_msg.get('msg_type') == "cast"

    try:
        cmd_key = cmd_name.lower()
    except AttributeError:
        # "command" is missing or is not a string: treat it as unknown
        cmd_key = None

    try:
        cmd = self.commands[cmd_key]
    except KeyError:
        error = "unknown command: %r" % (cmd_name,)
        return self.send_error(cid, msg, error, cast=cast,
                               errno=errors.UNKNOWN_COMMAND)

    try:
        cmd.validate(properties)
        resp = cmd.execute(self.arbiter, properties)
    except MessageError as e:
        return self.send_error(cid, msg, str(e), cast=cast,
                               errno=errors.MESSAGE_ERROR)
    except OSError as e:
        return self.send_error(cid, msg, str(e), cast=cast,
                               errno=errors.OS_ERROR)
    except:  # noqa: E722 - deliberate catch-all, reported to the client
        value = sys.exc_info()[1]
        tb = traceback.format_exc()
        reason = "command %r: %s" % (msg, value)
        logger.debug("error: command %r: %s\n\n%s", msg, value, tb)
        return self.send_error(cid, msg, reason, tb, cast=cast,
                               errno=errors.COMMAND_ERROR)

    if resp is None:
        resp = ok()

    if not isinstance(resp, (dict, list,)):
        # Keep `msg` untouched: it is the original client message and is
        # what both the log line and send_error must receive.
        logger.error("msg %r tried to send a non-dict: %s", msg, str(resp))
        return self.send_error(cid, msg, "server error", cast=cast,
                               errno=errors.BAD_MSG_DATA_ERROR)

    if isinstance(resp, list):
        resp = {"results": resp}

    self.send_ok(cid, msg, resp, cast=cast)

    if cmd_key == "quit":
        if cid is not None:
            self.stream.flush()

        self.arbiter.stop()
def spawn_process(self):
    """Spawn process.

    Does nothing when the watcher is stopped.  Otherwise tries to create
    a ``Process`` up to ``max_retry`` times (-1 means retry forever); on
    success the process is recorded in ``self.processes``, a "spawn"
    event is published and the call sleeps for ``warmup_delay`` seconds.
    Always returns None, including when every attempt failed.
    """
    if self.stopped:
        return

    cmd = util.replace_gnu_args(self.cmd, sockets=self._get_sockets_fds())
    self._process_counter += 1
    nb_tries = 0
    pipe_stdout = self.stdout_redirector is not None
    pipe_stderr = self.stderr_redirector is not None

    while nb_tries < self.max_retry or self.max_retry == -1:
        process = None
        try:
            process = Process(self._process_counter, cmd,
                              args=self.args,
                              working_dir=self.working_dir,
                              shell=self.shell,
                              uid=self.uid,
                              gid=self.gid,
                              env=self.env,
                              rlimits=self.rlimits,
                              executable=self.executable,
                              use_fds=self.use_sockets,
                              watcher=self,
                              pipe_stdout=pipe_stdout,
                              pipe_stderr=pipe_stderr,
                              close_child_stdout=self.close_child_stdout,
                              close_child_stderr=self.close_child_stderr)

            # stream stderr/stdout if configured
            if pipe_stdout:
                self.stdout_redirector.add_redirection(
                    'stdout', process, process.stdout)

            if pipe_stderr:
                self.stderr_redirector.add_redirection(
                    'stderr', process, process.stderr)

            self.processes[process.pid] = process
            logger.debug('running %s process [pid %d]', self.name,
                         process.pid)
        # "except X as e" is valid on Python 2.6+ and 3; the former
        # "except X, e" spelling is a syntax error on Python 3.
        except OSError as e:
            logger.warning('error in %r: %s', self.name, str(e))

        if process is None:
            # creation failed: count the attempt and retry
            nb_tries += 1
            continue
        else:
            self.notify_event("spawn", {
                "process_pid": process.pid,
                "time": time.time()
            })
            time.sleep(self.warmup_delay)
            return
def handle_message(self, raw_msg):
    """Handle one ``(cid, msg)`` pair received from a client.

    The message is stripped; a non-empty message is logged and passed to
    ``dispatch``, an empty one is answered with "error: empty command".
    """
    cid, payload = raw_msg
    payload = payload.strip()

    if payload:
        logger.debug("got message %s", payload)
        self.dispatch((cid, payload))
        return

    self.send_response(None, cid, payload, "error: empty command")
def execute(self, arbiter, props):
    """List watcher names, or the pids of one watcher.

    Without a 'name' property, returns ``{"watchers": [...]}`` with the
    arbiter's watcher names sorted.  With a 'name' property, returns
    ``{"pids": [...]}`` for the active processes of that watcher.
    """
    if 'name' not in props:
        return {"watchers": sorted(arbiter._watchers_names)}

    watcher = self._get_watcher(arbiter, props['name'])
    active = watcher.get_active_processes()

    # only used for the debug line below
    status = [(proc.pid, proc.status) for proc in active]
    logger.debug('here is the status of the processes %s' % status)

    pids = [proc.pid for proc in active]
    return {"pids": pids}
def reap_processes(self):
    """Call ``reap_process`` for every pid in ``self.processes``.

    Skipped (with a debug message) when the watcher is stopped.
    """
    if not self.is_stopped():
        # reap_process changes our dict, so walk a snapshot of its keys
        for pid in tuple(self.processes.keys()):
            self.reap_process(pid)
        return

    logger.debug('do not reap processes as the watcher is stopped')
def _log(self, *args, **kw):
    """Call the wrapped `func`, logging its start and end in debug mode.

    When the DEBUG environment variable is not set, `func` is simply
    called.  Otherwise a debug line is logged before the call and another
    one after it, even if `func` raises.

    NOTE(review): `func` is not defined in this block; presumably it is
    bound by an enclosing decorator -- confirm against the caller.
    """
    if os.environ.get('DEBUG') is None:
        return func(self, *args, **kw)

    cls = self.__class__.__name__
    # __name__ exists on both Python 2 and 3 function objects, whereas
    # func_name is Python 2 only.
    name = func.__name__
    logger.debug("'%s.%s' starts", cls, name)
    try:
        return func(self, *args, **kw)
    finally:
        logger.debug("'%s.%s' ends", cls, name)
def _send_signal(self, process, signum): is_sigkill = hasattr(signal, 'SIGKILL') and signum == signal.SIGKILL pid = process.pid hook_result = self.call_hook("before_signal", pid=pid, signum=signum) if not is_sigkill and not hook_result: logger.debug("before_signal hook didn't return True " "=> signal %i is not sent to %i" % (signum, pid)) else: process.send_signal(signum) self.call_hook("after_signal", pid=pid, signum=signum)
def manage_watchers(self):
    """Schedule one run of the arbiter's ``manage_watchers``.

    Skipped when a previous run is still recorded in
    ``_managing_watchers_future``.  Otherwise the new future is stored
    there and handed to the loop with ``_manage_watchers_cb`` as its
    callback.  A ConflictError is only logged.
    """
    in_progress = self._managing_watchers_future is not None
    if in_progress:
        logger.debug("manage_watchers is already running...")
        return

    try:
        future = self.arbiter.manage_watchers()
        self._managing_watchers_future = future
        self.loop.add_future(future, self._manage_watchers_cb)
    except ConflictError:
        logger.debug("manage_watchers is conflicting with another command")