def _start_child(self, wrap): if len(wrap.forktimes) > wrap.workers: # Limit ourselves to one process a second (over the period of # number of workers * 1 second). This will allow workers to # start up quickly but ensure we don't fork off children that # die instantly too quickly. if time.time() - wrap.forktimes[0] < wrap.workers: LOG.info(_LI('Forking too fast, sleeping')) time.sleep(1) wrap.forktimes.pop(0) wrap.forktimes.append(time.time()) pid = os.fork() if pid == 0: launcher = self._child_process(wrap.service) while True: self._child_process_handle_signal() status, signo = self._child_wait_for_exit_or_signal(launcher) if not _is_sighup_and_daemon(signo): break launcher.restart() os._exit(status) LOG.info(_LI('Started child %d'), pid) wrap.children.add(pid) self.children[pid] = wrap return pid
def wait(self): """Loop waiting on children to die and respawning as necessary.""" systemd.notify_once() LOG.debug('Full set of CONF:') CONF.log_opt_values(LOG, std_logging.DEBUG) try: while True: self.handle_signal() self._respawn_children() # No signal means that stop was called. Don't clean up here. if not self.sigcaught: return signame = _signo_to_signame(self.sigcaught) LOG.info(_LI('Caught %s, stopping children'), signame) if not _is_sighup_and_daemon(self.sigcaught): break for pid in self.children: os.kill(pid, signal.SIGHUP) self.running = True self.sigcaught = None except eventlet.greenlet.GreenletExit: LOG.info(_LI("Wait called after thread killed. Cleaning up.")) self.stop()
def _connect(self, params): """Connect to rabbit. Re-establish any queues that may have been declared before if we are reconnecting. Exceptions should be handled by the caller. """ if self.connection: LOG.info(_LI("Reconnecting to AMQP server on " "%(hostname)s:%(port)d") % params) try: self.connection.release() except self.connection_errors: pass # Setting this in case the next statement fails, though # it shouldn't be doing any network operations, yet. self.connection = None self.connection = kombu.connection.BrokerConnection(**params) self.connection_errors = self.connection.connection_errors if self.memory_transport: # Kludge to speed up tests. self.connection.transport.polling_interval = 0.0 self.consumer_num = itertools.count(1) self.connection.connect() self.channel = self.connection.channel() # work around 'memory' transport bug in 1.1.3 if self.memory_transport: self.channel._new_queue('ae.undeliver') for consumer in self.consumers: consumer.reconnect(self.channel) LOG.info(_LI('Connected to AMQP server on %(hostname)s:%(port)d') % params)
def _wait_child(self): try: # Don't block if no child processes have exited pid, status = os.waitpid(0, os.WNOHANG) if not pid: return None except OSError as exc: if exc.errno not in (errno.EINTR, errno.ECHILD): raise return None if os.WIFSIGNALED(status): sig = os.WTERMSIG(status) LOG.info(_LI('Child %(pid)d killed by signal %(sig)d'), dict(pid=pid, sig=sig)) else: code = os.WEXITSTATUS(status) LOG.info(_LI('Child %(pid)s exited with status %(code)d'), dict(pid=pid, code=code)) if pid not in self.children: LOG.warning(_LW('pid %d not in child list'), pid) return None wrap = self.children.pop(pid) wrap.children.remove(pid) return wrap
def get_filtered_objects(self, filter_classes, objs, filter_properties, index=0): """Get objects after filter :param filter_classes: filters that will be used to filter the objects :param objs: objects that will be filtered :param filter_properties: client filter properties :param index: This value needs to be increased in the caller function of get_filtered_objects when handling each resource. """ list_objs = list(objs) LOG.debug("Starting with %d host(s)", len(list_objs)) for filter_cls in filter_classes: cls_name = filter_cls.__name__ filter_class = filter_cls() if filter_class.run_filter_for_index(index): objs = filter_class.filter_all(list_objs, filter_properties) if objs is None: LOG.debug("Filter %(cls_name)s says to stop filtering", {'cls_name': cls_name}) return list_objs = list(objs) msg = (_LI("Filter %(cls_name)s returned %(obj_len)d host(s)") % {'cls_name': cls_name, 'obj_len': len(list_objs)}) if not list_objs: LOG.info(msg) break LOG.debug(msg) return list_objs
def _wait_for_exit_or_signal(self, ready_callback=None): status = None signo = 0 LOG.debug('Full set of CONF:') CONF.log_opt_values(LOG, std_logging.DEBUG) try: if ready_callback: ready_callback() super(ServiceLauncher, self).wait() except SignalExit as exc: signame = _signo_to_signame(exc.signo) LOG.info(_LI('Caught %s, exiting'), signame) status = exc.code signo = exc.signo except SystemExit as exc: status = exc.code finally: self.stop() if rpc: try: rpc.cleanup() except Exception: # We're shutting down, so it doesn't matter at this point. LOG.exception(_LE('Exception during rpc cleanup.')) return status, signo
def create_consumer(self, topic, proxy, fanout=False): # Register with matchmaker. _get_matchmaker().register(topic, CONF.rpc_zmq_host) # Subscription scenarios if fanout: sock_type = zmq.SUB subscribe = ('', fanout)[type(fanout) == str] topic = 'fanout~' + topic.split('.', 1)[0] else: sock_type = zmq.PULL subscribe = None topic = '.'.join((topic.split('.', 1)[0], CONF.rpc_zmq_host)) if topic in self.topics: LOG.info(_LI("Skipping topic registration. Already registered.")) return # Receive messages from (local) proxy inaddr = "ipc://%s/zmq_topic_%s" % \ (CONF.rpc_zmq_ipc_dir, topic) LOG.debug("Consumer is a zmq.%s", ['PULL', 'SUB'][sock_type == zmq.SUB]) self.reactor.register(proxy, inaddr, sock_type, subscribe=subscribe, in_bind=False) self.topics.append(topic)
def publisher(waiter): LOG.info(_LI("Creating proxy for topic: %s"), topic) try: # The topic is received over the network, # don't trust this input. if self.badchars.search(topic) is not None: emsg = _("Topic contained dangerous characters.") LOG.warn(emsg) raise RPCException(emsg) out_sock = ZmqSocket("ipc://%s/zmq_topic_%s" % (ipc_dir, topic), sock_type, bind=True) except RPCException: waiter.send_exception(*sys.exc_info()) return self.topic_proxy[topic] = eventlet.queue.LightQueue( CONF.rpc_zmq_topic_backlog) self.sockets.append(out_sock) # It takes some time for a pub socket to open, # before we can have any faith in doing a send() to it. if sock_type == zmq.PUB: eventlet.sleep(.5) waiter.send(True) while(True): data = self.topic_proxy[topic].get() out_sock.send(data, copy=False)
def register(self, proxy, in_addr, zmq_type_in, in_bind=True, subscribe=None): LOG.info(_LI("Registering reactor")) if zmq_type_in not in (zmq.PULL, zmq.SUB): raise RPCException("Bad input socktype") # Items push in. inq = ZmqSocket(in_addr, zmq_type_in, bind=in_bind, subscribe=subscribe) self.proxies[inq] = proxy self.sockets.append(inq) LOG.info(_LI("In reactor registered"))
def acquire(self): basedir = os.path.dirname(self.fname) if not os.path.exists(basedir): fileutils.ensure_tree(basedir) LOG.info(_LI('Created lock path: %s'), basedir) self.lockfile = open(self.fname, 'w') while True: try: # Using non-blocking locks since green threads are not # patched to deal with blocking locking calls. # Also upon reading the MSDN docs for locking(), it seems # to have a laughable 10 attempts "blocking" mechanism. self.trylock() LOG.debug('Got file lock "%s"', self.fname) return True except IOError as e: if e.errno in (errno.EACCES, errno.EAGAIN): # external locks synchronise things like iptables # updates - give it some time to prevent busy spinning time.sleep(0.01) else: raise threading.ThreadError(_("Unable to acquire lock on" " `%(filename)s` due to" " %(exception)s") % { 'filename': self.fname, 'exception': e, })
def _pipe_watcher(self): # This will block until the write end is closed when the parent # dies unexpectedly self.readpipe.read() LOG.info(_LI('Parent process has died unexpectedly, exiting')) sys.exit(1)
def unregister(self, key, host): """Unregister a topic.""" if (key, host) in self.host_topic: del self.host_topic[(key, host)] self.hosts.discard(host) self.backend_unregister(key, '.'.join((key, host))) LOG.info(_LI("Matchmaker unregistered: %(key)s, %(host)s"), {'key': key, 'host': host})
def remove_external_lock_file(name, lock_file_prefix=None): """Remove an external lock file when it's not used anymore This will be helpful when we have a lot of lock files """ with internal_lock(name): lock_file_path = _get_lock_path(name, lock_file_prefix) try: os.remove(lock_file_path) except OSError: LOG.info(_LI('Failed to remove file %(file)s'), {'file': lock_file_path})
def wait(self): """Loop waiting on children to die and respawning as necessary.""" systemd.notify_once() LOG.debug('Full set of CONF:') CONF.log_opt_values(LOG, std_logging.DEBUG) try: while True: self.handle_signal() self._respawn_children() if self.sigcaught: signame = _signo_to_signame(self.sigcaught) LOG.info(_LI('Caught %s, stopping children'), signame) if not _is_sighup_and_daemon(self.sigcaught): break for pid in self.children: os.kill(pid, signal.SIGHUP) self.running = True self.sigcaught = None except eventlet.greenlet.GreenletExit: LOG.info(_LI("Wait called after thread killed. Cleaning up.")) for pid in self.children: try: os.kill(pid, signal.SIGTERM) except OSError as exc: if exc.errno != errno.ESRCH: raise # Wait for children to die if self.children: LOG.info(_LI('Waiting on %d children to exit'), len(self.children)) while self.children: self._wait_child()
def stop(self): """Terminate child processes and wait on each.""" self.running = False for pid in self.children: try: os.kill(pid, signal.SIGTERM) except OSError as exc: if exc.errno != errno.ESRCH: raise # Wait for children to die if self.children: LOG.info(_LI('Waiting on %d children to exit'), len(self.children)) while self.children: self._wait_child()
def initialize_if_enabled(): backdoor_locals = { 'exit': _dont_use_this, # So we don't exit the entire process 'quit': _dont_use_this, # So we don't exit the entire process 'fo': _find_objects, 'pgt': _print_greenthreads, 'pnt': _print_nativethreads, } if CONF.backdoor_port is None: return None start_port, end_port = _parse_port_range(str(CONF.backdoor_port)) # NOTE(johannes): The standard sys.displayhook will print the value of # the last expression and set it to __builtin__._, which overwrites # the __builtin__._ that gettext sets. Let's switch to using pprint # since it won't interact poorly with gettext, and it's easier to # read the output too. def displayhook(val): if val is not None: pprint.pprint(val) sys.displayhook = displayhook sock = _listen('localhost', start_port, end_port, eventlet.listen) # In the case of backdoor port being zero, a port number is assigned by # listen(). In any case, pull the port number out here. port = sock.getsockname()[1] LOG.info( _LI('Eventlet backdoor listening on %(port)s for process %(pid)d') % { 'port': port, 'pid': os.getpid() }) eventlet.spawn_n(eventlet.backdoor.backdoor_server, sock, locals=backdoor_locals) return port
def get_filtered_objects(self, filter_classes, objs, filter_properties, index=0): """Get objects after filter :param filter_classes: filters that will be used to filter the objects :param objs: objects that will be filtered :param filter_properties: client filter properties :param index: This value needs to be increased in the caller function of get_filtered_objects when handling each resource. """ list_objs = list(objs) LOG.debug("Starting with %d host(s)", len(list_objs)) for filter_cls in filter_classes: cls_name = filter_cls.__name__ filter_class = filter_cls() if filter_class.run_filter_for_index(index): objs = filter_class.filter_all(list_objs, filter_properties) if objs is None: LOG.debug("Filter %(cls_name)s says to stop filtering", {'cls_name': cls_name}) return list_objs = list(objs) msg = ( _LI("Filter %(cls_name)s returned %(obj_len)d host(s)") % { 'cls_name': cls_name, 'obj_len': len(list_objs) }) if not list_objs: LOG.info(msg) break LOG.debug(msg) return list_objs
def reconnect(self): """Handles reconnecting and re-establishing sessions and queues.""" delay = 1 while True: # Close the session if necessary if self.connection.opened(): try: self.connection.close() except qpid_exceptions.MessagingError: pass broker = self.brokers[next(self.next_broker_indices)] try: self.connection_create(broker) self.connection.open() except qpid_exceptions.MessagingError as e: msg_dict = dict(e=e, delay=delay) msg = _LE("Unable to connect to AMQP server: %(e)s. " "Sleeping %(delay)s seconds") % msg_dict LOG.error(msg) time.sleep(delay) delay = min(delay + 1, 5) else: LOG.info(_LI('Connected to AMQP server on %s'), broker) break self.session = self.connection.session() if self.consumers: consumers = self.consumers self.consumers = {} for consumer in six.itervalues(consumers): consumer.reconnect(self.session) self._register_consumer(consumer) LOG.debug("Re-established AMQP queues")
def _child_wait_for_exit_or_signal(self, launcher): status = 0 signo = 0 # NOTE(johannes): All exceptions are caught to ensure this # doesn't fallback into the loop spawning children. It would # be bad for a child to spawn more children. try: launcher.wait() except SignalExit as exc: signame = _signo_to_signame(exc.signo) LOG.info(_LI('Child caught %s, exiting'), signame) status = exc.code signo = exc.signo except SystemExit as exc: status = exc.code except BaseException: LOG.exception(_LE('Unhandled exception')) status = 2 finally: launcher.stop() return status, signo
def initialize_if_enabled(): backdoor_locals = { 'exit': _dont_use_this, # So we don't exit the entire process 'quit': _dont_use_this, # So we don't exit the entire process 'fo': _find_objects, 'pgt': _print_greenthreads, 'pnt': _print_nativethreads, } if CONF.backdoor_port is None: return None start_port, end_port = _parse_port_range(str(CONF.backdoor_port)) # NOTE(johannes): The standard sys.displayhook will print the value of # the last expression and set it to __builtin__._, which overwrites # the __builtin__._ that gettext sets. Let's switch to using pprint # since it won't interact poorly with gettext, and it's easier to # read the output too. def displayhook(val): if val is not None: pprint.pprint(val) sys.displayhook = displayhook sock = _listen('localhost', start_port, end_port, eventlet.listen) # In the case of backdoor port being zero, a port number is assigned by # listen(). In any case, pull the port number out here. port = sock.getsockname()[1] LOG.info( _LI('Eventlet backdoor listening on %(port)s for process %(pid)d') % {'port': port, 'pid': os.getpid()} ) eventlet.spawn_n(eventlet.backdoor.backdoor_server, sock, locals=backdoor_locals) return port
def _consume(sock): LOG.info(_LI("Consuming socket")) while True: self.consume(sock)
def launch_service(self, service, workers=1): wrap = ServiceWrapper(service, workers) LOG.info(_LI('Starting %d workers'), wrap.workers) while self.running and len(wrap.children) < wrap.workers: self._start_child(wrap)