def _start_child(self, wrap):
    """Fork one child process running wrap.service.

    In the parent, records the child pid in the wrapper and the
    launcher and returns it.  In the child, this never returns: the
    service loop runs until exit and terminates via os._exit().

    :param wrap: ServiceWrapper holding the service, worker count,
        fork timestamps and the set of live child pids.
    :returns: pid of the forked child (parent side only).
    """
    if len(wrap.forktimes) > wrap.workers:
        # Limit ourselves to one process a second (over the period of
        # number of workers * 1 second). This will allow workers to
        # start up quickly but ensure we don't fork off children that
        # die instantly too quickly.
        if time.time() - wrap.forktimes[0] < wrap.workers:
            LOG.info(_LI('Forking too fast, sleeping'))
            time.sleep(1)

        wrap.forktimes.pop(0)

    wrap.forktimes.append(time.time())

    pid = os.fork()
    if pid == 0:
        # Child: run the service until it exits or a non-SIGHUP signal
        # arrives; SIGHUP restarts the launcher in place.
        launcher = self._child_process(wrap.service)
        while True:
            self._child_process_handle_signal()
            status, signo = self._child_wait_for_exit_or_signal(launcher)
            if not _is_sighup_and_daemon(signo):
                break
            launcher.restart()

        # os._exit() avoids running the parent's cleanup handlers
        # (atexit hooks, buffered I/O flush) in the child.
        os._exit(status)

    LOG.info(_LI('Started child %d'), pid)

    wrap.children.add(pid)
    self.children[pid] = wrap

    return pid
def wait(self):
    """Loop waiting on children to die and respawning as necessary."""
    systemd.notify_once()
    LOG.debug('Full set of CONF:')
    CONF.log_opt_values(LOG, logging.DEBUG)

    try:
        while True:
            self.handle_signal()
            self._respawn_children()
            # No signal means that stop was called. Don't clean up here.
            if not self.sigcaught:
                return

            signame = _signo_to_signame(self.sigcaught)
            LOG.info(_LI('Caught %s, stopping children'), signame)
            if not _is_sighup_and_daemon(self.sigcaught):
                # Terminating signal: leave the loop and clean up below.
                break

            # SIGHUP while daemonized: forward it to every child so
            # they reload, then resume supervising.
            for pid in self.children:
                os.kill(pid, signal.SIGHUP)
            self.running = True
            self.sigcaught = None
    except eventlet.greenlet.GreenletExit:
        LOG.info(_LI("Wait called after thread killed. Cleaning up."))

    self.stop()
def _wait_child(self):
    """Reap a single exited child without blocking.

    :returns: the ServiceWrapper that owned the reaped child, or None
        when no child has exited, the wait was interrupted, or the pid
        is not one we launched.
    """
    try:
        # WNOHANG makes waitpid return immediately (pid == 0) when no
        # child has exited yet.
        pid, status = os.waitpid(0, os.WNOHANG)
        if not pid:
            return None
    except OSError as exc:
        # EINTR (interrupted) and ECHILD (no children) are expected;
        # anything else is a real failure.
        if exc.errno not in (errno.EINTR, errno.ECHILD):
            raise
        return None

    if os.WIFSIGNALED(status):
        LOG.info(_LI('Child %(pid)d killed by signal %(sig)d'),
                 dict(pid=pid, sig=os.WTERMSIG(status)))
    else:
        LOG.info(_LI('Child %(pid)s exited with status %(code)d'),
                 dict(pid=pid, code=os.WEXITSTATUS(status)))

    if pid not in self.children:
        LOG.warning(_LW('pid %d not in child list'), pid)
        return None

    wrap = self.children.pop(pid)
    wrap.children.remove(pid)
    return wrap
def _wait_child(self):
    """Block until a child process exits, then reap it.

    :returns: the ServiceWrapper that owned the reaped child, or None
        if the wait was interrupted (EINTR), there are no children
        (ECHILD), or the pid is not one we launched.
    """
    try:
        # Block while any of child processes have exited
        pid, status = os.waitpid(0, 0)
        if not pid:
            # NOTE(review): with flags=0, waitpid blocks until a child
            # exits and returns a real pid, so this branch looks
            # unreachable — kept for safety; confirm before removing.
            return None
    except OSError as exc:
        if exc.errno not in (errno.EINTR, errno.ECHILD):
            raise
        return None

    if os.WIFSIGNALED(status):
        sig = os.WTERMSIG(status)
        LOG.info(_LI('Child %(pid)d killed by signal %(sig)d'),
                 dict(pid=pid, sig=sig))
    else:
        code = os.WEXITSTATUS(status)
        LOG.info(_LI('Child %(pid)s exited with status %(code)d'),
                 dict(pid=pid, code=code))

    if pid not in self.children:
        LOG.warning(_LW('pid %d not in child list'), pid)
        return None

    wrap = self.children.pop(pid)
    wrap.children.remove(pid)
    return wrap
def _add_periodic_task(cls, task):
    """Add a periodic task to the list of periodic tasks.

    The task should already be decorated by @periodic_task.

    :return: whether task was actually enabled
    """
    name = task._periodic_name

    # Tasks with a negative interval or an explicit disable flag are
    # skipped entirely and never registered.
    if task._periodic_spacing < 0:
        LOG.info(_LI('Skipping periodic task %(task)s because '
                     'its interval is negative'),
                 {'task': name})
        return False

    if not task._periodic_enabled:
        LOG.info(_LI('Skipping periodic task %(task)s because '
                     'it is disabled'),
                 {'task': name})
        return False

    # A periodic spacing of zero indicates that this task should
    # be run on the default interval to avoid running too
    # frequently.
    if task._periodic_spacing == 0:
        task._periodic_spacing = DEFAULT_INTERVAL

    cls._periodic_tasks.append((name, task))
    cls._periodic_spacing[name] = task._periodic_spacing
    return True
def wait(self):
    """Loop waiting on children to die and respawning as necessary."""
    systemd.notify_once()
    LOG.debug('Full set of CONF:')
    CONF.log_opt_values(LOG, logging.DEBUG)

    try:
        while True:
            self.handle_signal()
            self._respawn_children()
            # No signal means that stop was called. Don't clean up here.
            if not self.sigcaught:
                return

            signame = _signo_to_signame(self.sigcaught)
            LOG.info(_LI('Caught %s, stopping children'), signame)
            if not _is_sighup_and_daemon(self.sigcaught):
                break

            # SIGHUP while daemonized: re-read config files, reset each
            # distinct service object (several wrappers can share one),
            # then forward the signal to every child before resuming.
            cfg.CONF.reload_config_files()
            for service in set(
                    [wrap.service for wrap in self.children.values()]):
                service.reset()

            for pid in self.children:
                os.kill(pid, signal.SIGHUP)

            self.running = True
            self.sigcaught = None
    except eventlet.greenlet.GreenletExit:
        LOG.info(_LI("Wait called after thread killed. Cleaning up."))

    self.stop()
def acquire(self, retry=True):
    """Acquire a lock on the bay.

    :param retry: When True, retry if lock was released while stealing.
    :raises: exception.OperationInProgress when the lock is held by a
        live conductor, or when stealing a stale lock loses the race.
    """
    lock_conductor_id = objects.BayLock.create(self.bay.uuid,
                                               self.conductor_id)
    if lock_conductor_id is None:
        # No pre-existing lock: we now hold it.
        LOG.debug("Conductor %(conductor)s acquired lock on bay "
                  "%(bay)s" % {'conductor': self.conductor_id,
                               'bay': self.bay.uuid})
        return

    if (lock_conductor_id == self.conductor_id or
            self.conductor_alive(self.context, lock_conductor_id)):
        # Held by us already or by a conductor that is still alive:
        # the operation is genuinely in progress.
        LOG.debug("Lock on bay %(bay)s is owned by conductor "
                  "%(conductor)s" % {'bay': self.bay.uuid,
                                     'conductor': lock_conductor_id})
        raise exception.OperationInProgress(bay_name=self.bay.name)
    else:
        # Holder is dead: attempt to steal its stale lock.
        LOG.info(_LI("Stale lock detected on bay %(bay)s. Conductor "
                     "%(conductor)s will attempt to steal the lock"),
                 {'bay': self.bay.uuid, 'conductor': self.conductor_id})

        result = objects.BayLock.steal(self.bay.uuid,
                                       lock_conductor_id,
                                       self.conductor_id)

        if result is None:
            # Steal succeeded: we hold the lock.
            LOG.info(_LI("Conductor %(conductor)s successfully stole the "
                         "lock on bay %(bay)s"),
                     {'conductor': self.conductor_id,
                      'bay': self.bay.uuid})
            return
        elif result is True:
            if retry:
                # Lock disappeared mid-steal; re-run the whole acquire
                # once (retry=False bounds the recursion at one retry).
                LOG.info(_LI("The lock on bay %(bay)s was released while "
                             "conductor %(conductor)s was stealing it. "
                             "Trying again"),
                         {'bay': self.bay.uuid,
                          'conductor': self.conductor_id})
                return self.acquire(retry=False)
        else:
            # Another conductor stole the lock before we could.
            new_lock_conductor_id = result
            LOG.info(_LI("Failed to steal lock on bay %(bay)s. "
                         "Conductor %(conductor)s stole the lock first"),
                     {'bay': self.bay.uuid,
                      'conductor': new_lock_conductor_id})

        # All unreturned steal outcomes mean someone else owns the lock.
        raise exception.OperationInProgress(bay_name=self.bay.name)
def main():
    """Entry point for the magnum conductor service.

    Parses config, sets up logging, builds the RPC endpoints and serves
    forever.  Warns (but does not exit) when neither Heat template for
    k8s exists.
    """
    cfg.CONF(sys.argv[1:], project='magnum')
    logging.setup('magnum')

    # Lazy formatting: hand the argument to the logger instead of
    # eagerly interpolating with '%'.
    LOG.info(_LI('Starting server in PID %s'), os.getpid())
    LOG.debug("Configuration:")
    cfg.CONF.log_opt_values(LOG, std_logging.DEBUG)

    cfg.CONF.import_opt('topic', 'magnum.conductor.config', group='conductor')

    conductor_id = short_id.generate_id()
    endpoints = [
        docker_conductor.Handler(),
        k8s_conductor.Handler(),
        bay_k8s_heat.Handler(),
        conductor_listener.Handler(),
    ]

    if (not os.path.isfile(cfg.CONF.bay.k8s_atomic_template_path)
            and not os.path.isfile(cfg.CONF.bay.k8s_coreos_template_path)):
        # BUG FIX: the original format string used '(coreos_template)%s',
        # which never references the mapping key; use the proper named
        # placeholder so the coreos path is rendered.
        LOG.error(_LE("The Heat template can not be found for either k8s "
                      "atomic %(atomic_template)s or coreos "
                      "%(coreos_template)s. Install template first if you "
                      "want to create bay."),
                  {'atomic_template': cfg.CONF.bay.k8s_atomic_template_path,
                   'coreos_template': cfg.CONF.bay.k8s_coreos_template_path})

    server = service.Service(cfg.CONF.conductor.topic,
                             conductor_id, endpoints)
    server.serve()
def bay_delete(self, context, uuid):
    """Delete the Heat stack backing the bay identified by *uuid*.

    :param context: request context used to build OpenStack clients.
    :param uuid: uuid of the bay whose stack should be deleted.
    :returns: None
    """
    LOG.debug('k8s_heat bay_delete')
    osc = clients.OpenStackClients(context)
    bay = objects.Bay.get_by_uuid(context, uuid)
    stack_id = bay.stack_id
    # NOTE(sdake): This will execute a stack_delete operation.  This will
    # Ignore HTTPNotFound exceptions (stack wasn't present).  In the case
    # that Heat couldn't find the stack representing the bay, likely a user
    # has deleted the stack outside the context of Magnum.  Therefore the
    # contents of the bay are forever lost.
    #
    # If the exception is unhandled, the original exception will be raised.
    try:
        osc.heat().stacks.delete(stack_id)
    except exc.HTTPNotFound:
        # Narrowed from 'except Exception' + isinstance() dispatch: the
        # else branch simply re-raised, which is what an uncaught
        # exception does anyway.  Message grammar fixed ('was not be
        # found' -> 'was not found') and formatting made lazy.
        LOG.info(_LI('The stack %s was not found during bay'
                     ' deletion.'), stack_id)
        bay.destroy()
        return None

    self._poll_and_check(osc, bay)

    return None
def main():
    """Entry point for the magnum conductor service.

    Exits with status -1 when the configured Heat template is missing.
    """
    cfg.CONF(sys.argv[1:], project='magnum')
    logging.setup('magnum')

    # Lazy formatting: pass os.getpid() as a logger argument instead of
    # eagerly interpolating with '%'.
    LOG.info(_LI('Starting server in PID %s'), os.getpid())
    LOG.debug("Configuration:")
    cfg.CONF.log_opt_values(LOG, std_logging.DEBUG)

    cfg.CONF.import_opt('topic', 'magnum.conductor.config', group='conductor')
    cfg.CONF.import_opt('host', 'magnum.conductor.config', group='conductor')
    endpoints = [
        docker_conductor.Handler(),
        k8s_conductor.Handler(),
        bay_k8s_heat.Handler()
    ]

    if not os.path.isfile(cfg.CONF.k8s_heat.template_path):
        LOG.error(_LE("The Heat template %s is not found. Install template."),
                  cfg.CONF.k8s_heat.template_path)
        # sys.exit instead of the interactive-only 'exit' builtin.
        sys.exit(-1)

    server = service.Service(cfg.CONF.conductor.topic,
                             cfg.CONF.conductor.host, endpoints)
    server.serve()
def poll_and_check(self):
    """Poll the Heat stack backing self.bay once and sync bay state.

    Terminal conditions — deletion complete, create/update complete,
    any *_FAILED state, or too many polling attempts — end the polling
    loop by raising loopingcall.LoopingCallDone.
    """
    # TODO(yuanying): temporary implementation to update api_address,
    # node_addresses and bay status
    stack = self.openstack_client.heat().stacks.get(self.bay.stack_id)
    self.attempts += 1

    # poll_and_check is detached and polling long time to check status,
    # so another user/client can call delete bay/stack.
    if stack.stack_status == 'DELETE_COMPLETE':
        LOG.info(_LI('Bay has been deleted, stack_id: %s'),
                 self.bay.stack_id)
        self.bay.destroy()
        raise loopingcall.LoopingCallDone()

    if stack.stack_status in ['CREATE_COMPLETE', 'UPDATE_COMPLETE']:
        _update_stack_outputs(self.context, stack, self.bay)
        self.bay.status = stack.stack_status
        self.bay.save()
        raise loopingcall.LoopingCallDone()
    elif stack.stack_status != self.bay.status:
        # Keep the bay record in step with any intermediate status.
        self.bay.status = stack.stack_status
        self.bay.save()

    if stack.stack_status == 'CREATE_FAILED':
        LOG.error(_LE('Unable to create bay, stack_id: %(stack_id)s, '
                      'reason: %(reason)s'),
                  {'stack_id': self.bay.stack_id,
                   'reason': stack.stack_status_reason})
        raise loopingcall.LoopingCallDone()

    if stack.stack_status == 'DELETE_FAILED':
        LOG.error(_LE('Unable to delete bay, stack_id: %(stack_id)s, '
                      'reason: %(reason)s'),
                  {'stack_id': self.bay.stack_id,
                   'reason': stack.stack_status_reason})
        raise loopingcall.LoopingCallDone()

    if stack.stack_status == 'UPDATE_FAILED':
        LOG.error(_LE('Unable to update bay, stack_id: %(stack_id)s, '
                      'reason: %(reason)s'),
                  {'stack_id': self.bay.stack_id,
                   'reason': stack.stack_status_reason})
        raise loopingcall.LoopingCallDone()

    def _too_many_attempts():
        # Deduplicated: the original repeated this log+raise verbatim in
        # both branches below.  Also fixes the missing space in the
        # concatenated message ('attempts,stack_id' -> 'attempts, stack_id')
        # and uses lazy log formatting.
        LOG.error(_LE('Bay check exit after %(attempts)s attempts, '
                      'stack_id: %(id)s, stack_status: %(status)s'),
                  {'attempts': cfg.CONF.k8s_heat.max_attempts,
                   'id': self.bay.stack_id,
                   'status': stack.stack_status})
        raise loopingcall.LoopingCallDone()

    # Only check max attempts when the stack is being created when the
    # timeout hasn't been set.  If the timeout has been set then the
    # loop will end when the stack completes or the timeout occurs.
    if stack.stack_status == 'CREATE_IN_PROGRESS':
        if (stack.timeout_mins is None and
                self.attempts > cfg.CONF.k8s_heat.max_attempts):
            _too_many_attempts()
    else:
        if self.attempts > cfg.CONF.k8s_heat.max_attempts:
            _too_many_attempts()
def _pipe_watcher(self):
    """Exit this child process as soon as the parent goes away.

    read() on the pipe only returns once the parent's write end is
    closed, which the OS does when the parent dies unexpectedly.
    """
    self.readpipe.read()

    LOG.info(_LI('Parent process has died unexpectedly, exiting'))

    sys.exit(1)
def _pipe_watcher(self):
    """Terminate this child when the parent process dies.

    Blocks on the pipe; the read returns only when the parent's write
    end is closed (which the OS does when the parent exits), then the
    child exits with status 1.
    """
    # This will block until the write end is closed when the parent
    # dies unexpectedly
    self.readpipe.read(1)

    LOG.info(_LI('Parent process has died unexpectedly, exiting'))

    sys.exit(1)
def __init__(cls, names, bases, dict_):
    """Metaclass that allows us to collect decorated periodic tasks."""
    super(_PeriodicTasksMeta, cls).__init__(names, bases, dict_)

    # NOTE(sirp): if the attribute is not present then we must be the base
    # class, so, go ahead an initialize it. If the attribute is present,
    # then we're a subclass so make a copy of it so we don't step on our
    # parent's toes.
    try:
        cls._periodic_tasks = cls._periodic_tasks[:]
    except AttributeError:
        cls._periodic_tasks = []

    try:
        cls._periodic_spacing = cls._periodic_spacing.copy()
    except AttributeError:
        cls._periodic_spacing = {}

    # Collect every attribute marked by the @periodic_task decorator,
    # skipping (with a log message) negative-interval or disabled tasks.
    for task in cls.__dict__.values():
        if not getattr(task, '_periodic_task', False):
            continue

        name = task.__name__

        if task._periodic_spacing < 0:
            LOG.info(_LI('Skipping periodic task %(task)s because '
                         'its interval is negative'),
                     {'task': name})
            continue
        if not task._periodic_enabled:
            LOG.info(_LI('Skipping periodic task %(task)s because '
                         'it is disabled'),
                     {'task': name})
            continue

        # A periodic spacing of zero indicates that this task should
        # be run on the default interval to avoid running too
        # frequently.
        if task._periodic_spacing == 0:
            task._periodic_spacing = DEFAULT_INTERVAL

        cls._periodic_tasks.append((name, task))
        cls._periodic_spacing[name] = task._periodic_spacing
def _service_admin_creds(self):
    """Build admin credential kwargs from keystone_authtoken config.

    :returns: dict with username, password, auth_url, endpoint and
        project_name suitable for constructing an admin client.
    """
    # Import auth_token to have keystone_authtoken settings setup.
    importutils.import_module('keystonemiddleware.auth_token')

    creds = {
        'username': cfg.CONF.keystone_authtoken.admin_user,
        'password': cfg.CONF.keystone_authtoken.admin_password,
        'auth_url': self.v3_endpoint,
        'endpoint': self.v3_endpoint,
        'project_name': cfg.CONF.keystone_authtoken.admin_tenant_name
    }
    # SECURITY FIX: the original logged the full creds dict, writing the
    # admin password in plaintext to the log.  Log a sanitized copy and
    # use lazy formatting.
    sanitized = dict(creds, password='***')
    LOG.info(_LI('admin creds %s'), sanitized)
    return creds
def main():
    """Entry point for the magnum API server (dev WSGI server)."""
    service.prepare_service(sys.argv)

    app = api_app.setup_app()

    # Create the WSGI server and start it
    host, port = cfg.CONF.api.host, cfg.CONF.api.port
    srv = simple_server.make_server(host, port, app)

    # Lazy formatting throughout: pass arguments to the logger instead
    # of eagerly interpolating with '%'.
    LOG.info(_LI('Starting server in PID %s'), os.getpid())
    LOG.debug("Configuration:")
    cfg.CONF.log_opt_values(LOG, std_logging.DEBUG)

    if host == '0.0.0.0':
        LOG.info(_LI('serving on 0.0.0.0:%(port)s, '
                     'view at http://127.0.0.1:%(port)s'),
                 dict(port=port))
    else:
        LOG.info(_LI('serving on http://%(host)s:%(port)s'),
                 dict(host=host, port=port))

    srv.serve_forever()
def main():
    """Entry point for the magnum API server (dev WSGI server)."""
    service.prepare_service(sys.argv)

    app = api_app.setup_app()

    # Create the WSGI server and start it
    host, port = cfg.CONF.api.host, cfg.CONF.api.port
    srv = simple_server.make_server(host, port, app)

    # Lazy formatting: the logger interpolates only when the record is
    # actually emitted.
    LOG.info(_LI('Starting server in PID %s'), os.getpid())
    LOG.debug("Configuration:")
    cfg.CONF.log_opt_values(LOG, std_logging.DEBUG)

    if host == '0.0.0.0':
        LOG.info(
            _LI('serving on 0.0.0.0:%(port)s, '
                'view at http://127.0.0.1:%(port)s'),
            dict(port=port))
    else:
        LOG.info(
            _LI('serving on http://%(host)s:%(port)s'),
            dict(host=host, port=port))

    srv.serve_forever()
def stop(self):
    """Terminate child processes and wait on each."""
    self.running = False

    for pid in self.children:
        try:
            os.kill(pid, signal.SIGTERM)
        except OSError as exc:
            # ESRCH means the child is already gone; anything else is
            # a genuine failure.
            if exc.errno != errno.ESRCH:
                raise

    # Wait for children to die
    if self.children:
        LOG.info(_LI('Waiting on %d children to exit'),
                 len(self.children))
        while self.children:
            self._wait_child()
def get(url, allowed_schemes=('http', 'https')):
    """Get the data at the specified URL.

    The URL must use the http: or https: schemes.
    The file: scheme is also supported if you
    override the allowed_schemes argument.
    Raise an IOError if getting the data fails.
    """
    LOG.info(_LI('Fetching data from %s'), url)

    components = urllib.parse.urlparse(url)

    if components.scheme not in allowed_schemes:
        raise URLFetchError(_('Invalid URL scheme %s') % components.scheme)

    if components.scheme == 'file':
        try:
            return urllib.request.urlopen(url).read()
        except urllib.error.URLError as uex:
            raise URLFetchError(_('Failed to retrieve manifest: %s') % uex)

    try:
        resp = requests.get(url, stream=True)
        resp.raise_for_status()

        # We cannot use resp.text here because it would download the
        # entire file, and a large enough file would bring down the
        # engine.  The 'Content-Length' header could be faked, so it's
        # necessary to download the content in chunks until
        # max_manifest_size is reached.
        #
        # PERF FIX: accumulate chunks in a list and join once at the
        # end; repeated 'result += chunk' is quadratic in the number of
        # chunks.  A running byte count preserves the original size
        # check semantics.
        # NOTE(review): under Python 3 iter_content yields bytes, so
        # joining with '' assumes str chunks (Python 2 semantics, same
        # as the original concatenation) — confirm decode_unicode usage.
        chunks = []
        size = 0
        for chunk in resp.iter_content(chunk_size=1000):
            chunks.append(chunk)
            size += len(chunk)
            if size > cfg.CONF.max_manifest_size:
                raise URLFetchError("Manifest exceeds maximum allowed size "
                                    "(%s bytes)" % cfg.CONF.max_manifest_size)
        return "".join(chunks)
    except exceptions.RequestException as ex:
        raise URLFetchError(_('Failed to retrieve manifest: %s') % ex)
def _wait_for_exit_or_signal(self, ready_callback=None):
    """Block in the launcher until the service exits or a signal arrives.

    :param ready_callback: optional callable invoked once setup is done,
        immediately before blocking in wait().
    :returns: (status, signo) tuple; signo is 0 unless a SignalExit was
        caught, status is None for a normal return.
    """
    status = None
    signo = 0

    LOG.debug('Full set of CONF:')
    CONF.log_opt_values(LOG, logging.DEBUG)

    try:
        if ready_callback:
            ready_callback()
        super(ServiceLauncher, self).wait()
    except SignalExit as exc:
        signame = _signo_to_signame(exc.signo)
        LOG.info(_LI('Caught %s, exiting'), signame)
        status = exc.code
        signo = exc.signo
    except SystemExit as exc:
        status = exc.code
    finally:
        # Always stop the launcher, whatever caused the wait to end.
        self.stop()

    return status, signo
def _child_wait_for_exit_or_signal(self, launcher):
    """Run the child's launcher and report how it finished.

    :param launcher: the child's service launcher.
    :returns: (status, signo) tuple; signo is 0 unless SignalExit was
        caught, status defaults to 0 for a clean exit.
    """
    status = 0
    signo = 0

    # NOTE(johannes): All exceptions are caught to ensure this
    # doesn't fallback into the loop spawning children. It would
    # be bad for a child to spawn more children.
    try:
        launcher.wait()
    except SignalExit as exc:
        signame = _signo_to_signame(exc.signo)
        LOG.info(_LI('Child caught %s, exiting'), signame)
        status, signo = exc.code, exc.signo
    except SystemExit as exc:
        status = exc.code
    except BaseException:
        LOG.exception(_LE('Unhandled exception'))
        status = 2
    finally:
        launcher.stop()

    return status, signo
def main():
    """Entry point for the magnum conductor service.

    Exits with status -1 when the configured Heat template is missing.
    """
    cfg.CONF(sys.argv[1:], project='magnum')
    logging.setup('magnum')

    # Lazy formatting: hand os.getpid() to the logger instead of
    # eagerly interpolating with '%'.
    LOG.info(_LI('Starting server in PID %s'), os.getpid())
    LOG.debug("Configuration:")
    cfg.CONF.log_opt_values(LOG, std_logging.DEBUG)

    cfg.CONF.import_opt('topic', 'magnum.conductor.config', group='conductor')
    cfg.CONF.import_opt('host', 'magnum.conductor.config', group='conductor')
    endpoints = [
        docker_conductor.Handler(),
        k8s_conductor.Handler(),
        bay_k8s_heat.Handler()
    ]

    if not os.path.isfile(cfg.CONF.k8s_heat.template_path):
        LOG.error(_LE("The Heat template %s is not found. Install template."),
                  cfg.CONF.k8s_heat.template_path)
        # sys.exit instead of the interactive-only 'exit' builtin.
        sys.exit(-1)

    server = service.Service(cfg.CONF.conductor.topic,
                             cfg.CONF.conductor.host, endpoints)
    server.serve()
def initialize_if_enabled():
    """Start the eventlet backdoor server when backdoor_port is set.

    :returns: the port the backdoor is listening on, or None when the
        backdoor is disabled.
    """
    backdoor_locals = {
        'exit': _dont_use_this,      # So we don't exit the entire process
        'quit': _dont_use_this,      # So we don't exit the entire process
        'fo': _find_objects,
        'pgt': _print_greenthreads,
        'pnt': _print_nativethreads,
    }

    if CONF.backdoor_port is None:
        return None

    start_port, end_port = _parse_port_range(str(CONF.backdoor_port))

    # NOTE(johannes): The standard sys.displayhook will print the value of
    # the last expression and set it to __builtin__._, which overwrites
    # the __builtin__._ that gettext sets. Let's switch to using pprint
    # since it won't interact poorly with gettext, and it's easier to
    # read the output too.
    def displayhook(val):
        if val is not None:
            pprint.pprint(val)
    sys.displayhook = displayhook

    sock = _listen('localhost', start_port, end_port, eventlet.listen)

    # In the case of backdoor port being zero, a port number is assigned by
    # listen(). In any case, pull the port number out here.
    port = sock.getsockname()[1]
    # FIX: lazy log formatting — pass the mapping as an argument rather
    # than pre-formatting with '%'.
    LOG.info(
        _LI('Eventlet backdoor listening on %(port)s for process %(pid)d'),
        {'port': port, 'pid': os.getpid()}
    )
    eventlet.spawn_n(eventlet.backdoor.backdoor_server, sock,
                     locals=backdoor_locals)
    return port
def launch_service(self, service, workers=1):
    """Fork child processes to run *service*.

    :param service: the service object each worker runs.
    :param workers: number of worker processes to start.
    """
    wrap = ServiceWrapper(service, workers)

    LOG.info(_LI('Starting %d workers'), wrap.workers)
    # Keep forking until the requested worker count is reached, unless
    # the master has been told to stop.
    while self.running and len(wrap.children) < wrap.workers:
        self._start_child(wrap)