def _remove_daemon(self, name: str, host: str) -> str:
    """
    Remove the daemon ``name`` from ``host``.

    The owning service's ``pre_remove`` hook runs first, then cephadm is
    asked to ``rm-daemon --force`` on the host.  The daemon is dropped from
    the cache only when that command reports a zero exit code; the host's
    cached daemon list is invalidated and the service's ``post_remove`` hook
    runs in either case.

    :param name: daemon name in ``<daemon_type>.<daemon_id>`` form (it is
        split on the first ``.``)
    :param host: host the daemon is running on
    :return: a human readable confirmation message
    """
    daemon_type, daemon_id = name.split('.', 1)
    daemon = orchestrator.DaemonDescription(
        daemon_type=daemon_type,
        daemon_id=daemon_id,
        hostname=host)

    with set_exception_subject('service', daemon.service_id(), overwrite=True):
        service_type = daemon_type_to_service(daemon_type)
        self.mgr.cephadm_services[service_type].pre_remove(daemon)

        self.log.info('Removing daemon %s from %s' % (name, host))
        rm_args = ['--name', name, '--force']
        _out, _err, code = self._run_cephadm(host, name, 'rm-daemon', rm_args)
        if not code:
            # cephadm succeeded: forget the daemon in the cache
            self.mgr.cache.rm_daemon(host, name)
        # whatever the outcome, the cached daemon list of the host is stale
        self.mgr.cache.invalidate_host_daemons(host)

        self.mgr.cephadm_services[service_type].post_remove(daemon)

        return "Removed {} from host '{}'".format(name, host)
def _wait_for_ok_to_stop(
        self,
        s: DaemonDescription,
        known: Optional[List[str]] = None,  # NOTE: output argument!
) -> bool:
    """
    Poll the daemon's service until it reports that ``s`` is ok to stop.

    Up to four attempts are made, sleeping 15 seconds after every refusal.
    Polling is abandoned as soon as there is no upgrade state or the upgrade
    is paused.

    :param s: daemon to be stopped; ``daemon_type`` and ``daemon_id`` must
        be set
    :param known: handed through to ``ok_to_stop()``, which treats it as an
        output argument
    :return: True once ``ok_to_stop()`` returns a zero ``retval``; False when
        the attempts are used up or the upgrade is gone/paused
    """
    # only wait a little bit; the service might go away for something
    assert s.daemon_type is not None
    assert s.daemon_id is not None

    for _attempt in range(4):
        if not self.upgrade_state or self.upgrade_state.paused:
            return False

        service = self.mgr.cephadm_services[daemon_type_to_service(s.daemon_type)]
        # setting force flag to retain old functionality.
        # note that known is an output argument for ok_to_stop()
        r = service.ok_to_stop([s.daemon_id], known=known, force=True)

        if not r.retval:
            logger.info(f'Upgrade: {r.stdout}')
            return True

        logger.info(f'Upgrade: {r.stderr}')
        time.sleep(15)

    return False
def _wait_for_ok_to_stop(
        self,
        s: DaemonDescription,
        known: Optional[List[str]] = None,  # NOTE: output argument!
) -> bool:
    """
    Poll the daemon's service until it reports that ``s`` is ok to stop.

    Up to four attempts are made, sleeping 15 seconds after every refusal.
    Polling is abandoned as soon as there is no upgrade state or the upgrade
    is paused.

    This definition shares its name with an earlier one that accepts a
    ``known`` argument; being the later ``def`` it is the one that takes
    effect, so it accepts ``known`` as well to keep both call styles working.

    :param s: daemon to be stopped; ``daemon_type`` and ``daemon_id`` must
        be set
    :param known: optional list handed through to ``ok_to_stop()`` (which
        treats it as an output argument); when omitted, ``ok_to_stop()`` is
        called without it
    :return: True once ``ok_to_stop()`` returns a zero ``retval``; False when
        the attempts are used up or the upgrade is gone/paused
    """
    # only wait a little bit; the service might go away for something
    assert s.daemon_type is not None
    assert s.daemon_id is not None
    tries = 4
    while tries > 0:
        if not self.upgrade_state or self.upgrade_state.paused:
            return False

        service = self.mgr.cephadm_services[daemon_type_to_service(s.daemon_type)]
        # setting force flag to retain old functionality.
        if known is None:
            # one-argument callers get the same ok_to_stop() call as before
            r = service.ok_to_stop([s.daemon_id], force=True)
        else:
            r = service.ok_to_stop([s.daemon_id], known=known, force=True)

        if not r.retval:
            logger.info(f'Upgrade: {r.stdout}')
            return True
        logger.error(f'Upgrade: {r.stderr}')

        time.sleep(15)
        tries -= 1
    return False
def _create_daemon(self,
                   daemon_spec: CephadmDaemonDeploySpec,
                   reconfig: bool = False,
                   osd_uuid_map: Optional[Dict[str, Any]] = None,
                   ) -> str:
    """
    Deploy a daemon through cephadm, or reconfigure it when ``reconfig`` is
    set.

    Picks the container image override and the TCP ports for the daemon
    type, appends the matching flags to ``daemon_spec.extra_args``, logs in
    to the registry when the host needs it and then runs ``cephadm deploy``
    with the daemon's final config on stdin.  Afterwards the host cache is
    updated, the daemon's deps are recorded together with the start time,
    and an INFO or ERROR event is recorded for the daemon.

    :param daemon_spec: what to deploy; its ``extra_args`` list is extended
        in place
    :param reconfig: add ``--reconfig`` and report the action as a
        reconfiguration
    :param osd_uuid_map: osd id -> uuid mapping used for ``osd`` daemons;
        fetched from the mgr when empty or omitted
    :return: ``"Deployed <name> on host '<host>'"`` (or ``Reconfigured``);
        returned whatever exit code cephadm reported
    :raises OrchestratorError: e.g. when an OSD id is not in the uuid map.
        Unless reconfiguring, the service's ``post_remove`` hook is run
        before the error is re-raised.
    """
    daemon_type = daemon_spec.daemon_type
    host = daemon_spec.host
    subject = orchestrator.DaemonDescription(
        daemon_type=daemon_type,
        daemon_id=daemon_spec.daemon_id,
        hostname=host,
    ).service_id()

    with set_exception_subject('service', subject, overwrite=True):
        try:
            image = ''
            start_time = datetime_now()
            # NOTE(review): when daemon_spec.ports is non-empty this is the
            # same list object, so the extend() below also changes
            # daemon_spec.ports -- confirm that this is intended.
            ports: List[int] = daemon_spec.ports or []

            if daemon_type == 'container':
                container_spec = cast(CustomContainerSpec,
                                      self.mgr.spec_store[daemon_spec.service_name].spec)
                image = container_spec.image
                if container_spec.ports:
                    ports.extend(container_spec.ports)
            elif daemon_type == 'cephadm-exporter':
                if not reconfig:
                    assert host
                    self._deploy_cephadm_binary(host)
            elif daemon_type in ('haproxy', 'keepalived'):
                haspec = cast(HA_RGWSpec,
                              self.mgr.spec_store[daemon_spec.service_name].spec)
                if daemon_type == 'haproxy':
                    image_override = haspec.haproxy_container_image
                else:
                    image_override = haspec.keepalived_container_image
                if image_override:
                    image = image_override

            # TCP port to open in the host firewall
            if ports:
                port_list = ' '.join(str(port) for port in ports)
                daemon_spec.extra_args.extend(['--tcp-ports', port_list])

            # osd deployments need an --osd-fsid arg
            if daemon_type == 'osd':
                uuid_map = osd_uuid_map or self.mgr.get_osd_uuid_map()
                osd_uuid = uuid_map.get(daemon_spec.daemon_id)
                if not osd_uuid:
                    raise OrchestratorError('osd.%s not in osdmap' % daemon_spec.daemon_id)
                daemon_spec.extra_args.extend(['--osd-fsid', osd_uuid])

            if reconfig:
                daemon_spec.extra_args.append('--reconfig')
            if self.mgr.allow_ptrace:
                daemon_spec.extra_args.append('--allow-ptrace')

            if self.mgr.cache.host_needs_registry_login(host) and self.mgr.registry_url:
                self._registry_login(host, self.mgr.registry_url,
                                     self.mgr.registry_username, self.mgr.registry_password)

            daemon_name = daemon_spec.name()
            self.log.info('%s daemon %s on %s' % (
                'Reconfiguring' if reconfig else 'Deploying',
                daemon_name, host))

            meta_json = json.dumps({
                'service_name': daemon_spec.service_name,
                'ports': daemon_spec.ports,
                'ip': daemon_spec.ip,
            })
            deploy_args = [
                '--name', daemon_name,
                '--meta-json', meta_json,
                '--config-json', '-',
            ] + daemon_spec.extra_args
            out, err, code = self._run_cephadm(
                host, daemon_name, 'deploy', deploy_args,
                stdin=json.dumps(daemon_spec.final_config),
                image=image)

            # refresh daemon state?  (ceph daemon reconfig does not need it)
            ceph_reconfig = reconfig and daemon_type in CEPH_TYPES
            if not ceph_reconfig:
                if not code and host in self.mgr.cache.daemons:
                    # prime cached service state with what we (should have)
                    # just created
                    sd = daemon_spec.to_daemon_description(
                        DaemonDescriptionStatus.running, 'starting')
                    self.mgr.cache.add_daemon(host, sd)
                    if daemon_type in ['grafana', 'iscsi', 'prometheus', 'alertmanager']:
                        self.mgr.requires_post_actions.add(daemon_type)
                self.mgr.cache.invalidate_host_daemons(host)

            self.mgr.cache.update_daemon_config_deps(
                host, daemon_name, daemon_spec.deps, start_time)
            self.mgr.cache.save_host(host)

            msg = "{} {} on host '{}'".format(
                'Reconfigured' if reconfig else 'Deployed', daemon_name, host)
            if code:
                what = 'reconfigure' if reconfig else 'deploy'
                self.mgr.events.for_daemon(
                    daemon_name, OrchestratorEvent.ERROR, f'Failed to {what}: {err}')
            else:
                self.mgr.events.for_daemon(daemon_name, OrchestratorEvent.INFO, msg)
            return msg
        except OrchestratorError:
            if not reconfig:
                # we have to clean up the daemon. E.g. keyrings.
                service_type = daemon_type_to_service(daemon_spec.daemon_type)
                failed_dd = daemon_spec.to_daemon_description(
                    DaemonDescriptionStatus.error, 'failed')
                self.mgr.cephadm_services[service_type].post_remove(failed_dd)
            raise
def _check_daemons(self) -> None:
    """
    Reconcile every cached daemon with its service spec.

    For each daemon returned by the cache:

    * daemons without an active spec are removed as orphans (``mon``,
      ``mgr`` and ``osd`` excepted) and not processed any further;
    * daemons of unmanaged services are skipped;
    * ``is_active`` is refreshed from the service's active daemon;
    * a scheduled action is picked up, or a ``reconfig`` is triggered when
      the last config time is unknown, the dependencies changed, or (for
      ceph daemon types) the monmap or the extra ceph conf is newer than
      the last config.  A scheduled ``redeploy`` takes precedence over a
      ``reconfig``.

    Failures of a daemon action are recorded as events and do not stop the
    loop; they do drop that daemon type from the post actions of this run.
    Finally ``daemon_check_post`` is called for every collected daemon type
    that is flagged in ``requires_post_actions``.
    """
    daemons = self.mgr.cache.get_daemons()
    daemons_post: Dict[str, List[orchestrator.DaemonDescription]] = defaultdict(list)
    for dd in daemons:
        # orphan?
        spec = self.mgr.spec_store.active_specs.get(dd.service_name(), None)
        assert dd.hostname is not None
        assert dd.daemon_type is not None
        assert dd.daemon_id is not None
        if not spec and dd.daemon_type not in ['mon', 'mgr', 'osd']:
            # (mon and mgr specs should always exist; osds aren't matched
            # to a service spec)
            self.log.info('Removing orphan daemon %s...' % dd.name())
            self._remove_daemon(dd.name(), dd.hostname)
            # The daemon has just been removed: do not queue it for post
            # actions or try to reconfigure it below.
            continue

        # ignore unmanaged services
        if spec and spec.unmanaged:
            continue

        # These daemon types require additional configs after creation
        if dd.daemon_type in ['grafana', 'iscsi', 'prometheus', 'alertmanager', 'nfs']:
            daemons_post[dd.daemon_type].append(dd)

        service = self.mgr.cephadm_services[daemon_type_to_service(dd.daemon_type)]
        active = service.get_active_daemon(
            self.mgr.cache.get_daemons_by_service(dd.service_name()))
        dd.is_active = active.daemon_id == dd.daemon_id

        deps = self.mgr._calc_daemon_deps(dd.daemon_type, dd.daemon_id)
        last_deps, last_config = self.mgr.cache.get_daemon_last_config_deps(
            dd.hostname, dd.name())
        if last_deps is None:
            last_deps = []

        # start from whatever action was scheduled; the checks below may
        # replace it with a reconfig
        action = self.mgr.cache.get_scheduled_daemon_action(dd.hostname, dd.name())
        if not last_config:
            self.log.info('Reconfiguring %s (unknown last config time)...' % (
                dd.name()))
            action = 'reconfig'
        elif last_deps != deps:
            self.log.debug('%s deps %s -> %s' % (dd.name(), last_deps, deps))
            self.log.info('Reconfiguring %s (dependencies changed)...' % (
                dd.name()))
            action = 'reconfig'
        elif (self.mgr.last_monmap
              and self.mgr.last_monmap > last_config
              and dd.daemon_type in CEPH_TYPES):
            self.log.info('Reconfiguring %s (monmap changed)...' % dd.name())
            action = 'reconfig'
        elif (self.mgr.extra_ceph_conf_is_newer(last_config)
              and dd.daemon_type in CEPH_TYPES):
            self.log.info('Reconfiguring %s (extra config changed)...' % dd.name())
            action = 'reconfig'

        if action:
            # a scheduled redeploy must not be downgraded to a reconfig
            if (self.mgr.cache.get_scheduled_daemon_action(dd.hostname, dd.name()) == 'redeploy'
                    and action == 'reconfig'):
                action = 'redeploy'
            try:
                daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(dd)
                self.mgr._daemon_action(daemon_spec, action=action)
                self.mgr.cache.rm_scheduled_daemon_action(dd.hostname, dd.name())
            except OrchestratorError as e:
                self.mgr.events.from_orch_error(e)
                # no post actions for this daemon type in this run
                daemons_post.pop(dd.daemon_type, None)
                # continue...
            except Exception as e:
                self.mgr.events.for_daemon_from_exception(dd.name(), e)
                daemons_post.pop(dd.daemon_type, None)
                # continue...

    # do daemon post actions
    for daemon_type, daemon_descs in daemons_post.items():
        if daemon_type in self.mgr.requires_post_actions:
            self.mgr.requires_post_actions.remove(daemon_type)
            self.mgr._get_cephadm_service(daemon_type_to_service(
                daemon_type)).daemon_check_post(daemon_descs)