Beispiel #1
0
    def _remove_daemon(self, name: str, host: str) -> str:
        """
        Remove the daemon called ``name`` from ``host``.

        ``name`` is split on its first ``.`` into daemon type and daemon id.
        The ``pre_remove`` and ``post_remove`` hooks of the service registered
        for that daemon type are called around the cephadm ``rm-daemon``
        command, and the cached daemon information of the host is invalidated
        afterwards.

        :param name: full daemon name, ``<daemon_type>.<daemon_id>``
        :param host: host the daemon is running on
        :return: a human readable confirmation message
        """
        daemon_type, daemon_id = name.split('.', 1)
        dd = orchestrator.DaemonDescription(
            daemon_type=daemon_type,
            daemon_id=daemon_id,
            hostname=host,
        )

        def owning_service():  # type: ignore
            # service object registered for this daemon's type
            return self.mgr.cephadm_services[
                daemon_type_to_service(daemon_type)]

        with set_exception_subject('service', dd.service_id(), overwrite=True):
            owning_service().pre_remove(dd)

            rm_args = ['--name', name, '--force']
            self.log.info('Removing daemon %s from %s' % (name, host))
            _out, _err, rc = self._run_cephadm(host, name, 'rm-daemon', rm_args)
            if not rc:
                # only forget the daemon when cephadm reported success
                self.mgr.cache.rm_daemon(host, name)
            self.mgr.cache.invalidate_host_daemons(host)

            owning_service().post_remove(dd)

            return "Removed {} from host '{}'".format(name, host)
Beispiel #2
0
 def post_remove(self, daemon: DaemonDescription) -> None:
     """
     Hook invoked once a daemon has been removed.

     Verifies that ``daemon`` has a daemon type and that this type maps to
     the service type handled by this class, then writes a debug log entry.

     :param daemon: description of the daemon that was removed
     """
     removed_type = daemon.daemon_type
     assert removed_type is not None
     assert daemon_type_to_service(removed_type) == self.TYPE
     logger.debug(f'Post remove daemon {self.TYPE}.{daemon.daemon_id}')
Beispiel #3
0
 def _check_agent(self, host: str) -> bool:
     """
     Check the cephadm agent on ``host`` and reconfigure it if necessary.

     The steps are:

     1. If the cherrypy thread or its root cert is not available yet, the
        check is postponed and ``False`` is returned.
     2. The agent is flagged as down when ``agent_helpers._agent_down``
        says so.
     3. The agent daemon is looked up in the daemon cache; if that fails
        the current down state is returned.
     4. If the agent has no recorded config or its dependencies changed,
        it is reconfigured (over http via an ack request when the agent
        port is known, the root cert is among the last deps and the agent
        is not down; otherwise through ``_daemon_action``) and the down
        state is returned.
     5. Otherwise a scheduled daemon action for the agent, if any, is
        executed and removed from the schedule.

     Failures in steps 4 and 5 are logged at debug level and not raised.

     :param host: name of the host whose agent is checked
     :return: ``True`` if the agent was reported as down, else ``False``
     """
     down = False
     try:
         assert self.mgr.cherrypy_thread
         assert self.mgr.cherrypy_thread.ssl_certs.get_root_cert()
     except Exception:
         self.mgr.log.debug(
             f'Delaying checking agent on {host} until cephadm endpoint finished creating root cert')
         return down
     if self.mgr.agent_helpers._agent_down(host):
         down = True
     try:
         agent = self.mgr.cache.get_daemons_by_type('agent', host=host)[0]
         assert agent.daemon_id is not None
         assert agent.hostname is not None
     except Exception as e:
         self.mgr.log.debug(
             f'Could not retrieve agent on host {host} from daemon cache: {e}')
         return down
     try:
         spec = self.mgr.spec_store.active_specs.get('agent', None)
         deps = self.mgr._calc_daemon_deps(spec, 'agent', agent.daemon_id)
         last_deps, last_config = self.mgr.agent_cache.get_agent_last_config_deps(host)
         if not last_config or last_deps != deps:
             # if root cert is the dep that changed, we must use ssh to reconfig
             # so it's necessary to check this one specifically
             root_cert_match = False
             try:
                 root_cert = self.mgr.cherrypy_thread.ssl_certs.get_root_cert()
                 if last_deps and root_cert in last_deps:
                     root_cert_match = True
             except Exception as e:
                 # best effort: without a root cert to compare we keep
                 # root_cert_match False and use the ssh reconfig below,
                 # but leave a trace of why instead of failing silently
                 self.mgr.log.debug(
                     f'Could not compare root cert with last deps of agent on host {host}: {e}')
             daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(agent)
             # we need to know the agent port to try to reconfig w/ http
             # otherwise there is no choice but a full ssh reconfig
             if host in self.mgr.agent_cache.agent_ports and root_cert_match and not down:
                 daemon_spec = self.mgr.cephadm_services[daemon_type_to_service(
                     daemon_spec.daemon_type)].prepare_create(daemon_spec)
                 self.mgr.agent_helpers._request_agent_acks(
                     hosts={daemon_spec.host},
                     increment=True,
                     daemon_spec=daemon_spec,
                 )
             else:
                 self.mgr._daemon_action(daemon_spec, action='reconfig')
             return down
     except Exception as e:
         self.mgr.log.debug(
             f'Agent on host {host} not ready to have config and deps checked: {e}')
     # no reconfig happened (or checking failed): run a scheduled action, if any
     action = self.mgr.cache.get_scheduled_daemon_action(agent.hostname, agent.name())
     if action:
         try:
             daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(agent)
             self.mgr._daemon_action(daemon_spec, action=action)
             # only drop the scheduled action once it ran without raising
             self.mgr.cache.rm_scheduled_daemon_action(agent.hostname, agent.name())
         except Exception as e:
             self.mgr.log.debug(
                 f'Agent on host {host} not ready to {action}: {e}')
     return down
Beispiel #4
0
    def _create_daemon(
        self,
        daemon_spec: CephadmDaemonSpec,
        reconfig: bool = False,
        osd_uuid_map: Optional[Dict[str, Any]] = None,
    ) -> str:
        """
        Deploy (or reconfigure) a single daemon by running ``cephadm deploy``.

        The config for the daemon is generated by the service registered for
        its daemon type and passed to cephadm on stdin. On success the daemon
        cache is primed with a 'starting' entry; in every case the host's
        daemon cache is invalidated, the config dependencies are recorded and
        the host is saved.

        Note that the arguments passed to cephadm are appended to
        ``daemon_spec.extra_args`` in place.

        :param daemon_spec: description of the daemon to create
        :param reconfig: reconfigure an existing daemon instead of deploying
            a new one
        :param osd_uuid_map: map of osd id to osd uuid, only used for osd
            daemons; fetched from the mgr when not given (or empty)
        :return: a human readable message. For a 'container' daemon without
            service spec, or when the cephadm binary cannot be deployed for
            a 'cephadm-exporter', the message describes the failure and
            nothing is deployed.
        :raises OrchestratorError: if an osd daemon's id is not in the osd
            uuid map
        """

        with set_exception_subject('service',
                                   orchestrator.DaemonDescription(
                                       daemon_type=daemon_spec.daemon_type,
                                       daemon_id=daemon_spec.daemon_id,
                                       hostname=daemon_spec.host,
                                   ).service_id(),
                                   overwrite=True):

            image = ''
            start_time = datetime_now()
            # work on a copy: extending the list below must not modify the
            # ports stored in the caller's daemon_spec
            ports: List[int] = list(daemon_spec.ports) if daemon_spec.ports else []

            if daemon_spec.daemon_type == 'container':
                spec: Optional[CustomContainerSpec] = daemon_spec.spec
                if spec is None:
                    # Exit here immediately because the required service
                    # spec to create a daemon is not provided. This is only
                    # provided when a service is applied via 'orch apply'
                    # command.
                    msg = "Failed to {} daemon {} on {}: Required " \
                          "service specification not provided".format(
                              'reconfigure' if reconfig else 'deploy',
                              daemon_spec.name(), daemon_spec.host)
                    self.log.info(msg)
                    return msg
                image = spec.image
                if spec.ports:
                    ports.extend(spec.ports)

            if daemon_spec.daemon_type == 'cephadm-exporter':
                if not reconfig:
                    assert daemon_spec.host
                    deploy_ok = self._deploy_cephadm_binary(daemon_spec.host)
                    if not deploy_ok:
                        msg = f"Unable to deploy the cephadm binary to {daemon_spec.host}"
                        self.log.warning(msg)
                        return msg

            # haproxy and keepalived may override the container image
            # through the HA_RGW service spec
            if daemon_spec.daemon_type == 'haproxy':
                haspec = cast(HA_RGWSpec, daemon_spec.spec)
                if haspec.haproxy_container_image:
                    image = haspec.haproxy_container_image

            if daemon_spec.daemon_type == 'keepalived':
                haspec = cast(HA_RGWSpec, daemon_spec.spec)
                if haspec.keepalived_container_image:
                    image = haspec.keepalived_container_image

            cephadm_config, deps = self.mgr.cephadm_services[
                daemon_type_to_service(
                    daemon_spec.daemon_type)].generate_config(daemon_spec)

            # TCP port to open in the host firewall
            if ports:
                daemon_spec.extra_args.extend(
                    ['--tcp-ports', ' '.join(map(str, ports))])

            # osd deployments needs an --osd-uuid arg
            if daemon_spec.daemon_type == 'osd':
                if not osd_uuid_map:
                    osd_uuid_map = self.mgr.get_osd_uuid_map()
                osd_uuid = osd_uuid_map.get(daemon_spec.daemon_id)
                if not osd_uuid:
                    raise OrchestratorError('osd.%s not in osdmap' %
                                            daemon_spec.daemon_id)
                daemon_spec.extra_args.extend(['--osd-fsid', osd_uuid])

            if reconfig:
                daemon_spec.extra_args.append('--reconfig')
            if self.mgr.allow_ptrace:
                daemon_spec.extra_args.append('--allow-ptrace')

            if self.mgr.cache.host_needs_registry_login(
                    daemon_spec.host) and self.mgr.registry_url:
                self._registry_login(daemon_spec.host, self.mgr.registry_url,
                                     self.mgr.registry_username,
                                     self.mgr.registry_password)

            # the generated config is handed to cephadm via stdin
            daemon_spec.extra_args.extend(['--config-json', '-'])

            self.log.info('%s daemon %s on %s' %
                          ('Reconfiguring' if reconfig else 'Deploying',
                           daemon_spec.name(), daemon_spec.host))

            out, err, code = self._run_cephadm(
                daemon_spec.host,
                daemon_spec.name(),
                'deploy', [
                    '--name',
                    daemon_spec.name(),
                ] + daemon_spec.extra_args,
                stdin=json.dumps(cephadm_config),
                image=image)
            if not code and daemon_spec.host in self.mgr.cache.daemons:
                # prime cached service state with what we (should have)
                # just created
                sd = orchestrator.DaemonDescription()
                sd.daemon_type = daemon_spec.daemon_type
                sd.daemon_id = daemon_spec.daemon_id
                sd.hostname = daemon_spec.host
                sd.status = 1
                sd.status_desc = 'starting'
                self.mgr.cache.add_daemon(daemon_spec.host, sd)
                if daemon_spec.daemon_type in [
                        'grafana', 'iscsi', 'prometheus', 'alertmanager'
                ]:
                    self.mgr.requires_post_actions.add(daemon_spec.daemon_type)
            self.mgr.cache.invalidate_host_daemons(daemon_spec.host)
            self.mgr.cache.update_daemon_config_deps(daemon_spec.host,
                                                     daemon_spec.name(), deps,
                                                     start_time)
            self.mgr.cache.save_host(daemon_spec.host)
            msg = "{} {} on host '{}'".format(
                'Reconfigured' if reconfig else 'Deployed', daemon_spec.name(),
                daemon_spec.host)
            if not code:
                self.mgr.events.for_daemon(daemon_spec.name(),
                                           OrchestratorEvent.INFO, msg)
            else:
                what = 'reconfigure' if reconfig else 'deploy'
                self.mgr.events.for_daemon(daemon_spec.name(),
                                           OrchestratorEvent.ERROR,
                                           f'Failed to {what}: {err}')
            return msg
Beispiel #5
0
    def _check_daemons(self) -> None:
        """
        Walk over all cached daemons and bring them in line with their specs.

        For every daemon this

        * removes it if it is an orphan, i.e. no service spec exists for it
          and it is not a mon, mgr or osd,
        * skips it if its service is unmanaged,
        * updates its ``is_active`` flag,
        * reconfigures it if its last config time is unknown, its
          dependencies changed, or (for ceph daemon types) the monmap or the
          extra ceph config is newer than its last config,
        * otherwise runs a scheduled daemon action, if one exists.

        Errors raised by a daemon action are turned into events and do not
        abort the loop. Finally the post actions of those daemon types that
        are flagged in ``mgr.requires_post_actions`` are executed.
        """

        daemons = self.mgr.cache.get_daemons()
        daemons_post: Dict[
            str, List[orchestrator.DaemonDescription]] = defaultdict(list)
        for dd in daemons:
            # orphan?
            spec = self.mgr.spec_store.specs.get(dd.service_name(), None)
            if not spec and dd.daemon_type not in ['mon', 'mgr', 'osd']:
                # (mon and mgr specs should always exist; osds aren't matched
                # to a service spec)
                self.log.info('Removing orphan daemon %s...' % dd.name())
                self._remove_daemon(dd.name(), dd.hostname)
                # the daemon was just removed: it must neither be queued for
                # post actions nor be reconfigured/redeployed below
                continue

            # ignore unmanaged services
            if spec and spec.unmanaged:
                continue

            # These daemon types require additional configs after creation
            if dd.daemon_type in [
                    'grafana', 'iscsi', 'prometheus', 'alertmanager', 'nfs'
            ]:
                daemons_post[dd.daemon_type].append(dd)

            if self.mgr.cephadm_services[daemon_type_to_service(
                    dd.daemon_type)].get_active_daemon(
                        self.mgr.cache.get_daemons_by_service(
                            dd.service_name())).daemon_id == dd.daemon_id:
                dd.is_active = True
            else:
                dd.is_active = False

            deps = self.mgr._calc_daemon_deps(dd.daemon_type, dd.daemon_id)
            last_deps, last_config = self.mgr.cache.get_daemon_last_config_deps(
                dd.hostname, dd.name())
            if last_deps is None:
                last_deps = []
            # start from the scheduled action (if any); the checks below may
            # replace it with a reconfig
            action = self.mgr.cache.get_scheduled_daemon_action(
                dd.hostname, dd.name())
            if not last_config:
                self.log.info(
                    'Reconfiguring %s (unknown last config time)...' %
                    (dd.name()))
                action = 'reconfig'
            elif last_deps != deps:
                self.log.debug('%s deps %s -> %s' %
                               (dd.name(), last_deps, deps))
                self.log.info('Reconfiguring %s (dependencies changed)...' %
                              (dd.name()))
                action = 'reconfig'
            elif self.mgr.last_monmap and \
                    self.mgr.last_monmap > last_config and \
                    dd.daemon_type in CEPH_TYPES:
                self.log.info('Reconfiguring %s (monmap changed)...' %
                              dd.name())
                action = 'reconfig'
            elif self.mgr.extra_ceph_conf_is_newer(last_config) and \
                    dd.daemon_type in CEPH_TYPES:
                self.log.info('Reconfiguring %s (extra config changed)...' %
                              dd.name())
                action = 'reconfig'
            if action:
                # a scheduled redeploy takes precedence over a reconfig
                if self.mgr.cache.get_scheduled_daemon_action(dd.hostname, dd.name()) == 'redeploy' \
                        and action == 'reconfig':
                    action = 'redeploy'
                try:
                    self.mgr._daemon_action(daemon_type=dd.daemon_type,
                                            daemon_id=dd.daemon_id,
                                            host=dd.hostname,
                                            action=action)
                    self.mgr.cache.rm_scheduled_daemon_action(
                        dd.hostname, dd.name())
                except OrchestratorError as e:
                    self.mgr.events.from_orch_error(e)
                    # drop the post actions collected so far for this type
                    if dd.daemon_type in daemons_post:
                        del daemons_post[dd.daemon_type]
                    # continue...
                except Exception as e:
                    self.mgr.events.for_daemon_from_exception(dd.name(), e)
                    # drop the post actions collected so far for this type
                    if dd.daemon_type in daemons_post:
                        del daemons_post[dd.daemon_type]
                    # continue...

        # do daemon post actions
        for daemon_type, daemon_descs in daemons_post.items():
            if daemon_type in self.mgr.requires_post_actions:
                self.mgr.requires_post_actions.remove(daemon_type)
                self.mgr._get_cephadm_service(
                    daemon_type_to_service(daemon_type)).daemon_check_post(
                        daemon_descs)