Example #1
def with_service(cephadm_module: CephadmOrchestrator,
                 spec: ServiceSpec,
                 meth=None,
                 host: str = '',
                 status_running=False) -> Iterator[List[str]]:
    if spec.placement.is_empty() and host:
        spec.placement = PlacementSpec(hosts=[host], count=1)
    if meth is not None:
        c = meth(cephadm_module, spec)
        assert wait(cephadm_module,
                    c) == f'Scheduled {spec.service_name()} update...'
    else:
        c = cephadm_module.apply([spec])
        assert wait(cephadm_module,
                    c) == [f'Scheduled {spec.service_name()} update...']

    specs = [
        d.spec for d in wait(cephadm_module, cephadm_module.describe_service())
    ]
    assert spec in specs

    CephadmServe(cephadm_module)._apply_all_services()

    if status_running:
        make_daemons_running(cephadm_module, spec.service_name())

    dds = wait(cephadm_module, cephadm_module.list_daemons())
    own_dds = [dd for dd in dds if dd.service_name() == spec.service_name()]
    if host and spec.service_type != 'osd':
        assert own_dds

    yield [dd.name() for dd in own_dds]

    assert_rm_service(cephadm_module, spec.service_name())
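
A note on usage: the trailing yield plus teardown makes this a generator meant to be driven as a context manager, so that assert_rm_service runs after the test body. A minimal sketch, assuming with_service is wrapped with contextlib.contextmanager upstream (the with-statement in Example #9 points that way):

def test_with_service_sketch(cephadm_module: CephadmOrchestrator) -> None:
    spec = ServiceSpec('mds', service_id='fsname')
    with with_host(cephadm_module, 'host1'):
        # daemon_names is the list yielded above; the service is removed on exit
        with with_service(cephadm_module, spec, host='host1') as daemon_names:
            assert all(name.startswith('mds.') for name in daemon_names)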
Example #2
    def fence_old_ranks(self, spec: ServiceSpec,
                        rank_map: Dict[int, Dict[int, Optional[str]]],
                        num_ranks: int) -> None:
        for rank, m in list(rank_map.items()):
            if rank >= num_ranks:
                for daemon_id in m.values():
                    if daemon_id is not None:
                        self.fence(daemon_id)
                del rank_map[rank]
                nodeid = f'{spec.service_name()}.{rank}'
                self.mgr.log.info(
                    f'Removing {nodeid} from the ganesha grace table')
                self.run_grace_tool(cast(NFSServiceSpec, spec), 'remove',
                                    nodeid)
                self.mgr.spec_store.save_rank_map(spec.service_name(),
                                                  rank_map)
            else:
                max_gen = max(m.keys())
                for gen, daemon_id in list(m.items()):
                    if gen < max_gen:
                        if daemon_id is not None:
                            self.fence(daemon_id)
                        del rank_map[rank][gen]
                        self.mgr.spec_store.save_rank_map(
                            spec.service_name(), rank_map)
Example #3
    def test_daemon_ok_to_stop(self, ok_to_stop,
                               cephadm_module: CephadmOrchestrator):
        spec = ServiceSpec('mds',
                           service_id='fsname',
                           placement=PlacementSpec(hosts=['host1', 'host2']))
        with with_host(cephadm_module,
                       'host1'), with_host(cephadm_module, 'host2'):
            c = cephadm_module.apply_mds(spec)
            out = wait(cephadm_module, c)
            match_glob(out, "Scheduled mds.fsname update...")
            CephadmServe(cephadm_module)._apply_all_services()

            [daemon] = cephadm_module.cache.daemons['host1'].keys()

            spec.placement.set_hosts(['host2'])

            ok_to_stop.side_effect = False

            c = cephadm_module.apply_mds(spec)
            out = wait(cephadm_module, c)
            match_glob(out, "Scheduled mds.fsname update...")
            CephadmServe(cephadm_module)._apply_all_services()

            ok_to_stop.assert_called_with([daemon[4:]])

            assert_rm_daemon(cephadm_module, spec.service_name(),
                             'host1')  # verifies ok-to-stop
            assert_rm_daemon(cephadm_module, spec.service_name(), 'host2')
Example #4
    def osdspec_needs_apply(self, host: str, spec: ServiceSpec) -> bool:
        if (host not in self.devices or host not in self.last_device_change
                or host not in self.last_device_update
                or host not in self.osdspec_last_applied
                or spec.service_name() not in self.osdspec_last_applied[host]):
            return True
        created = self.mgr.spec_store.get_created(spec)
        if not created or created > self.last_device_change[host]:
            return True
        return self.osdspec_last_applied[host][
            spec.service_name()] < self.last_device_change[host]
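
Read bottom-up, the spec needs re-applying if any cached state is missing, if the spec was (re)created after the last device change, or if it was last applied before the devices last changed. That final comparison, sketched with hypothetical timestamps:

from datetime import datetime, timedelta

last_device_change = datetime(2021, 1, 1, 12, 0)
last_applied = last_device_change - timedelta(minutes=5)
# applied before the devices changed, so osdspec_needs_apply returns True
assert last_applied < last_device_change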
Example #5
def with_daemon(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth, host: str):
    spec.placement = PlacementSpec(hosts=[host], count=1)

    c = meth(cephadm_module, spec)
    [out] = wait(cephadm_module, c)
    match_glob(out, f"Deployed {spec.service_name()}.* on host '{host}'")

    dds = cephadm_module.cache.get_daemons_by_service(spec.service_name())
    for dd in dds:
        if dd.hostname == host:
            yield dd.daemon_id
            assert_rm_daemon(cephadm_module, spec.service_name(), host)
            return

    assert False, 'Daemon not found'
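
Like with_service in Example #1, this is a generator to be wrapped with contextlib.contextmanager: it yields exactly one daemon id and removes that daemon on exit. A hedged usage sketch; CephadmOrchestrator.add_mds stands in here for a plausible meth, matching the meth(cephadm_module, spec) call above:

with with_host(cephadm_module, 'test'):
    with with_daemon(cephadm_module, ServiceSpec('mds', service_id='fsname'),
                     CephadmOrchestrator.add_mds, 'test') as daemon_id:
        assert daemon_id  # the bare id, e.g. 'fsname.test.xyzabc' (hypothetical)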
Example #6
    def _apply_service_config(self, spec: ServiceSpec) -> None:
        if spec.config:
            section = utils.name_to_config_section(spec.service_name())
            for k, v in spec.config.items():
                try:
                    current = self.mgr.get_foreign_ceph_option(section, k)
                except KeyError:
                    self.log.warning(
                        f'Ignoring invalid {spec.service_name()} config option {k}'
                    )
                    self.mgr.events.for_service(
                        spec, OrchestratorEvent.ERROR, f'Invalid config option {k}'
                    )
                    continue
                if current != v:
                    self.log.debug(f'setting [{section}] {k} = {v}')
                    try:
                        self.mgr.check_mon_command({
                            'prefix': 'config set',
                            'name': k,
                            'value': str(v),
                            'who': section,
                        })
                    except MonCommandFailed as e:
                        self.log.warning(
                            f'Failed to set {spec.service_name()} option {k}: {e}'
                        )
Example #7
        def convert_to_explicit(spec: ServiceSpec) -> None:
            placements = HostAssignment(
                spec=spec,
                get_hosts_func=self.mgr._get_hosts,
                get_daemons_func=self.mgr.cache.get_daemons_by_service).place()

            existing_daemons = self.mgr.cache.get_daemons_by_service(
                spec.service_name())

            # We only have to migrate if the new scheduler would remove daemons
            if len(placements) >= len(existing_daemons):
                return

            old_hosts = {h.hostname: h for h in spec.placement.hosts}
            new_hosts = [
                old_hosts[d.hostname] if d.hostname in old_hosts else
                HostPlacementSpec(hostname=d.hostname, network='', name='')
                for d in existing_daemons
            ]

            new_placement = PlacementSpec(hosts=new_hosts,
                                          count=spec.placement.count)

            new_spec = ServiceSpec.from_json(spec.to_json())
            new_spec.placement = new_placement

            logger.info(
                f"Migrating {spec.one_line_str()} to explicit placement")

            self.mgr.spec_store.save(new_spec)
Example #8
def test_daemon_description_service_name(spec: ServiceSpec,
                                         dd: DaemonDescription, valid: bool):
    if valid:
        assert spec.service_name() == dd.service_name()
    else:
        with pytest.raises(OrchestratorError):
            dd.service_name()
Example #9
    def test_mds_config_purge(self, cephadm_module: CephadmOrchestrator):
        spec = ServiceSpec('mds', service_id='fsname')
        with with_host(cephadm_module, 'test'):
            with with_service(cephadm_module, spec, host='test'):
                ret, out, err = cephadm_module.check_mon_command({
                    'prefix': 'config get',
                    'who': spec.service_name(),
                    'key': 'mds_join_fs',
                })
                assert out == 'fsname'
            ret, out, err = cephadm_module.check_mon_command({
                'prefix': 'config get',
                'who': spec.service_name(),
                'key': 'mds_join_fs',
            })
            assert not out
Example #10
    def test_apply_save(self, spec: ServiceSpec, meth, cephadm_module: CephadmOrchestrator):
        with with_host(cephadm_module, 'test'):
            if not spec.placement:
                spec.placement = PlacementSpec(hosts=['test'], count=1)
            c = meth(cephadm_module, spec)
            assert wait(cephadm_module, c) == f'Scheduled {spec.service_name()} update...'
            assert [d.spec for d in wait(cephadm_module, cephadm_module.describe_service())] == [spec]

            cephadm_module._apply_all_services()

            dds = wait(cephadm_module, cephadm_module.list_daemons())
            for dd in dds:
                assert dd.service_name() == spec.service_name()

            assert_rm_service(cephadm_module, spec.service_name())
Example #11
        def convert_to_explicit(spec: ServiceSpec) -> None:
            existing_daemons = self.mgr.cache.get_daemons_by_service(spec.service_name())
            placements, to_add, to_remove = HostAssignment(
                spec=spec,
                hosts=self.mgr.inventory.all_specs(),
                unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
                daemons=existing_daemons,
            ).place()

            # We only have to migrate if the new scheduler would remove daemons
            if len(placements) >= len(existing_daemons):
                return

            def to_hostname(d: DaemonDescription) -> HostPlacementSpec:
                if d.hostname in old_hosts:
                    return old_hosts[d.hostname]
                else:
                    assert d.hostname
                    return HostPlacementSpec(d.hostname, '', '')

            old_hosts = {h.hostname: h for h in spec.placement.hosts}
            new_hosts = [to_hostname(d) for d in existing_daemons]

            new_placement = PlacementSpec(
                hosts=new_hosts,
                count=spec.placement.count
            )

            new_spec = ServiceSpec.from_json(spec.to_json())
            new_spec.placement = new_placement

            logger.info(f"Migrating {spec.one_line_str()} to explicit placement")

            self.mgr.spec_store.save(new_spec)
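
The migration's effect, sketched with hypothetical values: a label-based placement that could land anywhere is pinned down to the hosts that already run daemons, so the stricter scheduler cannot remove them:

# before: count-based placement over labeled hosts (hypothetical)
spec.placement = PlacementSpec(label='mds', count=2)
# after: explicit hosts of the existing daemons, count preserved
new_placement = PlacementSpec(hosts=[HostPlacementSpec('host1', '', ''),
                                     HostPlacementSpec('host2', '', '')],
                              count=2)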
Example #12
def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth, host: str) -> Iterator[List[str]]:
    if spec.placement.is_empty():
        spec.placement = PlacementSpec(hosts=[host], count=1)
    c = meth(cephadm_module, spec)
    assert wait(cephadm_module, c) == f'Scheduled {spec.service_name()} update...'
    specs = [d.spec for d in wait(cephadm_module, cephadm_module.describe_service())]
    assert spec in specs

    cephadm_module._apply_all_services()

    dds = wait(cephadm_module, cephadm_module.list_daemons())
    own_dds = [dd for dd in dds if dd.service_name() == spec.service_name()]
    assert own_dds

    yield [dd.name() for dd in own_dds]

    assert_rm_service(cephadm_module, spec.service_name())
Example #13
    def test_apply_save(self, spec: ServiceSpec, meth, cephadm_module):
        with self._with_host(cephadm_module, 'test'):
            spec.placement = PlacementSpec(hosts=['test'], count=1)
            c = meth(cephadm_module, spec)
            assert wait(cephadm_module, c) == f'Scheduled {spec.service_name()} update...'
            assert [d.spec for d in wait(cephadm_module, cephadm_module.describe_service())] == [spec]

            assert_rm_service(cephadm_module, spec.service_name())
Example #14
    def test_daemon_add(self, spec: ServiceSpec, meth, cephadm_module):
        with with_host(cephadm_module, 'test'):
            spec.placement = PlacementSpec(hosts=['test'], count=1)

            c = meth(cephadm_module, spec)
            [out] = wait(cephadm_module, c)
            match_glob(out, f"Deployed {spec.service_name()}.* on host 'test'")

            assert_rm_daemon(cephadm_module, spec.service_name(), 'test')
Example #15
    def update_ha_rgw_definitive_hosts(self, spec: ServiceSpec, hosts: List[HostPlacementSpec],
                                       add_hosts: Set[HostPlacementSpec]) -> HA_RGWSpec:
        spec = cast(HA_RGWSpec, spec)
        if not (set(hosts) == set(spec.definitive_host_list)):
            spec.definitive_host_list = hosts
            ha_rgw_daemons = self.mgr.cache.get_daemons_by_service(spec.service_name())
            for daemon in ha_rgw_daemons:
                if daemon.hostname in [h.hostname for h in hosts] and daemon.hostname not in add_hosts:
                    self.mgr.cache.schedule_daemon_action(
                        daemon.hostname, daemon.name(), 'reconfig')
        return spec
Example #16
    def save(
        self,
        spec: ServiceSpec,
        update_create: bool = True,
    ) -> None:
        name = spec.service_name()
        if spec.preview_only:
            self.spec_preview[name] = spec
            return None
        self._specs[name] = spec

        if update_create:
            self.spec_created[name] = datetime_now()
        self._save(name)
Example #17
    def update_ha_rgw_definitive_hosts(
            self,
            spec: ServiceSpec,
            hosts: List[DaemonPlacement],
            add_hosts: List[DaemonPlacement]
    ) -> HA_RGWSpec:
        spec = cast(HA_RGWSpec, spec)
        hostnames = [p.hostname for p in hosts]
        add_hostnames = [p.hostname for p in add_hosts]
        if not (set(hostnames) == set(spec.definitive_host_list)):
            spec.definitive_host_list = hostnames
            ha_rgw_daemons = self.mgr.cache.get_daemons_by_service(spec.service_name())
            for daemon in ha_rgw_daemons:
                if daemon.hostname in hostnames and daemon.hostname not in add_hostnames:
                    assert daemon.hostname is not None
                    self.mgr.cache.schedule_daemon_action(
                        daemon.hostname, daemon.name(), 'reconfig')
        return spec
Example #18
    def save(self, spec: ServiceSpec, update_create: bool = True) -> None:
        name = spec.service_name()
        if spec.preview_only:
            self.spec_preview[name] = spec
            return None
        self._specs[name] = spec

        if update_create:
            self.spec_created[name] = datetime_now()

        data = {
            'spec': spec.to_json(),
            'created': datetime_to_str(self.spec_created[name]),
        }
        if name in self.spec_deleted:
            data['deleted'] = datetime_to_str(self.spec_deleted[name])

        self.mgr.set_store(
            SPEC_STORE_PREFIX + name,
            json.dumps(data, sort_keys=True),
        )
        self.mgr.events.for_service(spec, OrchestratorEvent.INFO,
                                    'service was created')
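
The persisted layout, sketched with hypothetical values (assuming SPEC_STORE_PREFIX is a short prefix such as 'spec.' and datetime_to_str emits ISO 8601):

key = SPEC_STORE_PREFIX + 'mds.fsname'
value = json.dumps({
    'created': '2021-01-01T00:00:00.000000Z',
    'spec': {'service_type': 'mds', 'service_id': 'fsname'},
}, sort_keys=True)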
Example #19
    def _apply_service(self, spec: ServiceSpec) -> bool:
        """
        Schedule a service.  Deploy new daemons or remove old ones, depending
        on the target label and count specified in the placement.
        """
        self.mgr.migration.verify_no_migration()

        daemon_type = spec.service_type
        service_name = spec.service_name()
        if spec.unmanaged:
            self.log.debug('Skipping unmanaged service %s' % service_name)
            return False
        if spec.preview_only:
            self.log.debug('Skipping preview_only service %s' % service_name)
            return False
        self.log.debug('Applying service %s spec' % service_name)

        config_func = self._config_fn(daemon_type)

        if daemon_type == 'osd':
            self.mgr.osd_service.create_from_spec(cast(DriveGroupSpec, spec))
            # TODO: returning True would result in a busy loop
            # can't know if daemon count changed; create_from_spec doesn't
            # return a solid indication
            return False

        daemons = self.mgr.cache.get_daemons_by_service(service_name)

        public_network = None
        if daemon_type == 'mon':
            ret, out, err = self.mgr.check_mon_command({
                'prefix': 'config get',
                'who': 'mon',
                'key': 'public_network',
            })
            if '/' in out:
                public_network = out.strip()
                self.log.debug('mon public_network is %s' % public_network)

        def matches_network(host):
            # type: (str) -> bool
            if not public_network:
                return False
            # make sure we have 1 or more IPs for that network on that
            # host
            return len(self.mgr.cache.networks[host].get(public_network,
                                                         [])) > 0

        ha = HostAssignment(
            spec=spec,
            hosts=self.mgr._hosts_with_daemon_inventory(),
            get_daemons_func=self.mgr.cache.get_daemons_by_service,
            filter_new_host=matches_network if daemon_type == 'mon' else None,
        )

        hosts: List[HostPlacementSpec] = ha.place()
        self.log.debug('Usable hosts: %s' % hosts)

        r = None

        # sanity check
        if daemon_type in ['mon', 'mgr'] and len(hosts) < 1:
            self.log.debug('cannot scale mon|mgr below 1 (hosts=%s)' % hosts)
            return False

        # add any?
        did_config = False

        add_daemon_hosts: Set[HostPlacementSpec] = ha.add_daemon_hosts(hosts)
        self.log.debug('Hosts that will receive new daemons: %s' %
                       add_daemon_hosts)

        remove_daemon_hosts: Set[
            orchestrator.DaemonDescription] = ha.remove_daemon_hosts(hosts)
        self.log.debug('Hosts that will lose daemons: %s' %
                       remove_daemon_hosts)

        for host, network, name in add_daemon_hosts:
            daemon_id = self.mgr.get_unique_name(daemon_type,
                                                 host,
                                                 daemons,
                                                 prefix=spec.service_id,
                                                 forcename=name)

            if not did_config and config_func:
                if daemon_type == 'rgw':
                    rgw_config_func = cast(Callable[[RGWSpec, str], None],
                                           config_func)
                    rgw_config_func(cast(RGWSpec, spec), daemon_id)
                else:
                    config_func(spec)
                did_config = True

            daemon_spec = self.mgr.cephadm_services[
                daemon_type].make_daemon_spec(host, daemon_id, network, spec)
            self.log.debug('Placing %s.%s on host %s' %
                           (daemon_type, daemon_id, host))

            try:
                daemon_spec = self.mgr.cephadm_services[
                    daemon_type].prepare_create(daemon_spec)
                self.mgr._create_daemon(daemon_spec)
                r = True
            except (RuntimeError, OrchestratorError) as e:
                self.mgr.events.for_service(
                    spec, 'ERROR',
                    f"Failed while placing {daemon_type}.{daemon_id}"
                    f"on {host}: {e}")
                # only return "no change" if no one else has already succeeded.
                # later successes will also change to True
                if r is None:
                    r = False
                continue

            # add to daemon list so next name(s) will also be unique
            sd = orchestrator.DaemonDescription(
                hostname=host,
                daemon_type=daemon_type,
                daemon_id=daemon_id,
            )
            daemons.append(sd)

        # remove any?
        def _ok_to_stop(
                remove_daemon_hosts: Set[orchestrator.DaemonDescription]
        ) -> bool:
            daemon_ids = [d.daemon_id for d in remove_daemon_hosts]
            r = self.mgr.cephadm_services[daemon_type].ok_to_stop(daemon_ids)
            return not r.retval

        while remove_daemon_hosts and not _ok_to_stop(remove_daemon_hosts):
            # let's find a subset that is ok-to-stop
            remove_daemon_hosts.pop()
        for d in remove_daemon_hosts:
            r = True
            # NOTE: we are passing the 'force' flag here, which means
            # we can delete a mon instance's data.
            self.mgr._remove_daemon(d.name(), d.hostname)

        if r is None:
            r = False
        return r
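
The removal path above relies on a shrink-until-safe loop: pop arbitrary candidates until what remains is collectively ok to stop. The same pattern in isolation, with a hypothetical safe_to_stop predicate:

def shrink_until_safe(candidates: set, safe_to_stop) -> set:
    # mirrors `while remove_daemon_hosts and not _ok_to_stop(...)` above
    while candidates and not safe_to_stop(candidates):
        candidates.pop()
    return candidates

# here stopping at most one daemon at a time is deemed safe
assert len(shrink_until_safe({'a', 'b', 'c'}, lambda s: len(s) <= 1)) == 1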
Example #20
    def _apply_service(self, spec: ServiceSpec) -> bool:
        """
        Schedule a service.  Deploy new daemons or remove old ones, depending
        on the target label and count specified in the placement.
        """
        self.mgr.migration.verify_no_migration()

        service_type = spec.service_type
        service_name = spec.service_name()
        if spec.unmanaged:
            self.log.debug('Skipping unmanaged service %s' % service_name)
            return False
        if spec.preview_only:
            self.log.debug('Skipping preview_only service %s' % service_name)
            return False
        self.log.debug('Applying service %s spec' % service_name)

        if service_type == 'osd':
            self.mgr.osd_service.create_from_spec(cast(DriveGroupSpec, spec))
            # TODO: returning True would result in a busy loop
            # can't know if daemon count changed; create_from_spec doesn't
            # return a solid indication
            return False

        daemons = self.mgr.cache.get_daemons_by_service(service_name)

        public_network = None
        if service_type == 'mon':
            out = str(self.mgr.get_foreign_ceph_option('mon',
                                                       'public_network'))
            if '/' in out:
                public_network = out.strip()
                self.log.debug('mon public_network is %s' % public_network)

        def matches_network(host):
            # type: (str) -> bool
            if not public_network:
                return False
            # make sure we have 1 or more IPs for that network on that
            # host
            return len(self.mgr.cache.networks[host].get(public_network,
                                                         [])) > 0

        def virtual_ip_allowed(host):
            # type: (str) -> bool
            # Verify that it is possible to use Virtual IPs in the host
            try:
                if self.mgr.cache.facts[host]['kernel_parameters'][
                        'net.ipv4.ip_nonlocal_bind'] == '0':
                    return False
            except KeyError:
                return False

            return True

        ha = HostAssignment(
            spec=spec,
            hosts=self.mgr._hosts_with_daemon_inventory(),
            get_daemons_func=self.mgr.cache.get_daemons_by_service,
            filter_new_host=matches_network if service_type == 'mon' else
            virtual_ip_allowed if service_type == 'ha-rgw' else None,
        )

        try:
            hosts: List[HostPlacementSpec] = ha.place()
            self.log.debug('Usable hosts: %s' % hosts)
        except OrchestratorError as e:
            self.log.error('Failed to apply %s spec %s: %s' %
                           (spec.service_name(), spec, e))
            self.mgr.events.for_service(spec, 'ERROR',
                                        'Failed to apply: ' + str(e))
            return False

        r = None

        # sanity check
        if service_type in ['mon', 'mgr'] and len(hosts) < 1:
            self.log.debug('cannot scale mon|mgr below 1 (hosts=%s)' % hosts)
            return False

        # add any?
        did_config = False

        add_daemon_hosts: Set[HostPlacementSpec] = ha.add_daemon_hosts(hosts)
        self.log.debug('Hosts that will receive new daemons: %s' %
                       add_daemon_hosts)

        remove_daemon_hosts: Set[
            orchestrator.DaemonDescription] = ha.remove_daemon_hosts(hosts)
        self.log.debug('Hosts that will lose daemons: %s' %
                       remove_daemon_hosts)

        if service_type == 'ha-rgw':
            spec = self.update_ha_rgw_definitive_hosts(spec, hosts,
                                                       add_daemon_hosts)

        for host, network, name in add_daemon_hosts:
            for daemon_type in service_to_daemon_types(service_type):
                daemon_id = self.mgr.get_unique_name(daemon_type,
                                                     host,
                                                     daemons,
                                                     prefix=spec.service_id,
                                                     forcename=name)

                if not did_config:
                    self.mgr.cephadm_services[service_type].config(
                        spec, daemon_id)
                    did_config = True

                daemon_spec = self.mgr.cephadm_services[
                    service_type].make_daemon_spec(host,
                                                   daemon_id,
                                                   network,
                                                   spec,
                                                   daemon_type=daemon_type)
                self.log.debug('Placing %s.%s on host %s' %
                               (daemon_type, daemon_id, host))

                try:
                    daemon_spec = self.mgr.cephadm_services[
                        service_type].prepare_create(daemon_spec)
                    self._create_daemon(daemon_spec)
                    r = True
                except (RuntimeError, OrchestratorError) as e:
                    self.mgr.events.for_service(
                        spec, 'ERROR',
                        f"Failed while placing {daemon_type}.{daemon_id}"
                        f"on {host}: {e}")
                    # only return "no change" if no one else has already succeeded.
                    # later successes will also change to True
                    if r is None:
                        r = False
                    continue

                # add to daemon list so next name(s) will also be unique
                sd = orchestrator.DaemonDescription(
                    hostname=host,
                    daemon_type=daemon_type,
                    daemon_id=daemon_id,
                )
                daemons.append(sd)

        # remove any?
        def _ok_to_stop(
                remove_daemon_hosts: Set[orchestrator.DaemonDescription]
        ) -> bool:
            daemon_ids = [d.daemon_id for d in remove_daemon_hosts]
            assert None not in daemon_ids
            # setting force flag retains previous behavior, should revisit later.
            r = self.mgr.cephadm_services[service_type].ok_to_stop(cast(
                List[str], daemon_ids),
                                                                   force=True)
            return not r.retval

        while remove_daemon_hosts and not _ok_to_stop(remove_daemon_hosts):
            # let's find a subset that is ok-to-stop
            remove_daemon_hosts.pop()
        for d in remove_daemon_hosts:
            r = True
            # NOTE: we are passing the 'force' flag here, which means
            # we can delete a mon instance's data.
            assert d.hostname is not None
            self._remove_daemon(d.name(), d.hostname)

        if r is None:
            r = False
        return r
Example #21
    def for_service(self, spec: ServiceSpec, level: str, message: str) -> None:
        e = OrchestratorEvent(datetime_now(), 'service', spec.service_name(),
                              level, message)
        self.add(e)
Example #22
    def get_created(self, spec: ServiceSpec) -> Optional[datetime.datetime]:
        return self.spec_created.get(spec.service_name())